Publishing 2019 R3 content
author Alexey Suhov <alexey.suhov@intel.com>
Fri, 4 Oct 2019 16:26:43 +0000 (19:26 +0300)
committer Alexey Suhov <alexey.suhov@intel.com>
Fri, 4 Oct 2019 16:26:43 +0000 (19:26 +0300)
1735 files changed:
README.md
inference-engine/CMakeLists.txt
inference-engine/cmake/FindlibGNA.cmake
inference-engine/cmake/check_features.cmake
inference-engine/cmake/config.cmake.in
inference-engine/cmake/dependencies.cmake
inference-engine/cmake/developer_package.cmake
inference-engine/cmake/download_and_extract.cmake
inference-engine/cmake/features.cmake
inference-engine/cmake/ie_parallel.cmake
inference-engine/cmake/os_flags.cmake
inference-engine/cmake/sdl.cmake
inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in
inference-engine/cmake/share/InferenceEngineConfig.cmake.in
inference-engine/cmake/vpu_dependencies.cmake [new file with mode: 0644]
inference-engine/ie_bridges/python/CMakeLists.txt
inference-engine/ie_bridges/python/docs/api_overview.md
inference-engine/ie_bridges/python/sample/affinity_setting_sample/affinity_setting_sample.py [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/README.md [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/infer_request_wrap.py [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/inputs_filling.py [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/parameters.py [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/utils.py [deleted file]
inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py [deleted file]
inference-engine/ie_bridges/python/sample/classification_sample/README.md
inference-engine/ie_bridges/python/sample/classification_sample_async/README.md
inference-engine/ie_bridges/python/sample/hello_query_device/hello_query_device.py
inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md [new file with mode: 0644]
inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py [new file with mode: 0644]
inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md
inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx
inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp
inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd
inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/CMakeLists.txt
inference-engine/include/builders/ie_concat_layer.hpp
inference-engine/include/cpp/ie_cnn_network.h
inference-engine/include/cpp/ie_executable_network.hpp
inference-engine/include/cpp/ie_infer_request.hpp
inference-engine/include/cpp/ie_memory_state.hpp
inference-engine/include/cpp/ie_plugin_cpp.hpp
inference-engine/include/details/ie_inetwork_iterator.hpp
inference-engine/include/details/ie_pre_allocator.hpp
inference-engine/include/dlia/dlia_config.hpp [new file with mode: 0644]
inference-engine/include/hetero/hetero_plugin_config.hpp
inference-engine/include/ie_allocator.hpp
inference-engine/include/ie_api.h
inference-engine/include/ie_blob.h
inference-engine/include/ie_common.h
inference-engine/include/ie_core.hpp
inference-engine/include/ie_data.h
inference-engine/include/ie_device.hpp
inference-engine/include/ie_icnn_network_stats.hpp
inference-engine/include/ie_iexecutable_network.hpp
inference-engine/include/ie_iextension.h
inference-engine/include/ie_ihetero_plugin.hpp
inference-engine/include/ie_iinfer_request.hpp
inference-engine/include/ie_layers.h
inference-engine/include/ie_layouts.h
inference-engine/include/ie_parallel.hpp
inference-engine/include/ie_parameter.hpp
inference-engine/include/ie_plugin.hpp
inference-engine/include/ie_plugin_config.hpp
inference-engine/include/ie_plugin_dispatcher.hpp
inference-engine/include/ie_precision.hpp
inference-engine/include/ie_primitive_info.hpp
inference-engine/include/ie_tensor_info.hpp
inference-engine/include/multi-device/multi_device_config.hpp [new file with mode: 0644]
inference-engine/include/vpu/hddl_plugin_config.hpp [new file with mode: 0644]
inference-engine/include/vpu/vpu_plugin_config.hpp
inference-engine/samples/CMakeLists.txt
inference-engine/samples/benchmark_app/README.md
inference-engine/samples/benchmark_app/benchmark_app.hpp
inference-engine/samples/benchmark_app/main.cpp
inference-engine/samples/benchmark_app/statistics_report.cpp
inference-engine/samples/benchmark_app/statistics_report.hpp
inference-engine/samples/benchmark_app/utils.hpp
inference-engine/samples/common/format_reader/CMakeLists.txt
inference-engine/samples/common/os/windows/w_dirent.h
inference-engine/samples/common/samples/common.hpp
inference-engine/samples/common/samples/console_progress.hpp
inference-engine/samples/hello_classification/README.md
inference-engine/samples/hello_query_device/README.md
inference-engine/samples/object_detection_sample_ssd/README.md
inference-engine/samples/speech_sample/README.md
inference-engine/samples/speech_sample/main.cpp
inference-engine/samples/speech_sample/speech_sample.hpp
inference-engine/samples/thirdparty/gflags/.gitmodules [deleted file]
inference-engine/src/CMakeLists.txt
inference-engine/src/cldnn_engine/CMakeLists.txt
inference-engine/src/cldnn_engine/cldnn_config.h
inference-engine/src/cldnn_engine/cldnn_custom_layer.h
inference-engine/src/cldnn_engine/cldnn_engine.cpp
inference-engine/src/cldnn_engine/cldnn_engine.h
inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
inference-engine/src/cldnn_engine/cldnn_graph.cpp
inference-engine/src/cldnn_engine/cldnn_graph.h
inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
inference-engine/src/cldnn_engine/cldnn_lstm.cpp
inference-engine/src/cldnn_engine/cldnn_program.cpp
inference-engine/src/cldnn_engine/cldnn_program.h
inference-engine/src/cldnn_engine/debug_options.cpp
inference-engine/src/cldnn_engine/debug_options.h
inference-engine/src/extension/README.md
inference-engine/src/extension/ext_broadcast.cpp
inference-engine/src/extension/ext_detectionoutput_onnx.cpp
inference-engine/src/extension/ext_gather.cpp
inference-engine/src/extension/ext_list.cpp
inference-engine/src/extension/ext_log_softmax.cpp
inference-engine/src/extension/ext_non_max_suppression.cpp [new file with mode: 0644]
inference-engine/src/extension/ext_proposal_onnx.cpp
inference-engine/src/extension/ext_reduce.cpp
inference-engine/src/extension/ext_resample.cpp
inference-engine/src/extension/ext_scatter.cpp [new file with mode: 0644]
inference-engine/src/extension/ext_simplernms.cpp
inference-engine/src/extension/ext_sparse_fill_empty_rows.cpp [new file with mode: 0644]
inference-engine/src/extension/ext_strided_slice.cpp
inference-engine/src/extension/ext_unique.cpp [new file with mode: 0644]
inference-engine/src/gna_plugin/CMakeLists.txt
inference-engine/src/gna_plugin/dnn.cpp
inference-engine/src/gna_plugin/gna_infer_request.hpp
inference-engine/src/gna_plugin/gna_pass_manager.cpp
inference-engine/src/gna_plugin/gna_pass_manager.hpp
inference-engine/src/gna_plugin/gna_plugin.cpp
inference-engine/src/gna_plugin/gna_plugin.hpp
inference-engine/src/gna_plugin/gna_plugin_config.hpp
inference-engine/src/gna_plugin/gna_plugin_entry_points.cpp
inference-engine/src/gna_plugin/gna_plugin_internal.hpp
inference-engine/src/gna_plugin/gna_plugin_log.hpp
inference-engine/src/gna_plugin/quantization/layer_quantizer.hpp
inference-engine/src/gna_plugin/quantization/quantization.h
inference-engine/src/gna_plugin/quantization/scale_factor_calc.hpp
inference-engine/src/hetero_plugin/CMakeLists.txt
inference-engine/src/hetero_plugin/hetero_ade_util.cpp [moved from inference-engine/src/inference_engine/ade_util.cpp with 98% similarity]
inference-engine/src/hetero_plugin/hetero_ade_util.hpp [moved from inference-engine/src/inference_engine/ade_util.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_async_infer_request.cpp [moved from inference-engine/src/inference_engine/hetero/hetero_async_infer_request.cpp with 99% similarity]
inference-engine/src/hetero_plugin/hetero_async_infer_request.hpp [moved from inference-engine/src/inference_engine/hetero/hetero_async_infer_request.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_device_loader.cpp [moved from inference-engine/src/inference_engine/hetero/hetero_device_loader.cpp with 97% similarity]
inference-engine/src/hetero_plugin/hetero_device_loader.hpp [moved from inference-engine/src/inference_engine/hetero/hetero_device_loader.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_executable_network.cpp [moved from inference-engine/src/inference_engine/hetero/hetero_executable_network.cpp with 94% similarity]
inference-engine/src/hetero_plugin/hetero_executable_network.hpp [moved from inference-engine/src/inference_engine/hetero/hetero_executable_network.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_fallback_policy.cpp [moved from inference-engine/src/inference_engine/hetero/fallback_policy.cpp with 84% similarity]
inference-engine/src/hetero_plugin/hetero_fallback_policy.hpp [moved from inference-engine/src/inference_engine/hetero/fallback_policy.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_graph_splitter.cpp [moved from inference-engine/src/inference_engine/ie_graph_splitter.cpp with 99% similarity]
inference-engine/src/hetero_plugin/hetero_graph_splitter.hpp [moved from inference-engine/src/inference_engine/ie_graph_splitter.hpp with 91% similarity]
inference-engine/src/hetero_plugin/hetero_infer_request.cpp [moved from inference-engine/src/inference_engine/hetero/hetero_infer_request.cpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_infer_request.hpp [moved from inference-engine/src/inference_engine/hetero/hetero_infer_request.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_plugin.cpp
inference-engine/src/hetero_plugin/hetero_plugin.hpp [moved from inference-engine/src/inference_engine/hetero/hetero_plugin.hpp with 100% similarity]
inference-engine/src/hetero_plugin/hetero_plugin_base.hpp [moved from inference-engine/src/inference_engine/hetero/hetero_plugin_base.hpp with 100% similarity]
inference-engine/src/inference_engine/CMakeLists.txt
inference-engine/src/inference_engine/blob_factory.hpp
inference-engine/src/inference_engine/blob_transform.cpp
inference-engine/src/inference_engine/builders/ie_layer_builder.cpp
inference-engine/src/inference_engine/cnn_network_impl.cpp
inference-engine/src/inference_engine/cnn_network_int8_normalizer.cpp
inference-engine/src/inference_engine/cpp_interfaces/base/ie_inference_plugin_api.hpp
inference-engine/src/inference_engine/cpp_interfaces/ie_task.hpp
inference-engine/src/inference_engine/cpp_interfaces/ie_task_synchronizer.hpp
inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp
inference-engine/src/inference_engine/cpp_interfaces/impl/ie_memory_state_internal.hpp
inference-engine/src/inference_engine/cpp_interfaces/interface/ie_imemory_state_internal.hpp
inference-engine/src/inference_engine/cpp_interfaces/interface/ie_internal_plugin_config.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/debug.h
inference-engine/src/inference_engine/graph_tools.hpp
inference-engine/src/inference_engine/graph_transformer.h
inference-engine/src/inference_engine/hetero/hetero_plugin.cpp [deleted file]
inference-engine/src/inference_engine/ie_cnn_layer_builder.h
inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp
inference-engine/src/inference_engine/ie_cnn_net_reader_impl.h
inference-engine/src/inference_engine/ie_core.cpp
inference-engine/src/inference_engine/ie_data.cpp
inference-engine/src/inference_engine/ie_device.cpp
inference-engine/src/inference_engine/ie_format_parser.cpp
inference-engine/src/inference_engine/ie_icore.hpp
inference-engine/src/inference_engine/ie_ir_parser.hpp
inference-engine/src/inference_engine/ie_layer_parsers.cpp
inference-engine/src/inference_engine/ie_layer_parsers.h
inference-engine/src/inference_engine/ie_layer_validators.cpp
inference-engine/src/inference_engine/ie_layer_validators.hpp
inference-engine/src/inference_engine/ie_layers_internal.cpp
inference-engine/src/inference_engine/ie_layouts.cpp
inference-engine/src/inference_engine/ie_metric_helpers.hpp
inference-engine/src/inference_engine/ie_plugin_dispatcher.cpp
inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.cpp
inference-engine/src/inference_engine/ie_util_internal.cpp
inference-engine/src/inference_engine/ie_utils.cpp
inference-engine/src/inference_engine/ie_version.cpp
inference-engine/src/inference_engine/layer_transform.hpp
inference-engine/src/inference_engine/net_pass.cpp
inference-engine/src/inference_engine/network_serializer.cpp
inference-engine/src/inference_engine/range_iterator.hpp [deleted file]
inference-engine/src/inference_engine/shape_infer/built-in/ie_broadcast_shape_infer.hpp
inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.cpp
inference-engine/src/inference_engine/shape_infer/built-in/ie_non_max_suppression_shape_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/built-in/ie_resample_shape_infer.hpp
inference-engine/src/inference_engine/shape_infer/built-in/ie_scatter_shape_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/built-in/ie_sparse_fill_empty_rows_shape_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/built-in/ie_topk_shape_infer.hpp
inference-engine/src/inference_engine/shape_infer/built-in/ie_unique_shape_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/built-in/ie_unsqueeze_shape_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/broadcast_offset.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/const_infer/ie_add_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_broadcast_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_concat_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.cpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.cpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_convert_const_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/const_infer/ie_eltw_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_gather_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_mul_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_onehot_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_permute_const_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/const_infer/ie_pow_const_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/const_infer/ie_reduce_const_infer.hpp [new file with mode: 0644]
inference-engine/src/inference_engine/shape_infer/const_infer/ie_shape_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/const_infer/ie_strided_slice_const_infer.hpp
inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.cpp
inference-engine/src/inference_engine/transform/transform_network.cpp [deleted file]
inference-engine/src/inference_engine/transform/transform_network.hpp [deleted file]
inference-engine/src/inference_engine/transform/transformation.cpp [deleted file]
inference-engine/src/inference_engine/transform/transformation.hpp [deleted file]
inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.cpp [deleted file]
inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.hpp [deleted file]
inference-engine/src/inference_engine/transform/transformations/lrn.cpp [deleted file]
inference-engine/src/inference_engine/transform/transformations/lrn.hpp [deleted file]
inference-engine/src/inference_engine/transform/transformations/sub.cpp [deleted file]
inference-engine/src/inference_engine/transform/transformations/sub.hpp [deleted file]
inference-engine/src/inference_engine/w_dirent.h
inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp [new file with mode: 0644]
inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h [new file with mode: 0644]
inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
inference-engine/src/mkldnn_plugin/mkldnn_graph.h
inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
inference-engine/src/mkldnn_plugin/mkldnn_memory.h
inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.cpp [moved from inference-engine/src/inference_engine/memory_solver.cpp with 97% similarity]
inference-engine/src/mkldnn_plugin/mkldnn_memory_solver.hpp [moved from inference-engine/src/inference_engine/memory_solver.hpp with 95% similarity]
inference-engine/src/mkldnn_plugin/mkldnn_memory_state.cpp [new file with mode: 0644]
inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h [new file with mode: 0644]
inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
inference-engine/src/mkldnn_plugin/mkldnn_node.h
inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
inference-engine/src/mkldnn_plugin/mkldnn_plugin.h
inference-engine/src/mkldnn_plugin/mkldnn_streams.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h
inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
inference-engine/src/vpu/CMakeLists.txt
inference-engine/src/vpu/common/CMakeLists.txt [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/parsed_config_base.hpp [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/utils/any.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/any.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/attributes_map.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/attributes_map.hpp with 87% similarity]
inference-engine/src/vpu/common/include/vpu/utils/auto_scope.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/auto_scope.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/checked_cast.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/checked_cast.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/containers.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/containers.hpp with 95% similarity]
inference-engine/src/vpu/common/include/vpu/utils/dot_io.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/dot_io.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/enums.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/enums.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/extra.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/extra.hpp with 95% similarity]
inference-engine/src/vpu/common/include/vpu/utils/file_system.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/file_system.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/func_ref.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/func_ref.hpp with 88% similarity]
inference-engine/src/vpu/common/include/vpu/utils/handle.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/handle.hpp with 85% similarity]
inference-engine/src/vpu/common/include/vpu/utils/heap.hpp [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/utils/ie_helpers.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/ie_helpers.hpp with 54% similarity]
inference-engine/src/vpu/common/include/vpu/utils/io.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/io.hpp with 91% similarity]
inference-engine/src/vpu/common/include/vpu/utils/logger.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/logger.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/numeric.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/numeric.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/optional.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/optional.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/perf_report.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/perf_report.hpp with 60% similarity]
inference-engine/src/vpu/common/include/vpu/utils/range.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/range.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/simple_math.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/simple_math.hpp with 100% similarity]
inference-engine/src/vpu/common/include/vpu/utils/string.hpp [moved from inference-engine/src/vpu/graph_transformer/include/vpu/utils/string.hpp with 92% similarity]
inference-engine/src/vpu/common/src/parsed_config_base.cpp [new file with mode: 0644]
inference-engine/src/vpu/common/src/utils/dot_io.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/dot_io.cpp with 100% similarity]
inference-engine/src/vpu/common/src/utils/enums.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/enums.cpp with 100% similarity]
inference-engine/src/vpu/common/src/utils/file_system.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/file_system.cpp with 100% similarity]
inference-engine/src/vpu/common/src/utils/ie_helpers.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/ie_helpers.cpp with 50% similarity]
inference-engine/src/vpu/common/src/utils/io.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/io.cpp with 100% similarity]
inference-engine/src/vpu/common/src/utils/logger.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/logger.cpp with 100% similarity]
inference-engine/src/vpu/common/src/utils/perf_report.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/perf_report.cpp with 97% similarity]
inference-engine/src/vpu/common/src/utils/simple_math.cpp [moved from inference-engine/src/vpu/graph_transformer/src/utils/simple_math.cpp with 100% similarity]
inference-engine/src/vpu/custom_kernels/binary_layers.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/binary_layers.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/ctc.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/ctc.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/customLayerBindings.xml [new file with mode: 0644]
inference-engine/src/vpu/custom_kernels/cvtf32f16.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/cvtf32f16.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/cvtu8f16.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/cvtu8f16.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/grn.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/grn.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/mvn.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/mvn.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/region_chw.cl [new file with mode: 0644]
inference-engine/src/vpu/custom_kernels/region_chw_m7_branch0.cl [new file with mode: 0644]
inference-engine/src/vpu/custom_kernels/region_chw_m7_branch1.cl [new file with mode: 0644]
inference-engine/src/vpu/custom_kernels/reorg_chw.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/reorg_chw.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/reorg_chw_local.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/reorg_chw_local.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/reorg_chw_stack.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/reorg_chw_stack.cl with 100% similarity]
inference-engine/src/vpu/custom_kernels/reorg_hwc.cl [new file with mode: 0644]
inference-engine/src/vpu/custom_kernels/resample_nn.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/resample_nn.cl with 93% similarity]
inference-engine/src/vpu/custom_kernels/resample_with_antialias.cl [new file with mode: 0644]
inference-engine/src/vpu/custom_kernels/shuffle_channels.cl [moved from inference-engine/src/vpu/vpu_custom_kernels/shuffle_channels.cl with 100% similarity]
inference-engine/src/vpu/graph_transformer/CMakeLists.txt
inference-engine/src/vpu/graph_transformer/include/vpu/backend/backend.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/custom_layer.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/frontend/frontend.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/frontend/stage_builder.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/hw/mx_stage.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/hw/tiling.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/data.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_desc.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/stage.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/parsed_config.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/pass_manager.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_conv_tiling/hw_stage_tiler.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_pooling_tiling/hw_pooling_tiler.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_pooling_tiling/hw_stage_tiler.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/private_plugin_config.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/special_stage_processor.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/stub_stage.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/sw/post_op_stage.hpp
inference-engine/src/vpu/graph_transformer/src/allocator.cpp
inference-engine/src/vpu/graph_transformer/src/backend/backend.cpp
inference-engine/src/vpu/graph_transformer/src/backend/get_meta_data.cpp
inference-engine/src/vpu/graph_transformer/src/blob_reader.cpp
inference-engine/src/vpu/graph_transformer/src/custom_layer.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/detect_network_batch.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/in_out_convert.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/parse_data.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/parse_network.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/pre_process.cpp
inference-engine/src/vpu/graph_transformer/src/hw/mx_stage.cpp
inference-engine/src/vpu/graph_transformer/src/hw/tiling.cpp
inference-engine/src/vpu/graph_transformer/src/model/data.cpp
inference-engine/src/vpu/graph_transformer/src/model/data_desc.cpp
inference-engine/src/vpu/graph_transformer/src/model/model.cpp
inference-engine/src/vpu/graph_transformer/src/model/stage.cpp
inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp
inference-engine/src/vpu/graph_transformer/src/pass_manager.cpp
inference-engine/src/vpu/graph_transformer/src/passes/add_copy_for_outputs_inside_network.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/adjust_data_batch.cpp
inference-engine/src/vpu/graph_transformer/src/passes/adjust_data_layout.cpp
inference-engine/src/vpu/graph_transformer/src/passes/adjust_data_location.cpp
inference-engine/src/vpu/graph_transformer/src/passes/final_check.cpp
inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling.cpp
inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling/hw_convolution_tiler.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling/hw_stage_tiler.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/hw_fc_tiling.cpp
inference-engine/src/vpu/graph_transformer/src/passes/hw_padding.cpp
inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling.cpp
inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling/hw_pooling_tiler.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling/hw_stage_tiler.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/initial_check.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/inject_sw.cpp
inference-engine/src/vpu/graph_transformer/src/passes/merge_hw_stages.cpp
inference-engine/src/vpu/graph_transformer/src/passes/process_special_stages.cpp
inference-engine/src/vpu/graph_transformer/src/passes/remove_unused_stages_outputs.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/replace_deconv_by_conv.cpp
inference-engine/src/vpu/graph_transformer/src/passes/replace_fc_by_conv.cpp
inference-engine/src/vpu/graph_transformer/src/passes/reshape_dilation_conv.cpp
inference-engine/src/vpu/graph_transformer/src/passes/split_grouped_conv.cpp
inference-engine/src/vpu/graph_transformer/src/passes/split_hw_conv_and_pool.cpp
inference-engine/src/vpu/graph_transformer/src/passes/split_hw_depth_convolution.cpp
inference-engine/src/vpu/graph_transformer/src/passes/strided_slice.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/passes/sw_conv_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/passes/sw_deconv_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/passes/sw_fc_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/passes/sw_pooling_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/passes/swap_concat_and_hw_ops.cpp
inference-engine/src/vpu/graph_transformer/src/special_stage_processor.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/argmax.cpp
inference-engine/src/vpu/graph_transformer/src/stages/bias.cpp
inference-engine/src/vpu/graph_transformer/src/stages/clamp.cpp
inference-engine/src/vpu/graph_transformer/src/stages/concat.cpp
inference-engine/src/vpu/graph_transformer/src/stages/convolution.cpp
inference-engine/src/vpu/graph_transformer/src/stages/copy.cpp
inference-engine/src/vpu/graph_transformer/src/stages/crop.cpp
inference-engine/src/vpu/graph_transformer/src/stages/ctc_decoder.cpp
inference-engine/src/vpu/graph_transformer/src/stages/custom.cpp
inference-engine/src/vpu/graph_transformer/src/stages/detection_output.cpp
inference-engine/src/vpu/graph_transformer/src/stages/eltwise.cpp
inference-engine/src/vpu/graph_transformer/src/stages/exp.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/expand.cpp [moved from inference-engine/src/vpu/graph_transformer/src/stages/broadcast.cpp with 62% similarity]
inference-engine/src/vpu/graph_transformer/src/stages/floor.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/gather.cpp
inference-engine/src/vpu/graph_transformer/src/stages/gemm.cpp
inference-engine/src/vpu/graph_transformer/src/stages/grn.cpp
inference-engine/src/vpu/graph_transformer/src/stages/interp.cpp
inference-engine/src/vpu/graph_transformer/src/stages/log.cpp
inference-engine/src/vpu/graph_transformer/src/stages/mtcnn.cpp
inference-engine/src/vpu/graph_transformer/src/stages/mvn.cpp
inference-engine/src/vpu/graph_transformer/src/stages/none.cpp
inference-engine/src/vpu/graph_transformer/src/stages/norm.cpp
inference-engine/src/vpu/graph_transformer/src/stages/normalize.cpp
inference-engine/src/vpu/graph_transformer/src/stages/pad.cpp
inference-engine/src/vpu/graph_transformer/src/stages/permute.cpp
inference-engine/src/vpu/graph_transformer/src/stages/power.cpp
inference-engine/src/vpu/graph_transformer/src/stages/proposal.cpp
inference-engine/src/vpu/graph_transformer/src/stages/psroipooling.cpp
inference-engine/src/vpu/graph_transformer/src/stages/reduce.cpp
inference-engine/src/vpu/graph_transformer/src/stages/region_yolo.cpp
inference-engine/src/vpu/graph_transformer/src/stages/relu.cpp
inference-engine/src/vpu/graph_transformer/src/stages/reorg_yolo.cpp
inference-engine/src/vpu/graph_transformer/src/stages/resample.cpp
inference-engine/src/vpu/graph_transformer/src/stages/reshape.cpp
inference-engine/src/vpu/graph_transformer/src/stages/reverse_sequence.cpp
inference-engine/src/vpu/graph_transformer/src/stages/rnn.cpp
inference-engine/src/vpu/graph_transformer/src/stages/roipooling.cpp
inference-engine/src/vpu/graph_transformer/src/stages/scale.cpp
inference-engine/src/vpu/graph_transformer/src/stages/shrink.cpp
inference-engine/src/vpu/graph_transformer/src/stages/softmax.cpp
inference-engine/src/vpu/graph_transformer/src/stages/split.cpp
inference-engine/src/vpu/graph_transformer/src/stages/strided_slice.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/tile.cpp
inference-engine/src/vpu/graph_transformer/src/stages/topk.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stub_stage.cpp
inference-engine/src/vpu/graph_transformer/src/sw/post_op_stage.cpp
inference-engine/src/vpu/myriad_plugin/CMakeLists.txt
inference-engine/src/vpu/myriad_plugin/api/myriad_api.cpp
inference-engine/src/vpu/myriad_plugin/myriad_async_infer_request.cpp
inference-engine/src/vpu/myriad_plugin/myriad_config.cpp
inference-engine/src/vpu/myriad_plugin/myriad_config.h
inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp
inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h
inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp
inference-engine/src/vpu/myriad_plugin/myriad_executor.h
inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp
inference-engine/src/vpu/myriad_plugin/myriad_infer_request.h
inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp
inference-engine/tests/CMakeLists.txt
inference-engine/tests/helpers/single_layer_common.cpp
inference-engine/tests/helpers/single_layer_common.hpp
inference-engine/tests/helpers/tests_common.hpp
inference-engine/tests/helpers/tests_vpu_common.cpp [new file with mode: 0644]
inference-engine/tests/helpers/tests_vpu_common.hpp [new file with mode: 0644]
inference-engine/tests/unit/CMakeLists.txt
inference-engine/tests/unit/builders/network_builder_test.cpp
inference-engine/tests/unit/builders/transform_network_test.cpp [deleted file]
inference-engine/tests/unit/cnn_network/cnn_layer_validation_tests.cpp
inference-engine/tests/unit/cnn_network/parameters.h
inference-engine/tests/unit/cnn_network/shapes.h
inference-engine/tests/unit/cnn_network/v2_format_parser_test.cpp
inference-engine/tests/unit/engines/gna/configuration_test.cpp
inference-engine/tests/unit/engines/gna/fp32_non_quantized_tests.cpp
inference-engine/tests/unit/engines/gna/gna_cppwraper_test.cpp
inference-engine/tests/unit/engines/gna/gna_graph_aot_test.cpp
inference-engine/tests/unit/engines/gna/gna_matcher.cpp
inference-engine/tests/unit/engines/gna/gna_matcher.hpp
inference-engine/tests/unit/engines/gna/i16_quantisation_test.cpp
inference-engine/tests/unit/engines/gna/matchers/diag_matcher.hpp
inference-engine/tests/unit/engines/gna/test_irs.cpp
inference-engine/tests/unit/engines/gna/test_irs.hpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tree_tests.cpp [deleted file]
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp [new file with mode: 0644]
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp [new file with mode: 0644]
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp [new file with mode: 0644]
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp [new file with mode: 0644]
inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp
inference-engine/tests/unit/engines/mkldnn/graph/test_graph.hpp
inference-engine/tests/unit/engines/mkldnn/mem_solver_test.cpp [moved from inference-engine/tests/unit/mem_solver/mem_solver_test.cpp with 97% similarity]
inference-engine/tests/unit/graph_tools/graph_copy_tests.cpp
inference-engine/tests/unit/graph_tools/graph_test_base.hpp
inference-engine/tests/unit/graph_tools/graph_tools_functional_tests.cpp [new file with mode: 0644]
inference-engine/tests/unit/graph_tools/graph_tools_test.cpp
inference-engine/tests/unit/inference_engine_tests/blob_proxy_test.cpp
inference-engine/tests/unit/inference_engine_tests/blob_test.cpp
inference-engine/tests/unit/inference_engine_tests/cnn_network_test.cpp
inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executor_manager_tests.cpp
inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/iinference_plugin_internal_tests.cpp
inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/plugin_base_tests.cpp
inference-engine/tests/unit/inference_engine_tests/device_tests.cpp [deleted file]
inference-engine/tests/unit/inference_engine_tests/local_test.cpp
inference-engine/tests/unit/inference_engine_tests/ngraph_reader_tests.cpp
inference-engine/tests/unit/inference_engine_tests/plugin_dispatcher_tests.cpp
inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp [deleted file]
inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.cpp
inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.hpp
inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_thread_safe_internal.hpp
inference-engine/tests/unit/mocks/mock_icnn_network.hpp
inference-engine/tests/unit/mocks/mock_not_empty_icnn_network.hpp
inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.cpp
inference-engine/tests/unit/transformations/eltwise_broadcast_test.cpp [deleted file]
inference-engine/tests/unit/transformations/sub_test.cpp [deleted file]
inference-engine/tests/unit/transformations/tranformations_test.hpp [deleted file]
inference-engine/thirdparty/CMakeLists.txt
inference-engine/thirdparty/ade
inference-engine/thirdparty/clDNN/CMakeLists.txt
inference-engine/thirdparty/clDNN/api/C/activation.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/activation_grad.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/apply_adam.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/arg_max_min.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/average_unpooling.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/batch_norm.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/batch_norm_grad.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/binary_convolution.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/border.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/broadcast.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/cldnn.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/concatenation.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/condition.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/contract.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/convolution.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/convolution_grad_input.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/crop.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/custom_gpu_primitive.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/data.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/deconvolution.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/deformable_conv.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/deformable_interp.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/depth_to_space.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/detection_output.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/detection_output_sort.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/eltwise.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/embed.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/fully_connected.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/fully_connected_grad_input.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/fully_connected_grad_weights.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/gather.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/gemm.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/index_select.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/input_layout.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/lookup_table.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/lrn.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/lstm.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/lstm_dynamic.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/max_unpooling.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/mutable_data.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/mvn.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/normalize.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/one_hot.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/permute.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/pooling.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/prior_box.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/proposal.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/reduce.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/region_yolo.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/reorder.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/reorg_yolo.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/reshape.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/reverse_sequence.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/roi_pooling.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/scale.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/scale_grad_input.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/scale_grad_weights.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/select.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/shuffle_channels.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/softmax.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/softmax_loss_grad.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/split.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/strided_slice.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/tile.h [deleted file]
inference-engine/thirdparty/clDNN/api/C/upsampling.h [deleted file]
inference-engine/thirdparty/clDNN/api/CPP/compounds.h [deleted file]
inference-engine/thirdparty/clDNN/api/CPP/event.hpp [deleted file]
inference-engine/thirdparty/clDNN/api/CPP/network.hpp [deleted file]
inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp [deleted file]
inference-engine/thirdparty/clDNN/api/activation.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/activation.hpp with 55% similarity]
inference-engine/thirdparty/clDNN/api/activation_grad.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/activation_grad.hpp with 74% similarity]
inference-engine/thirdparty/clDNN/api/apply_adam.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/apply_adam.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api/arg_max_min.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/arg_max_min.hpp with 78% similarity]
inference-engine/thirdparty/clDNN/api/average_unpooling.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/average_unpooling.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api/batch_norm.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/batch_norm.hpp with 90% similarity]
inference-engine/thirdparty/clDNN/api/batch_norm_grad.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/batch_norm_grad.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api/binary_convolution.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/binary_convolution.hpp with 82% similarity]
inference-engine/thirdparty/clDNN/api/border.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/border.hpp with 86% similarity]
inference-engine/thirdparty/clDNN/api/broadcast.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/broadcast.hpp with 89% similarity]
inference-engine/thirdparty/clDNN/api/cldnn.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/cldnn_defs.h with 67% similarity]
inference-engine/thirdparty/clDNN/api/compounds.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/api/concatenation.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/concatenation.hpp with 77% similarity]
inference-engine/thirdparty/clDNN/api/condition.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/condition.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api/contract.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/contract.hpp with 82% similarity]
inference-engine/thirdparty/clDNN/api/convolution.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/convolution.hpp with 75% similarity]
inference-engine/thirdparty/clDNN/api/convolution_grad_input.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/convolution_grad_input.hpp with 94% similarity]
inference-engine/thirdparty/clDNN/api/convolution_grad_weights.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/convolution_grad_weights.hpp with 75% similarity]
inference-engine/thirdparty/clDNN/api/crop.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/crop.hpp with 92% similarity]
inference-engine/thirdparty/clDNN/api/custom_gpu_primitive.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/custom_gpu_primitive.hpp with 67% similarity]
inference-engine/thirdparty/clDNN/api/data.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/data.hpp with 83% similarity]
inference-engine/thirdparty/clDNN/api/deconvolution.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/deconvolution.hpp with 77% similarity]
inference-engine/thirdparty/clDNN/api/depth_to_space.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/depth_to_space.hpp with 79% similarity]
inference-engine/thirdparty/clDNN/api/detection_output.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/detection_output.hpp with 74% similarity]
inference-engine/thirdparty/clDNN/api/eltwise.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/eltwise.hpp with 65% similarity]
inference-engine/thirdparty/clDNN/api/embed.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/embed.hpp with 85% similarity]
inference-engine/thirdparty/clDNN/api/engine.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/engine.hpp with 61% similarity]
inference-engine/thirdparty/clDNN/api/event.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/api/fully_connected.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/fully_connected.hpp with 76% similarity]
inference-engine/thirdparty/clDNN/api/fully_connected_grad_input.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/fully_connected_grad_input.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api/fully_connected_grad_weights.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/fully_connected_grad_weights.hpp with 86% similarity]
inference-engine/thirdparty/clDNN/api/gather.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/gather.hpp with 73% similarity]
inference-engine/thirdparty/clDNN/api/gather_tree.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/api/gemm.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/gemm.hpp with 82% similarity]
inference-engine/thirdparty/clDNN/api/index_select.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/index_select.hpp with 89% similarity]
inference-engine/thirdparty/clDNN/api/input_layout.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/input_layout.hpp with 80% similarity]
inference-engine/thirdparty/clDNN/api/layout.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/layout.hpp with 93% similarity]
inference-engine/thirdparty/clDNN/api/lookup_table.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/lookup_table.hpp with 79% similarity]
inference-engine/thirdparty/clDNN/api/lrn.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/lrn.hpp with 79% similarity]
inference-engine/thirdparty/clDNN/api/lstm.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/lstm.hpp with 70% similarity]
inference-engine/thirdparty/clDNN/api/lstm_dynamic.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/lstm_dynamic.hpp with 82% similarity]
inference-engine/thirdparty/clDNN/api/max_unpooling.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/max_unpooling.hpp with 84% similarity]
inference-engine/thirdparty/clDNN/api/memory.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/memory.hpp with 65% similarity]
inference-engine/thirdparty/clDNN/api/meta_utils.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/meta_utils.hpp with 98% similarity]
inference-engine/thirdparty/clDNN/api/mutable_data.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/mutable_data.hpp with 84% similarity]
inference-engine/thirdparty/clDNN/api/mvn.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/mvn.hpp with 79% similarity]
inference-engine/thirdparty/clDNN/api/network.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/api/normalize.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/normalize.hpp with 86% similarity]
inference-engine/thirdparty/clDNN/api/one_hot.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/one_hot.hpp with 86% similarity]
inference-engine/thirdparty/clDNN/api/permute.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/permute.hpp with 83% similarity]
inference-engine/thirdparty/clDNN/api/pooling.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/pooling.hpp with 90% similarity]
inference-engine/thirdparty/clDNN/api/primitive.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/api/prior_box.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/prior_box.hpp with 76% similarity]
inference-engine/thirdparty/clDNN/api/profiling.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/profiling.hpp with 97% similarity]
inference-engine/thirdparty/clDNN/api/program.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/program.hpp with 58% similarity]
inference-engine/thirdparty/clDNN/api/proposal.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/proposal.hpp with 75% similarity]
inference-engine/thirdparty/clDNN/api/pyramid_roi_align.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/pyramid_roi_align.hpp with 83% similarity]
inference-engine/thirdparty/clDNN/api/quantize.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/quantize.hpp with 85% similarity]
inference-engine/thirdparty/clDNN/api/reduce.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/reduce.hpp with 65% similarity]
inference-engine/thirdparty/clDNN/api/region_yolo.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/region_yolo.hpp with 76% similarity]
inference-engine/thirdparty/clDNN/api/reorder.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/reorder.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api/reorg_yolo.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/reorg_yolo.hpp with 82% similarity]
inference-engine/thirdparty/clDNN/api/reshape.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/reshape.hpp with 85% similarity]
inference-engine/thirdparty/clDNN/api/reverse_sequence.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/reverse_sequence.hpp with 86% similarity]
inference-engine/thirdparty/clDNN/api/roi_pooling.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/roi_pooling.hpp with 71% similarity]
inference-engine/thirdparty/clDNN/api/scale.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/scale.hpp with 89% similarity]
inference-engine/thirdparty/clDNN/api/scale_grad_input.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/scale_grad_input.hpp with 82% similarity]
inference-engine/thirdparty/clDNN/api/scale_grad_weights.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/scale_grad_weights.hpp with 89% similarity]
inference-engine/thirdparty/clDNN/api/select.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/select.hpp with 86% similarity]
inference-engine/thirdparty/clDNN/api/shuffle_channels.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/shuffle_channels.hpp with 83% similarity]
inference-engine/thirdparty/clDNN/api/softmax.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/softmax.hpp with 79% similarity]
inference-engine/thirdparty/clDNN/api/softmax_loss_grad.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/softmax_loss_grad.hpp with 84% similarity]
inference-engine/thirdparty/clDNN/api/split.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/split.hpp with 77% similarity]
inference-engine/thirdparty/clDNN/api/strided_slice.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/strided_slice.hpp with 78% similarity]
inference-engine/thirdparty/clDNN/api/tensor.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/tensor.hpp with 71% similarity]
inference-engine/thirdparty/clDNN/api/tile.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/tile.hpp with 72% similarity]
inference-engine/thirdparty/clDNN/api/topology.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/topology.hpp with 54% similarity]
inference-engine/thirdparty/clDNN/api/upsampling.hpp [moved from inference-engine/thirdparty/clDNN/api/CPP/upsampling.hpp with 72% similarity]
inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_bn_scale.h [deleted file]
inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_eltwise.h [deleted file]
inference-engine/thirdparty/clDNN/api_extension/C/lstm_dynamic_input.h [deleted file]
inference-engine/thirdparty/clDNN/api_extension/C/lstm_dynamic_timeloop.h [deleted file]
inference-engine/thirdparty/clDNN/api_extension/fused_conv_bn_scale.hpp [moved from inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_bn_scale.hpp with 70% similarity]
inference-engine/thirdparty/clDNN/api_extension/fused_conv_eltwise.hpp [moved from inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_eltwise.hpp with 64% similarity]
inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_input.hpp [moved from inference-engine/thirdparty/clDNN/api_extension/CPP/lstm_dynamic_input.hpp with 85% similarity]
inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_timeloop.hpp [moved from inference-engine/thirdparty/clDNN/api_extension/CPP/lstm_dynamic_timeloop.hpp with 81% similarity]
inference-engine/thirdparty/clDNN/api_test_builds/CMakeLists.txt
inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt
inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp
inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.cpp [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_generic.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_params.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_simple_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16_1x1.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16_1x1.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16_depthwise.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_f16_depthwise.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_bfyx_f16.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_to_bfyx_f16.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_f16.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_f16.h [moved from inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.h with 69% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_f16_fp16.h [moved from inference-engine/thirdparty/clDNN/api/C/pyramid_roi_align.h with 57% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_f16_fp32.h [moved from inference-engine/thirdparty/clDNN/api/C/quantize.h with 53% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_ref.cpp [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/deformable_convolution_kernel_bfyx_conv.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfzyx_f16.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfzyx_f16.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_ref.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_ref.h [moved from inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.h with 63% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_selector.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_selector.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.h [moved from inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_ref.h with 63% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_ref_kernel.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h [moved from inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.h with 53% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/qunatize_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_binary.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_items_class_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache.json
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_tutorial.cl [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_axis.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/binary_convolution_gpu_1x1.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/binary_convolution_gpu_generic.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/binary_convolution_gpu_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/border_gpu_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/concatenation_gpu_simple_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_depthwise.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_to_bfyx_f16.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfzyx_ref.cl [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_fs_byx_fsv32_depthwise.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_tutorial.cl [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_image_tutorial.cl [deleted file]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_tree_gpu_ref.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_fwd_data_f16.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_fwd_data_f32.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/fetch.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_input_bfyx_opt.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_input_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_ref_accross_channels.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_ref_within_channels.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/ocl_types.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/permute_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl [moved from inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_ref.cl with 74% similarity]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_binary.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_byxf_f32_to_byx8_f4_i8.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_fast_b1.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_to_yxfb_batched.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_weights.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h
inference-engine/thirdparty/clDNN/kernel_selector/core/common/training_params.h
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_runner_interface.h
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h
inference-engine/thirdparty/clDNN/src/CMakeLists.txt
inference-engine/thirdparty/clDNN/src/activation.cpp
inference-engine/thirdparty/clDNN/src/activation_grad.cpp
inference-engine/thirdparty/clDNN/src/apply_adam.cpp
inference-engine/thirdparty/clDNN/src/arg_max_min.cpp
inference-engine/thirdparty/clDNN/src/average_unpooling.cpp
inference-engine/thirdparty/clDNN/src/batch_norm.cpp
inference-engine/thirdparty/clDNN/src/batch_norm_grad.cpp
inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
inference-engine/thirdparty/clDNN/src/border.cpp
inference-engine/thirdparty/clDNN/src/broadcast.cpp
inference-engine/thirdparty/clDNN/src/cldnn.cpp
inference-engine/thirdparty/clDNN/src/concatenation.cpp
inference-engine/thirdparty/clDNN/src/condition.cpp
inference-engine/thirdparty/clDNN/src/contract.cpp
inference-engine/thirdparty/clDNN/src/convolution.cpp
inference-engine/thirdparty/clDNN/src/convolution_grad_weights.cpp
inference-engine/thirdparty/clDNN/src/crop.cpp
inference-engine/thirdparty/clDNN/src/custom_gpu_primitive.cpp
inference-engine/thirdparty/clDNN/src/data.cpp
inference-engine/thirdparty/clDNN/src/deconvolution.cpp
inference-engine/thirdparty/clDNN/src/deformable_convolution.cpp
inference-engine/thirdparty/clDNN/src/depth_to_space.cpp
inference-engine/thirdparty/clDNN/src/detection_output.cpp
inference-engine/thirdparty/clDNN/src/eltwise.cpp
inference-engine/thirdparty/clDNN/src/embed.cpp
inference-engine/thirdparty/clDNN/src/engine.cpp
inference-engine/thirdparty/clDNN/src/event.cpp
inference-engine/thirdparty/clDNN/src/fully_connected.cpp
inference-engine/thirdparty/clDNN/src/fully_connected_grad_input.cpp
inference-engine/thirdparty/clDNN/src/fully_connected_grad_weights.cpp
inference-engine/thirdparty/clDNN/src/fused_conv_bn_scale.cpp
inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp
inference-engine/thirdparty/clDNN/src/gather.cpp
inference-engine/thirdparty/clDNN/src/gather_tree.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/gemm.cpp
inference-engine/thirdparty/clDNN/src/generic_layer.cpp
inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/activation_grad_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/apply_adam_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/batch_norm_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/batch_norm_grad_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/binary_convolution_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/cache/kernels/pooling_gpu_bfyx_average_opt.cl
inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp
inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h
inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp
inference-engine/thirdparty/clDNN/src/gpu/confiugration.h
inference-engine/thirdparty/clDNN/src/gpu/contract_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/deformable_convolution_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/embed_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/engine_info.cpp
inference-engine/thirdparty/clDNN/src/gpu/engine_info.h
inference-engine/thirdparty/clDNN/src/gpu/events_pool.h
inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_input_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_weights_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/fused_conv_bn_scale_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/gpu/gemm_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/generic_layer_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/index_select_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp
inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h
inference-engine/thirdparty/clDNN/src/gpu/lookup_table_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_input_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_timeloop_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/max_unpooling_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp
inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h
inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.h
inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp
inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h
inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.cpp
inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h
inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/reduce_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/region_yolo_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/gpu/reorder_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/reorg_yolo_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/reshape_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/scale_grad_input_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/scale_grad_weights_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/softmax_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/softmax_loss_grad_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/upsampling_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_shrinking.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_reshape.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_optimize_bias.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prep_opt_depthwise_sep_post.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_binarization.cpp [deleted file]
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_depthwise_sep_opt.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/reverse_optional_nodes_outputs.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/strided_slice_optimize.cpp
inference-engine/thirdparty/clDNN/src/half.cpp
inference-engine/thirdparty/clDNN/src/include/activation_grad_inst.h
inference-engine/thirdparty/clDNN/src/include/activation_inst.h
inference-engine/thirdparty/clDNN/src/include/api_impl.h [deleted file]
inference-engine/thirdparty/clDNN/src/include/apply_adam_inst.h
inference-engine/thirdparty/clDNN/src/include/arg_max_min_inst.h
inference-engine/thirdparty/clDNN/src/include/average_unpooling_inst.h
inference-engine/thirdparty/clDNN/src/include/batch_norm_grad_inst.h
inference-engine/thirdparty/clDNN/src/include/batch_norm_inst.h
inference-engine/thirdparty/clDNN/src/include/binary_convolution_inst.h
inference-engine/thirdparty/clDNN/src/include/border_inst.h
inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h
inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h
inference-engine/thirdparty/clDNN/src/include/condition_inst.h
inference-engine/thirdparty/clDNN/src/include/contract_inst.h
inference-engine/thirdparty/clDNN/src/include/convolution_grad_weights_inst.h
inference-engine/thirdparty/clDNN/src/include/convolution_inst.h
inference-engine/thirdparty/clDNN/src/include/crop_inst.h
inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h
inference-engine/thirdparty/clDNN/src/include/data_inst.h
inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h
inference-engine/thirdparty/clDNN/src/include/deformable_convolution_inst.h
inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h
inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h
inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h
inference-engine/thirdparty/clDNN/src/include/embed_inst.h
inference-engine/thirdparty/clDNN/src/include/engine_impl.h
inference-engine/thirdparty/clDNN/src/include/error_handler.h
inference-engine/thirdparty/clDNN/src/include/event_impl.h
inference-engine/thirdparty/clDNN/src/include/fully_connected_grad_input_inst.h
inference-engine/thirdparty/clDNN/src/include/fully_connected_grad_weights_inst.h
inference-engine/thirdparty/clDNN/src/include/fully_connected_inst.h
inference-engine/thirdparty/clDNN/src/include/fused_conv_bn_scale_inst.h
inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h
inference-engine/thirdparty/clDNN/src/include/gather_inst.h
inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/include/gemm_inst.h
inference-engine/thirdparty/clDNN/src/include/generic_layer.h [deleted file]
inference-engine/thirdparty/clDNN/src/include/generic_layer.hpp
inference-engine/thirdparty/clDNN/src/include/index_select_inst.h
inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h
inference-engine/thirdparty/clDNN/src/include/internal_primitive.h
inference-engine/thirdparty/clDNN/src/include/internal_primitive_type_base.h
inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h
inference-engine/thirdparty/clDNN/src/include/layout_optimizer.h
inference-engine/thirdparty/clDNN/src/include/lookup_table_inst.h
inference-engine/thirdparty/clDNN/src/include/lrn_inst.h
inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_input_inst.h
inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_inst.h
inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_timeloop_inst.h
inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h
inference-engine/thirdparty/clDNN/src/include/lstm_gemm_inst.h
inference-engine/thirdparty/clDNN/src/include/lstm_inst.h
inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h
inference-engine/thirdparty/clDNN/src/include/memory_impl.h
inference-engine/thirdparty/clDNN/src/include/memory_pool.h
inference-engine/thirdparty/clDNN/src/include/meta_utils.h
inference-engine/thirdparty/clDNN/src/include/mutable_data_inst.h
inference-engine/thirdparty/clDNN/src/include/mvn_inst.h
inference-engine/thirdparty/clDNN/src/include/network_impl.h
inference-engine/thirdparty/clDNN/src/include/normalize_inst.h
inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h
inference-engine/thirdparty/clDNN/src/include/pass_manager.h
inference-engine/thirdparty/clDNN/src/include/permute_inst.h
inference-engine/thirdparty/clDNN/src/include/pooling_inst.h
inference-engine/thirdparty/clDNN/src/include/primitive_inst.h
inference-engine/thirdparty/clDNN/src/include/primitive_type.h
inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h
inference-engine/thirdparty/clDNN/src/include/prior_box_inst.h
inference-engine/thirdparty/clDNN/src/include/program_helpers.h
inference-engine/thirdparty/clDNN/src/include/program_impl.h
inference-engine/thirdparty/clDNN/src/include/program_node.h
inference-engine/thirdparty/clDNN/src/include/proposal_inst.h
inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h
inference-engine/thirdparty/clDNN/src/include/quantize_inst.h
inference-engine/thirdparty/clDNN/src/include/reduce_inst.h
inference-engine/thirdparty/clDNN/src/include/region_yolo_inst.h
inference-engine/thirdparty/clDNN/src/include/reorder_inst.h
inference-engine/thirdparty/clDNN/src/include/reorg_yolo_inst.h
inference-engine/thirdparty/clDNN/src/include/reshape_inst.h
inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h
inference-engine/thirdparty/clDNN/src/include/roi_pooling_inst.h
inference-engine/thirdparty/clDNN/src/include/scale_grad_input_inst.h
inference-engine/thirdparty/clDNN/src/include/scale_grad_weights_inst.h
inference-engine/thirdparty/clDNN/src/include/scale_inst.h
inference-engine/thirdparty/clDNN/src/include/select_inst.h
inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h
inference-engine/thirdparty/clDNN/src/include/sliding_window_utils.h
inference-engine/thirdparty/clDNN/src/include/softmax_inst.h
inference-engine/thirdparty/clDNN/src/include/softmax_loss_grad_inst.h
inference-engine/thirdparty/clDNN/src/include/split_inst.h
inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h
inference-engine/thirdparty/clDNN/src/include/tile_inst.h
inference-engine/thirdparty/clDNN/src/include/to_string_utils.h
inference-engine/thirdparty/clDNN/src/include/topology_impl.h
inference-engine/thirdparty/clDNN/src/include/upsampling_inst.h
inference-engine/thirdparty/clDNN/src/index_select.cpp
inference-engine/thirdparty/clDNN/src/input_layout.cpp
inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp
inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
inference-engine/thirdparty/clDNN/src/lookup_table.cpp
inference-engine/thirdparty/clDNN/src/lrn.cpp
inference-engine/thirdparty/clDNN/src/lstm.cpp
inference-engine/thirdparty/clDNN/src/lstm_dynamic.cpp
inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp
inference-engine/thirdparty/clDNN/src/lstm_dynamic_timeloop.cpp
inference-engine/thirdparty/clDNN/src/lstm_elt.cpp
inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp
inference-engine/thirdparty/clDNN/src/max_unpooling.cpp
inference-engine/thirdparty/clDNN/src/memory.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/memory_pool.cpp
inference-engine/thirdparty/clDNN/src/mutable_data.cpp
inference-engine/thirdparty/clDNN/src/mvn.cpp
inference-engine/thirdparty/clDNN/src/network.cpp
inference-engine/thirdparty/clDNN/src/normalize.cpp
inference-engine/thirdparty/clDNN/src/one_hot.cpp
inference-engine/thirdparty/clDNN/src/permute.cpp
inference-engine/thirdparty/clDNN/src/pooling.cpp
inference-engine/thirdparty/clDNN/src/prior_box.cpp
inference-engine/thirdparty/clDNN/src/program.cpp
inference-engine/thirdparty/clDNN/src/program_helpers.cpp
inference-engine/thirdparty/clDNN/src/program_node.cpp
inference-engine/thirdparty/clDNN/src/proposal.cpp
inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp
inference-engine/thirdparty/clDNN/src/quantize.cpp
inference-engine/thirdparty/clDNN/src/reduce.cpp
inference-engine/thirdparty/clDNN/src/region_yolo.cpp
inference-engine/thirdparty/clDNN/src/reorder.cpp
inference-engine/thirdparty/clDNN/src/reorg_yolo.cpp
inference-engine/thirdparty/clDNN/src/reshape.cpp
inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp
inference-engine/thirdparty/clDNN/src/roi_pooling.cpp
inference-engine/thirdparty/clDNN/src/scale.cpp
inference-engine/thirdparty/clDNN/src/scale_grad_input.cpp
inference-engine/thirdparty/clDNN/src/scale_grad_weights.cpp
inference-engine/thirdparty/clDNN/src/select.cpp
inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp
inference-engine/thirdparty/clDNN/src/softmax.cpp
inference-engine/thirdparty/clDNN/src/softmax_loss_grad.cpp
inference-engine/thirdparty/clDNN/src/split.cpp
inference-engine/thirdparty/clDNN/src/strided_slice.cpp
inference-engine/thirdparty/clDNN/src/tile.cpp
inference-engine/thirdparty/clDNN/src/topology.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/src/upsampling.cpp
inference-engine/thirdparty/clDNN/tests/CMakeLists.txt
inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp
inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp
inference-engine/thirdparty/clDNN/tests/module_tests/test_uqr_distribution.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/activation_grad_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/apply_adam_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/barriers_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_grad_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/binary_convolution_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/contract_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_input_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_weights_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/embed_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_input_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_weights_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/index_select_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/lookup_table_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/lstm_dynamic_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/proposal_test_data.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/quantize_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/reduce_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/removing_output_node_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_input_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_weights_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/softmax_loss_grad_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/spatial_concatenate_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/streams_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/tensor_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/upsampling_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_utils/float16.h
inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp
inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.h
inference-engine/thirdparty/clDNN/tests/test_utils/random_gen.h
inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp
inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h
inference-engine/thirdparty/clDNN/tests/test_utils/uniform_quantized_real_distribution.hpp
inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt
inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/prepare_conv_eltw_fusing.cpp
inference-engine/thirdparty/fluid/checksum.txt
inference-engine/thirdparty/fluid/modules/gapi/CMakeLists.txt
inference-engine/thirdparty/fluid/modules/gapi/cmake/standalone.cmake
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/core.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/core.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidbuffer.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garg.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garray.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gasync_context.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcall.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcommon.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompiled.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompiled_async.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompoundkernel.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcomputation.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcomputation_async.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gkernel.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmat.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmetaarg.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gproto.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/core.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/ggpukernel.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/imgproc.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gscalar.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtransform.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtyped.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/goclkernel.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/operators.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/assert.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/convert.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/cvdefs.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/exports.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/mat.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/render.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/any.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/optional.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/util.hpp
inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/variant.hpp
inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.hpp
inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
inference-engine/thirdparty/fluid/modules/gapi/perf/perf_precomp.hpp
inference-engine/thirdparty/fluid/modules/gapi/samples/api_ref_snippets.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/garray.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend_priv.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gkernel.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gmat.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gorigin.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/gscalar.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_core.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_imgproc.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/operators.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/api/render.cpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/src/api/render_priv.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundbackend.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundkernel.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpukernel.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidcore.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclkernel.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/dump_dot.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/exec.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/kernels.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/meta.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/passes.hpp
inference-engine/thirdparty/fluid/modules/gapi/src/executor/gasync.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.cpp
inference-engine/thirdparty/fluid/modules/gapi/src/precomp.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_compoundkernel_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests_inl.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests_inl.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests.cpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests_inl.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_common.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_helpers.hpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_fluid.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_cpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_fluid.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_cpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_fluid.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_render_tests_cpu.cpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_async_test.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_basic_hetero_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_desc_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_resize_test.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcomputation_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gpu_test.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_kernel_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_mock_kernels.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_sample_pipelines.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_transform_tests.cpp [new file with mode: 0644]
inference-engine/thirdparty/fluid/modules/gapi/test/gapi_util_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_core_tests_gpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_operators_tests_gpu.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_recompilation_test.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/opencl_kernels_test_gapi.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/own/gapi_types_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/own/mat_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/own/scalar_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/test_precomp.hpp
inference-engine/thirdparty/fluid/modules/gapi/test/util/any_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/util/optional_tests.cpp
inference-engine/thirdparty/fluid/modules/gapi/test/util/variant_tests.cpp
inference-engine/thirdparty/fluid/revision.txt
inference-engine/thirdparty/mkl-dnn/include/mkldnn.h
inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp
inference-engine/thirdparty/mkl-dnn/src/common/memory.cpp
inference-engine/thirdparty/mkl-dnn/src/common/primitive.hpp
inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_isa_traits.hpp
inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.hpp
inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.cpp
inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.cpp
inference-engine/thirdparty/movidius/CMakeLists.txt
inference-engine/thirdparty/movidius/WinPthread/pthread_semaphore.c [new file with mode: 0644]
inference-engine/thirdparty/movidius/WinPthread/pthread_semaphore.h [new file with mode: 0644]
inference-engine/thirdparty/movidius/XLink/CMakeLists.txt
inference-engine/thirdparty/movidius/XLink/XLink.cmake [new file with mode: 0644]
inference-engine/thirdparty/movidius/XLink/pc/XLinkPlatform.c
inference-engine/thirdparty/movidius/XLink/pc/pcie_host.c
inference-engine/thirdparty/movidius/XLink/pc/pcie_host.h
inference-engine/thirdparty/movidius/XLink/pc/usb_boot.c
inference-engine/thirdparty/movidius/XLink/pc/usb_boot.h
inference-engine/thirdparty/movidius/XLink/shared/XLink.c
inference-engine/thirdparty/movidius/XLink/shared/XLink.h
inference-engine/thirdparty/movidius/XLink/shared/XLinkDispatcher.c
inference-engine/thirdparty/movidius/XLink/shared/XLinkDispatcher.h
inference-engine/thirdparty/movidius/XLink/shared/XLinkPlatform.h
inference-engine/thirdparty/movidius/XLink/shared/XLinkPlatform_tool.h [new file with mode: 0644]
inference-engine/thirdparty/movidius/XLink/shared/XLinkPrivateDefines.h
inference-engine/thirdparty/movidius/XLink/shared/XLinkPublicDefines.h
inference-engine/thirdparty/movidius/XLink/shared/XLinkVersion.h
inference-engine/thirdparty/movidius/XLink/shared/XLink_tool.h [new file with mode: 0644]
inference-engine/thirdparty/movidius/XLink/tests/CMakeLists.txt [deleted file]
inference-engine/thirdparty/movidius/XLink/tests/XLink_tests.cpp [deleted file]
inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt
inference-engine/thirdparty/movidius/mvnc/include/mvnc.h
inference-engine/thirdparty/movidius/mvnc/include/mvnc_data.h
inference-engine/thirdparty/movidius/mvnc/include/mvnc_tool.h
inference-engine/thirdparty/movidius/mvnc/include/ncCommPrivate.h
inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h
inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c
inference-engine/thirdparty/movidius/mvnc/src/mvnc_data.c
inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_common.cpp
inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_common.hpp
inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_usb.cpp
inference-engine/thirdparty/movidius/shared/include/mvLog.h
inference-engine/thirdparty/movidius/shared/include/mvStringUtils.h
inference-engine/thirdparty/movidius/shared/src/mvStringUtils.c
inference-engine/thirdparty/movidius/watchdog/watchdog.cpp
inference-engine/thirdparty/movidius/watchdog/watchdog.h
inference-engine/thirdparty/ngraph.cmake
inference-engine/thirdparty/stb_lib/CMakeLists.txt
inference-engine/tools/CMakeLists.txt
inference-engine/tools/accuracy_checker_tool/accuracy_check.py
inference-engine/tools/accuracy_checker_tool/convert_annotation.py
inference-engine/tools/benchmark_tool/README.md
inference-engine/tools/benchmark_tool/benchmark.py [deleted file]
inference-engine/tools/benchmark_tool/benchmark_app.py [new file with mode: 0644]
inference-engine/tools/benchmark_tool/parameters.py [new file with mode: 0644]
inference-engine/tools/benchmark_tool/requirements.txt
inference-engine/tools/calibration_tool/README.md
inference-engine/tools/calibration_tool/statistics_collector/CMakeLists.txt
inference-engine/tools/calibration_tool/statistics_collector/data_stats.cpp
inference-engine/tools/calibration_tool/statistics_collector/data_stats.hpp
inference-engine/tools/calibration_tool/statistics_collector/main.cpp
inference-engine/tools/calibration_tool/statistics_collector/statistics_processor.cpp
inference-engine/tools/vpu/CMakeLists.txt
inference-engine/tools/vpu/common/vpu_tools_common.cpp
inference-engine/tools/vpu/vpu_compile/CMakeLists.txt
inference-engine/tools/vpu/vpu_compile/main.cpp
inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt [new file with mode: 0644]
inference-engine/tools/vpu/vpu_perfcheck/main.cpp [new file with mode: 0644]
inference-engine/tools/vpu/vpu_profile/README.md
inference-engine/tools/vpu/vpu_profile/main.cpp
model-optimizer/README.md
model-optimizer/extensions/back/CutMemory.py [new file with mode: 0644]
model-optimizer/extensions/back/CutMemory_test.py [new file with mode: 0644]
model-optimizer/extensions/back/FuseReshapesSequence.py
model-optimizer/extensions/back/Gather0D.py [new file with mode: 0644]
model-optimizer/extensions/back/ReduceToPooling.py
model-optimizer/extensions/back/ReduceToPooling_test.py
model-optimizer/extensions/back/ScalarConstNormalize.py
model-optimizer/extensions/front/mxnet/conv_ext.py
model-optimizer/extensions/front/mxnet/conv_ext_test.py
model-optimizer/extensions/front/mxnet/elementwise_ext.py
model-optimizer/extensions/front/mxnet/eltwise_scalar_replacers.py
model-optimizer/extensions/front/mxnet/expand_dims_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/constant_of_shape_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/constant_of_shape_to_broadcast.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/elementwise_ext.py
model-optimizer/extensions/front/onnx/expand_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/floor_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/not_ext.py [moved from inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py with 63% similarity]
model-optimizer/extensions/front/onnx/reduce_min_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/slice_ext.py
model-optimizer/extensions/front/onnx/top_k_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/reduce_axis_normalizer.py
model-optimizer/extensions/front/tf/BatchToSpaceNDToUpsample.py [new file with mode: 0644]
model-optimizer/extensions/front/tf/InterpolateTransposes.py [new file with mode: 0644]
model-optimizer/extensions/front/tf/ObjectDetectionAPI.py
model-optimizer/extensions/front/tf/elementwise_ext.py
model-optimizer/extensions/front/tf/sparse_fill_empty_rows_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/tf/swish.py [new file with mode: 0644]
model-optimizer/extensions/front/tf/swish_test.py [new file with mode: 0644]
model-optimizer/extensions/front/tf/unique_ext.py [new file with mode: 0644]
model-optimizer/extensions/middle/BiasAddBroadcasting.py [new file with mode: 0644]
model-optimizer/extensions/middle/Cast.py
model-optimizer/extensions/middle/EltwiseInputReshape.py
model-optimizer/extensions/middle/InsertSelect.py [new file with mode: 0644]
model-optimizer/extensions/middle/InsertSelect_test.py [new file with mode: 0644]
model-optimizer/extensions/middle/RemoveDuplicationMemory.py
model-optimizer/extensions/middle/RemoveDuplicationMemory_test.py
model-optimizer/extensions/middle/RemoveUselessCrops.py
model-optimizer/extensions/middle/RemoveUselessCrops_test.py
model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice.py
model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice_test.py
model-optimizer/extensions/middle/ReplacePNorm.py [new file with mode: 0644]
model-optimizer/extensions/middle/ReplacePNormNodePattern_test.py [new file with mode: 0644]
model-optimizer/extensions/middle/ReplaceSpliceNodePattern.py
model-optimizer/extensions/middle/ReplaceSpliceNodePattern_test.py
model-optimizer/extensions/middle/SliceConvert_test.py
model-optimizer/extensions/middle/SliceConverter.py
model-optimizer/extensions/middle/UpsampleToResample.py
model-optimizer/extensions/ops/Cast.py
model-optimizer/extensions/ops/ReduceOps.py
model-optimizer/extensions/ops/activation_ops.py
model-optimizer/extensions/ops/detectionoutput_onnx.py
model-optimizer/extensions/ops/elementwise.py
model-optimizer/extensions/ops/gather.py
model-optimizer/extensions/ops/non_max_suppression.py [new file with mode: 0644]
model-optimizer/extensions/ops/pnorm.py [new file with mode: 0644]
model-optimizer/extensions/ops/range.py
model-optimizer/extensions/ops/roifeatureextractor_onnx.py
model-optimizer/extensions/ops/sparse_fill_empty_rows.py [new file with mode: 0644]
model-optimizer/extensions/ops/sparse_fill_empty_rows_test.py [new file with mode: 0644]
model-optimizer/extensions/ops/splice.py
model-optimizer/extensions/ops/unique.py [new file with mode: 0644]
model-optimizer/extensions/ops/unique_test.py [new file with mode: 0644]
model-optimizer/mo/back/ie_ir_ver_2/emitter.py
model-optimizer/mo/front/common/partial_infer/concat.py
model-optimizer/mo/front/common/partial_infer/eltwise.py
model-optimizer/mo/front/common/partial_infer/multi_box_detection.py
model-optimizer/mo/front/common/partial_infer/space_to_batch.py
model-optimizer/mo/front/common/partial_infer/split.py
model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext.py [new file with mode: 0644]
model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext_test.py [new file with mode: 0644]
model-optimizer/mo/front/kaldi/extractors/splice_component_ext.py
model-optimizer/mo/front/kaldi/loader/loader.py
model-optimizer/mo/front/kaldi/loader/loader_test.py
model-optimizer/mo/front/kaldi/loader/utils.py
model-optimizer/mo/front/kaldi/loader/utils_test.py
model-optimizer/mo/front/mxnet/extractor.py
model-optimizer/mo/front/tf/extractor.py
model-optimizer/mo/graph/graph.py
model-optimizer/mo/graph/port.py
model-optimizer/mo/middle/passes/convert_data_type.py
model-optimizer/mo/middle/passes/eliminate.py
model-optimizer/mo/ops/broadcast.py
model-optimizer/mo/ops/constant_of_shape.py [new file with mode: 0644]
model-optimizer/mo/ops/crop.py
model-optimizer/mo/ops/expand_dims.py
model-optimizer/mo/ops/memoryoffset.py
model-optimizer/mo/ops/slice.py
model-optimizer/mo/ops/squeeze.py
model-optimizer/mo/ops/strided_slice.py
model-optimizer/mo/ops/unsqueeze.py
model-optimizer/mo/pipeline/caffe.py
model-optimizer/mo/pipeline/kaldi.py
model-optimizer/mo/pipeline/mx.py
model-optimizer/mo/pipeline/onnx.py
model-optimizer/mo/pipeline/tf.py
model-optimizer/mo/utils/cli_parser.py
model-optimizer/mo/utils/error.py
model-optimizer/mo/utils/graph.py
model-optimizer/mo/utils/logger.py
model-optimizer/mo/utils/pipeline_config.py
model-optimizer/mo/utils/pipeline_config_test.py
model-optimizer/mo/utils/unittest/graph.py
model-optimizer/mo/utils/unittest/ir_engine.py [new file with mode: 0644]
model-optimizer/mo/utils/unittest/ir_engine_test.py [new file with mode: 0644]
model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.bin [new file with mode: 0644]
model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.xml [new file with mode: 0644]
model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6_negative.xml [new file with mode: 0644]
model-optimizer/mo/utils/versions_checker.py
model-optimizer/mo/utils/versions_checker_test.py [new file with mode: 0644]
model-optimizer/requirements.txt
model-optimizer/requirements_caffe.txt
model-optimizer/requirements_dev.txt [new file with mode: 0644]
model-optimizer/requirements_kaldi.txt
model-optimizer/requirements_mxnet.txt
model-optimizer/requirements_onnx.txt
model-optimizer/requirements_tf.txt
tools/.gitignore
tools/accuracy_checker/.pylintrc [deleted file]
tools/accuracy_checker/README.md [deleted file]
tools/accuracy_checker/accuracy_checker/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/README.md [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/adapter.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/attributes_recognition.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/classification.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/detection.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/dummy_adapters.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/hit_ratio.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/image_processing.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/pose_estimation.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/reidentification.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/segmentation.py [deleted file]
tools/accuracy_checker/accuracy_checker/adapters/text_detection.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/README.md [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/_reid_common.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/bitvehicle.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/brats.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/cityscapes.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/detection_opencv_storage.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/format_converter.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/icdar.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/imagenet.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/lfw.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/mapillary_20.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/market1501.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/mars.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/mighty.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/ms_coco.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/ncf_converter.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/sample_converter.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/super_resolution_converter.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/vgg_face_regression.py [deleted file]
tools/accuracy_checker/accuracy_checker/annotation_converters/wider.py [deleted file]
tools/accuracy_checker/accuracy_checker/config/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/config/config_reader.py [deleted file]
tools/accuracy_checker/accuracy_checker/config/config_validator.py [deleted file]
tools/accuracy_checker/accuracy_checker/data_readers/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/data_readers/data_reader.py [deleted file]
tools/accuracy_checker/accuracy_checker/dataset.py [deleted file]
tools/accuracy_checker/accuracy_checker/dependency.py [deleted file]
tools/accuracy_checker/accuracy_checker/evaluators/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/evaluators/model_evaluator.py [deleted file]
tools/accuracy_checker/accuracy_checker/evaluators/pipeline_evaluator.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/caffe_installation_readme.md [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/dummy_launcher.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/launcher.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/loaders/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/loaders/loader.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/loaders/pickle_loader.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/loaders/xml_loader.py [deleted file]
tools/accuracy_checker/accuracy_checker/launcher/model_conversion.py [deleted file]
tools/accuracy_checker/accuracy_checker/logging.py [deleted file]
tools/accuracy_checker/accuracy_checker/main.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/README.md [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/average_meter.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/character_recognition.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/classification.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/coco_metrics.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/detection.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/hit_ratio.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/metric.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/metric_executor.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/multilabel_recognition.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/overlap.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/regression.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/reid.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/semantic_segmentation.py [deleted file]
tools/accuracy_checker/accuracy_checker/metrics/text_detection.py [deleted file]
tools/accuracy_checker/accuracy_checker/pipeline_connectors/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/pipeline_connectors/connectors.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/README.md [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/cast_to_int.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/clip_boxes.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/clip_points.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/clip_segmentation_mask.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/correct_yolo_v2_boxes.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/crop_segmentation_mask.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/encode_segmentation_mask.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/extend_segmentation_mask.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/filter.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/nms.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/normalize_landmarks_points.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/postprocessing_executor.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/postprocessor.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/resize_prediction_boxes.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/resize_segmentation_mask.py [deleted file]
tools/accuracy_checker/accuracy_checker/postprocessor/zoom_segmentation_mask.py [deleted file]
tools/accuracy_checker/accuracy_checker/preprocessor/README.md [deleted file]
tools/accuracy_checker/accuracy_checker/preprocessor/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/preprocessor/preprocessing_executor.py [deleted file]
tools/accuracy_checker/accuracy_checker/preprocessor/preprocessors.py [deleted file]
tools/accuracy_checker/accuracy_checker/presenters.py [deleted file]
tools/accuracy_checker/accuracy_checker/progress_reporters.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/__init__.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/base_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/character_recognition_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/classification_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/detection_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/hit_ratio_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/multilabel_recognition.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/pose_estimation_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/regression_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/reid_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/representaton_container.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/segmentation_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/super_resolution_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/representation/text_detection_representation.py [deleted file]
tools/accuracy_checker/accuracy_checker/utils.py [deleted file]
tools/accuracy_checker/configs/face-detection-adas-0001.yml [deleted file]
tools/accuracy_checker/configs/face-detection-retail-0004.yml [deleted file]
tools/accuracy_checker/configs/face-reidentification-retail-0095.yml [deleted file]
tools/accuracy_checker/configs/human-pose-estimation-0001.yml [deleted file]
tools/accuracy_checker/configs/landmarks-regression-retail-0009.yml [deleted file]
tools/accuracy_checker/configs/person-reidentification-retail-0031.yml [deleted file]
tools/accuracy_checker/configs/person-reidentification-retail-0076.yml [deleted file]
tools/accuracy_checker/configs/person-reidentification-retail-0079.yml [deleted file]
tools/accuracy_checker/configs/resnet50-binary-0001.yml [deleted file]
tools/accuracy_checker/configs/text-detection-0002.yml [deleted file]
tools/accuracy_checker/configs/text-recognition-0012.yml [deleted file]
tools/accuracy_checker/data/test_data/1.jpg [deleted file]
tools/accuracy_checker/data/test_models/SampLeNet.bin [deleted file]
tools/accuracy_checker/data/test_models/SampLeNet.caffemodel [deleted file]
tools/accuracy_checker/data/test_models/SampLeNet.prototxt [deleted file]
tools/accuracy_checker/data/test_models/SampLeNet.xml [deleted file]
tools/accuracy_checker/pylint_checkers.py [deleted file]
tools/accuracy_checker/requirements.txt [deleted file]
tools/accuracy_checker/sample/README.md [deleted file]
tools/accuracy_checker/sample/sample_config.yml [deleted file]
tools/accuracy_checker/setup.cfg [deleted file]
tools/accuracy_checker/tests/__init__.py [deleted file]
tools/accuracy_checker/tests/common.py [deleted file]
tools/accuracy_checker/tests/conftest.py [deleted file]
tools/accuracy_checker/tests/test_adapters.py [deleted file]
tools/accuracy_checker/tests/test_caffe_launcher.py [deleted file]
tools/accuracy_checker/tests/test_config_reader.py [deleted file]
tools/accuracy_checker/tests/test_config_validator.py [deleted file]
tools/accuracy_checker/tests/test_dataset.py [deleted file]
tools/accuracy_checker/tests/test_dependency.py [deleted file]
tools/accuracy_checker/tests/test_detection_metrics.py [deleted file]
tools/accuracy_checker/tests/test_dlsdk_launcher.py [deleted file]
tools/accuracy_checker/tests/test_input_feeder.py [deleted file]
tools/accuracy_checker/tests/test_metric_evaluator.py [deleted file]
tools/accuracy_checker/tests/test_model_conversion.py [deleted file]
tools/accuracy_checker/tests/test_model_evaluator.py [deleted file]
tools/accuracy_checker/tests/test_postprocessor.py [deleted file]
tools/accuracy_checker/tests/test_preprocessor.py [deleted file]
tools/accuracy_checker/tests/test_presenter.py [deleted file]
tools/accuracy_checker/tests/test_regression_metrics.py [deleted file]
tools/accuracy_checker/tests/test_reid_metrics.py [deleted file]
tools/accuracy_checker/tests/test_segmentation_metrics.py [deleted file]
tools/accuracy_checker/tests/test_utils.py [deleted file]
tools/benchmark/README.md
tools/benchmark/__init__.py
tools/benchmark/__main__.py [deleted file]
tools/benchmark/benchmark.py
tools/benchmark/command_line_reader.py [deleted file]
tools/benchmark/configuration.py [deleted file]
tools/benchmark/logging.py [deleted file]
tools/benchmark/requirements.txt
tools/benchmark/utils/__init__.py [moved from inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/__init__.py with 100% similarity]
tools/benchmark/utils/constants.py [new file with mode: 0644]
tools/benchmark/utils/infer_request_wrap.py [new file with mode: 0644]
tools/benchmark/utils/inputs_filling.py [new file with mode: 0644]
tools/benchmark/utils/logging.py [moved from inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/logging.py with 100% similarity]
tools/benchmark/utils/progress_bar.py [moved from inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/progress_bar.py with 57% similarity]
tools/benchmark/utils/statistics_report.py [new file with mode: 0644]
tools/benchmark/utils/utils.py [new file with mode: 0644]
tools/calibration/aggregated_statistics.py
tools/calibration/base_calibrator.py
tools/calibration/benchmark_facade.py [new file with mode: 0644]
tools/calibration/calibration_configuration.py
tools/calibration/calibrator.py
tools/calibration/command_line_processor.py
tools/calibration/command_line_reader.py
tools/calibration/process_dataset_callbacks/calculate_accuracy_callback.py
tools/calibration/process_dataset_callbacks/collect_results_callback.py
tools/calibration/requirements.txt
tools/network.py
tools/utils/layer.py
tools/utils/network_info.py

index 8d1dd32..139578a 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
-[![Stable release](https://img.shields.io/badge/version-2019.R2-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R2)
+[![Stable release](https://img.shields.io/badge/version-2019.R3-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R3)
 [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
 
 This toolkit allows developers to deploy pre-trained deep learning models through a high-level C++ Inference Engine API integrated with application logic. 
index 0d449c9..f41d9df 100644 (file)
@@ -34,6 +34,9 @@ message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR})
 message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID})
 message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
 
+# remove file with exported developer targets to force its regeneration
+file(REMOVE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
+
 add_subdirectory(src)
 
 if(ENABLE_TESTS)
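For illustration only: the file(REMOVE ...) call added above makes sense because components append their exported targets to that file while the tree is configured, so stale entries from a previous run have to be wiped first. A minimal sketch of that component-side pattern, under the assumption that export(TARGETS ... APPEND FILE ...) is how the file gets repopulated (the my_plugin target is hypothetical; only the targets_developer.cmake path comes from the diff above):

    # Hypothetical component-side code: re-create an entry in the file that the
    # top-level CMakeLists.txt just removed, so it never carries stale targets.
    add_library(my_plugin SHARED src/my_plugin.cpp)
    export(TARGETS my_plugin NAMESPACE IE::
           APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")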
index 4d79782..53b1783 100644 (file)
@@ -2,35 +2,79 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-#module to locate GNA libraries
+# module to locate GNA libraries
 
 if (WIN32)
     set(GNA_PLATFORM_DIR win64)
-    set(GNA_LIB_DIR x64)
-    set(GNA_LIB gna)
 elseif (UNIX)
     set(GNA_PLATFORM_DIR linux)
-    set(GNA_LIB_DIR lib)
-    set(GNA_LIB gna_api)
-    set(GNA_KERNEL_LIB gna_kernel)
 else ()
     message(FATAL_ERROR "GNA is not supported on this platform; only Linux and Windows are supported")
 endif ()
 
-find_library(GNA_API_LIBRARY
-        ${GNA_LIB}
-        HINTS
-        ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR})
+set(libGNA_FOUND TRUE)
+
+set(GNA_KERNEL_LIB_NAME gna)
+set(GNA_LIBS_LIST
+        "libGNA::API"
+        "libGNA::KERNEL")
 
-set(libGNA_INCLUDE_DIRS ${GNA}/${GNA_PLATFORM_DIR}/include)
-set(libGNA_LIBRARY ${GNA_API_LIBRARY})
+if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
+    # use old version of GNA Library from gna_20181120
+    if (WIN32)
+        set(GNA_LIB_DIR x64)
+    else ()
+        list(APPEND GNA_LIBS_LIST
+                "libGNA::OLD_API_LIB")
+        set(GNA_LIB_DIR lib)
+        set(GNA_KERNEL_LIB_NAME gna_kernel)
+    endif()
+    set(libGNA_INCLUDE_DIRS "${GNA}/${GNA_PLATFORM_DIR}/include")
+else()
+    # use current version of GNA library
+    set(GNA_LIB_DIR x64)
+    set(libGNA_INCLUDE_DIRS "${GNA}/include")
+endif()
+set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR})
+
+add_library(libGNA::KERNEL SHARED IMPORTED)
+find_library(GNA_KERNEL_LIBRARY
+        ${GNA_KERNEL_LIB_NAME}
+        HINTS
+        ${libGNA_LIBRARIES_BASE_PATH})
+set_target_properties(libGNA::KERNEL PROPERTIES IMPORTED_LOCATION ${GNA_KERNEL_LIBRARY})
 
-if (UNIX)
-    #message("Searching for libgna_kernel.so in: ${GNA}/${GNA_PLATFORM_DIR}/${GNA_KERNEL_LIB}")
-    find_library(GNA_KERNEL_LIBRARY
-            ${GNA_KERNEL_LIB}
+if ((GNA_LIBRARY_VERSION STREQUAL "GNA1") AND (NOT WIN32))
+    add_library(libGNA::OLD_API_LIB SHARED IMPORTED)
+    find_library(GNA_API_LIBRARY
+            gna_api
             HINTS
-            ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR})
-endif ()
+            ${libGNA_LIBRARIES_BASE_PATH})
+    set_target_properties(libGNA::OLD_API_LIB PROPERTIES IMPORTED_LOCATION ${GNA_API_LIBRARY})
+    target_link_libraries(libGNA::OLD_API_LIB INTERFACE libGNA::KERNEL)
+    set_target_properties(libGNA::OLD_API_LIB PROPERTIES IMPORTED_NO_SONAME TRUE)
+    set_target_properties(libGNA::KERNEL PROPERTIES IMPORTED_NO_SONAME TRUE)
+endif()
+
+add_library(libGNA::API INTERFACE IMPORTED)
+set_property(TARGET libGNA::API PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${libGNA_INCLUDE_DIRS})
+
+add_library(libGNA INTERFACE IMPORTED)
+foreach(_lib_name ${GNA_LIBS_LIST})
+    set_property(TARGET libGNA APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${_lib_name})
 
-set(libGNA_LIBRARIES ${libGNA_LIBRARY} ${GNA_KERNEL_LIBRARY})
+    get_target_property(_target_type ${_lib_name} TYPE)
+    if (${_target_type} STREQUAL "INTERFACE_LIBRARY")
+        get_target_property(_target_location ${_lib_name} INTERFACE_INCLUDE_DIRECTORIES)
+    else()
+        get_target_property(_target_location ${_lib_name} IMPORTED_LOCATION)
+    endif ()
+    message(STATUS "${_lib_name} ${_target_type} : ${_target_location}")
+endforeach(_lib_name)
+
+if (WIN32)
+    set_target_properties(libGNA::KERNEL PROPERTIES
+        IMPORTED_IMPLIB ${GNA_KERNEL_LIBRARY})
+elseif(NOT GNA_LIBRARY_VERSION STREQUAL "GNA1")
+    set_target_properties(libGNA PROPERTIES INTERFACE_LINK_OPTIONS "-Wl,-rpath-link,${libGNA_LIBRARIES_BASE_PATH}")
+endif ()
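For context, the module now models GNA as imported targets (libGNA::API for headers, libGNA::KERNEL for the shared library, plus libGNA::OLD_API_LIB for the GNA1 layout) behind a single libGNA interface target. A minimal, hypothetical consumer in the same directory scope might look like this (the gna_plugin_sketch target and its source file are assumptions):

    # Assumes FindlibGNA.cmake above has already been processed in this scope,
    # so the libGNA umbrella target and its imported members exist.
    add_library(gna_plugin_sketch SHARED src/gna_plugin.cpp)
    # Linking the umbrella target transitively provides the GNA include
    # directories (libGNA::API) and the imported kernel library (libGNA::KERNEL).
    target_link_libraries(gna_plugin_sketch PRIVATE libGNA)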
index 71c9007..3e89b16 100644 (file)
@@ -24,8 +24,6 @@ endif()
 if (APPLE)
     set(ENABLE_GNA OFF)
     set(ENABLE_CLDNN OFF)
-    SET(ENABLE_MYRIAD OFF)
-    SET(ENABLE_VPU OFF)
 endif()
 
 
@@ -66,18 +64,39 @@ if (ENABLE_MKL_DNN)
     add_definitions(-DENABLE_MKL_DNN=1)
 endif()
 
-if (ENABLE_UNICODE_PATH_SUPPORT)
-    add_definitions(-DENABLE_UNICODE_PATH_SUPPORT=1)
-endif()
-
 if (ENABLE_GNA)
     add_definitions(-DENABLE_GNA)
+
+    set (DEFAULT_GNA_LIB GNA1_1401)
+
+    # "GNA library version: GNA1|GNA1_1401|GNA2" - default is 1401
+    if (NOT GNA_LIBRARY_VERSION STREQUAL "GNA1"
+            AND NOT GNA_LIBRARY_VERSION STREQUAL "GNA1_1401"
+            AND NOT GNA_LIBRARY_VERSION STREQUAL "GNA2")
+        set (GNA_LIBRARY_VERSION ${DEFAULT_GNA_LIB})
+        message(STATUS "GNA_LIBRARY_VERSION not set. Can be GNA1, GNA1_1401 or GNA2. Default is ${GNA_LIBRARY_VERSION}")
+    endif()
+
+    if (GNA_LIBRARY_VERSION STREQUAL "GNA2")
+        message(WARNING "GNA2 is not currently supported. Falling back to ${DEFAULT_GNA_LIB}")
+        set(GNA_LIBRARY_VERSION ${DEFAULT_GNA_LIB})
+    endif()
+
+    if (UNIX AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4)
+        message(WARNING "${GNA_LIBRARY_VERSION} is not supported on GCC version ${CMAKE_CXX_COMPILER_VERSION}. Falling back to GNA1")
+        set(GNA_LIBRARY_VERSION GNA1)
+    endif()
+
+    set(GNA_LIBRARY_VERSION "${GNA_LIBRARY_VERSION}" CACHE STRING "GNAVersion" FORCE)
+    list (APPEND IE_OPTIONS GNA_LIBRARY_VERSION)
 endif()
 
 if (ENABLE_SAMPLES)
     set (ENABLE_SAMPLES_CORE ON)
 endif()
 
+# model-dependent tests
+
 if (DEVELOPMENT_PLUGIN_MODE)
     message (STATUS "Enabled development plugin mode")
 
@@ -93,8 +112,18 @@ if (DEVELOPMENT_PLUGIN_MODE)
     endif()
 endif()
 
+if (NOT ENABLE_TESTS)
+    set(ENABLE_GNA_MODELS OFF)
+endif ()
+
 if (VERBOSE_BUILD)
     set(CMAKE_VERBOSE_MAKEFILE  ON)
 endif()
 
+
+if(ENABLE_DUMP)
+    add_definitions(-DDEBUG_DUMP)
+endif()
+
+
 print_enabled_features()
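For illustration, the GNA_LIBRARY_VERSION logic above accepts GNA1, GNA1_1401 or GNA2, falls back to GNA1_1401 for unsupported values (including GNA2 for now), and drops to GNA1 on GCC older than 5.4. A hedged sketch of pinning the choice through an initial-cache script passed with cmake -C (the file name and the chosen value are assumptions):

    # gna_options.cmake (hypothetical), used as: cmake -C gna_options.cmake <src>
    # Force the legacy GNA1 package instead of the GNA1_1401 default.
    set(ENABLE_GNA ON CACHE BOOL "GNA support for inference engine" FORCE)
    set(GNA_LIBRARY_VERSION "GNA1" CACHE STRING "GNA library version: GNA1|GNA1_1401|GNA2" FORCE)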
index d9a6918..ebe82ee 100644 (file)
@@ -7,6 +7,9 @@ if(DEFINED IE_MAIN_SOURCE_DIR AND TARGET inference_engine)
     set(InferenceEngine_LIBRARIES inference_engine)
 else()
     include("${CMAKE_CURRENT_LIST_DIR}/targets.cmake")
+    if(NOT WIN32)
+        set_target_properties(IE::inference_engine PROPERTIES INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
+    endif()
     get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
     set(InferenceEngine_LIBRARIES IE::inference_engine)
 endif()
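For context, this config file is what downstream projects load via find_package; a minimal consumer sketch, assuming InferenceEngine_DIR (or an installed package) already points at the config file (project, target and source names are hypothetical):

    cmake_minimum_required(VERSION 3.5)
    project(ie_consumer_sketch)
    # Loads InferenceEngineConfig.cmake; on non-Windows hosts the patch above
    # also relaxes -Werror=deprecated-declarations on IE::inference_engine.
    find_package(InferenceEngine REQUIRED)
    add_executable(ie_consumer_sketch main.cpp)
    target_link_libraries(ie_consumer_sketch PRIVATE ${InferenceEngine_LIBRARIES})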
index 00a5b8e..682f2e5 100644 (file)
@@ -11,46 +11,26 @@ set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}")
 
 include(ExternalProject)
 
-if (ENABLE_SAME_BRANCH_FOR_MODELS)
-    branchName(MODELS_BRANCH)
-else()
-    set(MODELS_BRANCH "master")
-endif()
-
 include(linux_name)
 if(COMMAND get_linux_name)
     get_linux_name(LINUX_OS_NAME)
 endif()
 
 if (ENABLE_MYRIAD)
-    RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2450
-            ARCHIVE_UNIFIED firmware_ma2450_676.zip
-            TARGET_PATH "${TEMP}/vpu/firmware/ma2450"
-            ENVIRONMENT "VPU_FIRMWARE_MA2450"
-            FOLDER)
-    debug_message(STATUS "ma2450=" ${VPU_FIRMWARE_MA2450})
-endif ()
-
-if (ENABLE_MYRIAD)
-    RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2X8X
-            ARCHIVE_UNIFIED firmware_ma2x8x_mdk_R8_9.zip
-            TARGET_PATH "${TEMP}/vpu/firmware/ma2x8x"
-            ENVIRONMENT "VPU_FIRMWARE_MA2X8X"
-            FOLDER)
-    debug_message(STATUS "ma2x8x=" ${VPU_FIRMWARE_MA2X8X})
-endif ()
+    include(vpu_dependencies)
+endif()
 
 ## enable cblas_gemm from OpenBLAS package
 if (GEMM STREQUAL "OPENBLAS")
-if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
-    find_package(BLAS REQUIRED)
-    if(BLAS_FOUND)
-        find_path(BLAS_INCLUDE_DIRS cblas.h)
-    else()
-        message(ERROR "OpenBLAS not found: install OpenBLAS or set -DBLAS_INCLUDE_DIRS=<path to dir with cblas.h> and -DBLAS_LIBRARIES=<path to libopenblas.so or openblas.lib>")
+    if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
+        find_package(BLAS REQUIRED)
+        if(BLAS_FOUND)
+            find_path(BLAS_INCLUDE_DIRS cblas.h)
+        else()
+            message(ERROR "OpenBLAS not found: install OpenBLAS or set -DBLAS_INCLUDE_DIRS=<path to dir with cblas.h> and -DBLAS_LIBRARIES=<path to libopenblas.so or openblas.lib>")
+        endif()
     endif()
-endif()
-debug_message(STATUS "openblas=" ${BLAS_LIBRARIES})
+    debug_message(STATUS "openblas=" ${BLAS_LIBRARIES})
 endif ()
 
 #MKL-ml package
@@ -64,111 +44,116 @@ endif ()
 
 ## Intel OMP package
 if (THREADING STREQUAL "OMP")
-if (WIN32)
-    RESOLVE_DEPENDENCY(OMP
-            ARCHIVE_WIN "iomp.zip"
-            TARGET_PATH "${TEMP}/omp"
-            ENVIRONMENT "OMP"
-            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
-elseif(LINUX)
-    RESOLVE_DEPENDENCY(OMP
-            ARCHIVE_LIN "iomp.tgz"
-            TARGET_PATH "${TEMP}/omp"
-            ENVIRONMENT "OMP"
-            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
-else(APPLE)
-    RESOLVE_DEPENDENCY(OMP
-            ARCHIVE_MAC "iomp_20190130_mac.tgz"
-            TARGET_PATH "${TEMP}/omp"
-            ENVIRONMENT "OMP"
-            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
-endif()
-log_rpath_from_dir(OMP "${OMP}/lib")
-debug_message(STATUS "intel_omp=" ${OMP})
+    if (WIN32)
+        RESOLVE_DEPENDENCY(OMP
+                ARCHIVE_WIN "iomp.zip"
+                TARGET_PATH "${TEMP}/omp"
+                ENVIRONMENT "OMP"
+                VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
+    elseif(LINUX)
+        RESOLVE_DEPENDENCY(OMP
+                ARCHIVE_LIN "iomp.tgz"
+                TARGET_PATH "${TEMP}/omp"
+                ENVIRONMENT "OMP"
+                VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
+    else(APPLE)
+        RESOLVE_DEPENDENCY(OMP
+                ARCHIVE_MAC "iomp_20190130_mac.tgz"
+                TARGET_PATH "${TEMP}/omp"
+                ENVIRONMENT "OMP"
+                VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
+    endif()
+    log_rpath_from_dir(OMP "${OMP}/lib")
+    debug_message(STATUS "intel_omp=" ${OMP})
 endif ()
 
 ## TBB package
 if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
-if (WIN32)
-    #TODO: add target_path to be platform specific as well, to avoid following if
-    RESOLVE_DEPENDENCY(TBB
-            ARCHIVE_WIN "tbb2019_20181010_win.zip" #TODO: windows zip archive created incorrectly using old name for folder
-            TARGET_PATH "${TEMP}/tbb"
-            ENVIRONMENT "TBBROOT"
-            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
-elseif(LINUX)
-    RESOLVE_DEPENDENCY(TBB
-            ARCHIVE_LIN "tbb2019_20181010_lin.tgz"
-            TARGET_PATH "${TEMP}/tbb"
-            ENVIRONMENT "TBBROOT")
-else(APPLE)
-    RESOLVE_DEPENDENCY(TBB
-            ARCHIVE_MAC "tbb2019_20190414_mac.tgz"
-            TARGET_PATH "${TEMP}/tbb"
-            ENVIRONMENT "TBBROOT"
-            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
-endif()
-log_rpath_from_dir(TBB "${TBB}/lib")
-debug_message(STATUS "tbb=" ${TBB})
+    if (WIN32)
+        #TODO: add target_path to be platform specific as well, to avoid following if
+        RESOLVE_DEPENDENCY(TBB
+                ARCHIVE_WIN "tbb2019_20181010_win.zip" #TODO: windows zip archive created incorrectly using old name for folder
+                TARGET_PATH "${TEMP}/tbb"
+                ENVIRONMENT "TBBROOT"
+                VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
+    elseif(LINUX)
+        RESOLVE_DEPENDENCY(TBB
+                ARCHIVE_LIN "tbb2019_20181010_lin.tgz"
+                TARGET_PATH "${TEMP}/tbb"
+                ENVIRONMENT "TBBROOT")
+    else(APPLE)
+        RESOLVE_DEPENDENCY(TBB
+                ARCHIVE_MAC "tbb2019_20190414_v1_mac.tgz"
+                TARGET_PATH "${TEMP}/tbb"
+                ENVIRONMENT "TBBROOT"
+                VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
+    endif()
+    log_rpath_from_dir(TBB "${TBB}/lib")
+    debug_message(STATUS "tbb=" ${TBB})
 endif ()
 
 if (ENABLE_OPENCV)
-  set(OPENCV_VERSION "4.1.1")
-  set(OPENCV_BUILD "595")
-  set(OPENCV_SUFFIX "")
-if (WIN32)
-    RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_WIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}.zip"
-            TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}"
-            ENVIRONMENT "OpenCV_DIR"
-            VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "\\opencv_${OPENCV_VERSION}\\bin")
-    set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
-elseif(APPLE)
-    RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_MAC "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.tar.xz"
-            TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx"
-            ENVIRONMENT "OpenCV_DIR"
-            VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_osx/lib")
-    set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
-elseif(LINUX)
-    if (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04")
-        set(OPENCV_SUFFIX "ubuntu16")
-    elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04")
-        set(OPENCV_SUFFIX "ubuntu18")
-    elseif (${LINUX_OS_NAME} STREQUAL "CentOS 7")
-        set(OPENCV_SUFFIX "centos7")
-    elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l" AND
-            (${LINUX_OS_NAME} STREQUAL "Debian 9" OR
-             ${LINUX_OS_NAME} STREQUAL "Raspbian 9" OR
-             ${LINUX_OS_NAME} STREQUAL "Debian 10" OR
-             ${LINUX_OS_NAME} STREQUAL "Raspbian 10"))
-        set(OPENCV_SUFFIX "debian9arm")
+    set(OPENCV_VERSION "4.1.2")
+    set(OPENCV_BUILD "624")
+    set(OPENCV_SUFFIX "")
+    if (WIN32)
+        RESOLVE_DEPENDENCY(OPENCV
+                ARCHIVE_WIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}.zip"
+                TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}"
+                ENVIRONMENT "OpenCV_DIR"
+                VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
+        log_rpath_from_dir(OPENCV "\\opencv_${OPENCV_VERSION}\\bin")
+    elseif(APPLE)
+        RESOLVE_DEPENDENCY(OPENCV
+                ARCHIVE_MAC "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.tar.xz"
+                TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx"
+                ENVIRONMENT "OpenCV_DIR"
+                VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
+        log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_osx/lib")
+    elseif(LINUX)
+        if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l")
+            set(OPENCV_SUFFIX "debian9arm")
+        elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04")
+            set(OPENCV_SUFFIX "ubuntu16")
+        elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04")
+            set(OPENCV_SUFFIX "ubuntu18")
+        elseif (${LINUX_OS_NAME} STREQUAL "CentOS 7")
+            set(OPENCV_SUFFIX "centos7")
+        endif()
     endif()
-endif()
-
-if (OPENCV_SUFFIX)
-    RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_LIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_${OPENCV_SUFFIX}.tar.xz"
-            TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}"
-            ENVIRONMENT "OpenCV_DIR"
-            VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/lib")
-    set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
-endif()
 
-debug_message(STATUS "opencv=" ${OPENCV})
-set(OpenCV_DIR "${OPENCV}" CACHE PATH "Path to OpenCV in temp directory")
+    if (OPENCV_SUFFIX)
+        RESOLVE_DEPENDENCY(OPENCV
+                ARCHIVE_LIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_${OPENCV_SUFFIX}.tar.xz"
+                TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}"
+                ENVIRONMENT "OpenCV_DIR"
+                VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
+        log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/lib")
+    endif()
+    debug_message(STATUS "opencv=" ${OPENCV})
+    # OpenCV_DIR should point to the cmake folder within the specified OpenCV binary package.
+    # It is required to successfully find OpenCV libs using the find_package(OpenCV ...) command.
+    # So the cached OpenCV_DIR variable should be updated only if a custom value wasn't previously set here.
+    if (NOT DEFINED ENV{OpenCV_DIR})
+        set(OpenCV_DIR "${OPENCV}/cmake" CACHE PATH "Path to OpenCV in temp directory")
+    endif()
 endif()
 
-
 include(ie_parallel)
 
 if (ENABLE_GNA)
-    RESOLVE_DEPENDENCY(GNA
-            ARCHIVE_UNIFIED "gna_20181120.zip"
-            TARGET_PATH "${TEMP}/gna")
+    if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
+        RESOLVE_DEPENDENCY(GNA
+                ARCHIVE_UNIFIED "gna_20181120.zip"
+                TARGET_PATH "${TEMP}/gna")
+    elseif(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
+        set(GNA_VERSION "01.00.00.1401")
+        RESOLVE_DEPENDENCY(GNA
+                ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
+                TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
+                VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
+    endif()
+    debug_message(STATUS "gna=" ${GNA})
 endif()
 
 configure_file(
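For illustration, the net effect of the OpenCV_DIR handling above is that a later find_package(OpenCV) call resolves against the downloaded package unless the user exported their own OpenCV_DIR environment variable. A hedged sketch of such a later lookup (the demo target and source are hypothetical; opencv_core and opencv_imgproc are the standard imported targets shipped with OpenCV packages):

    # OpenCV_DIR already points at <package>/cmake courtesy of dependencies.cmake,
    # so no extra HINTS are needed here; an exported OpenCV_DIR always wins.
    find_package(OpenCV QUIET COMPONENTS core imgproc)
    if(OpenCV_FOUND)
        add_executable(opencv_demo_sketch demo.cpp)
        target_link_libraries(opencv_demo_sketch PRIVATE opencv_core opencv_imgproc)
    endif()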
index 52e0fef..a27143d 100644 (file)
@@ -6,7 +6,7 @@
 include(debug)
 
 if (UNIX AND NOT APPLE)
-    set(LINUX TRUE)
+    set(LINUX ON)
 endif()
 
 string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
@@ -68,16 +68,14 @@ set(CMAKE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX})
 if (WIN32)
     # Support CMake multiconfiguration for Visual Studio build
     set(IE_BUILD_POSTFIX $<$<CONFIG:Debug>:${IE_DEBUG_POSTFIX}>$<$<CONFIG:Release>:${IE_RELEASE_POSTFIX}>)
-    set(IE_BUILD_CONFIGURATION $<CONFIG>)
 else ()
     if (${CMAKE_BUILD_TYPE} STREQUAL "Debug" )
         set(IE_BUILD_POSTFIX ${IE_DEBUG_POSTFIX})
     else()
         set(IE_BUILD_POSTFIX ${IE_RELEASE_POSTFIX})
     endif()
-    set(IE_BUILD_CONFIGURATION ${CMAKE_BUILD_TYPE})
 endif()
-message(STATUS "BUILD_CONFIGURATION: ${IE_BUILD_CONFIGURATION}")
+message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
 
 add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\")
 
@@ -95,12 +93,12 @@ if(NOT UNIX)
     set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER})
     set(LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}) # compatibility issue: linux uses LIBRARY_OUTPUT_PATH, windows uses LIBRARY_OUTPUT_DIRECTORY
 else()
-    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
-    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
-    set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION})
-    set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION})
-    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION})
-    set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
+    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/lib)
+    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/lib)
+    set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE})
+    set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE})
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE})
+    set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/lib)
     set(LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}/lib)
 endif()
 
index bd837be..930a640 100644 (file)
@@ -145,7 +145,7 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked
   if(DEFINED ENV{IE_PATH_TO_DEPS})
     set(URL "$ENV{IE_PATH_TO_DEPS}/${RELATIVE_URL}")
   else()
-    set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R2/inference_engine/${RELATIVE_URL}")
+    set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R3/inference_engine/${RELATIVE_URL}")
   endif()
 
   #no message on recursive calls
index 9fa537d..2d7c827 100644 (file)
@@ -4,15 +4,20 @@
 
 include (options)
 
-#this options are aimed to optimize build time on development system
+#these options are aimed to optimize build time on development system
 
 #backed targets
 ie_option (ENABLE_GNA "GNA support for inference engine" ON)
+ie_option (ENABLE_ROCKHOPER "use Rockhopper decoder for converting / output scores" ON)
 
 ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON)
 
 ie_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON)
 
+ie_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF)
+
+ie_option (ENABLE_CLDNN_BUILD "build clDnn from sources" OFF)
+
 ie_option (ENABLE_PROFILING_ITT "ITT tracing of IE and plugins internals" ON)
 
 ie_option (ENABLE_PROFILING_RAW "Raw counters profiling (just values, no start/stop time or timeline)" OFF)
@@ -90,8 +95,18 @@ ie_option (DEVELOPMENT_PLUGIN_MODE "Disabled build of all plugins" OFF)
 
 ie_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ON)
 
+ie_option (ENABLE_CPP_CCT "enables C++ version of Cross Check Tool" OFF)
+
 ie_option (ENABLE_UNICODE_PATH_SUPPORT "Enable loading models from Unicode paths" ON)
 
+ie_option (ENABLE_LTO "Enable Link Time Optimization" OFF)
+
+# FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but
+#        this must be addressed in a proper way
+if(CMAKE_CROSSCOMPILING OR NOT (UNIX AND NOT APPLE))
+    set(ENABLE_LTO OFF)
+endif()
+
 if (UNIX AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.3)
     set(ENABLE_UNICODE_PATH_SUPPORT OFF)
 endif()
index 8265701..97e8c5e 100644 (file)
@@ -6,57 +6,77 @@ function(set_ie_threading_interface_for TARGET_NAME)
     set(IE_THREAD_DEFINE "IE_THREAD_SEQ")
 
     if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
-        if (NOT (IE_MAIN_SOURCE_DIR))
-            set(incl_path ${IE_EXTERNAL_DIR}/tbb/include)
-            if (WIN32)
-                set(lib_rel_path ${IE_LIB_REL_DIR})
-                set(lib_dbg_path ${IE_LIB_DBG_DIR})
+        if (DEFINED ENV{TBBROOT})
+            # Check the TBB package in case a custom TBBROOT path is configured
+            find_package(TBB QUIET PATHS "$ENV{TBBROOT}/cmake")
+            if (TBB_FOUND)
+                set(IE_THREAD_DEFINE "IE_THREAD_TBB")
+                if (WIN32)
+                    target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp")
+                endif ()
+                target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_IMPORTED_TARGETS})
+            else ()
+                # TBB was not found by the configured TBBROOT path; the SEQ method will be used
+                ext_message(WARNING "TBB not found by the configured TBBROOT path $ENV{TBBROOT}")
+            endif ()
+        else()
+            if (NOT (IE_MAIN_SOURCE_DIR))
+                set(incl_path ${IE_EXTERNAL_DIR}/tbb/include)
+                if (WIN32)
+                    set(lib_rel_path ${IE_LIB_REL_DIR})
+                    set(lib_dbg_path ${IE_LIB_DBG_DIR})
+                else ()
+                    set(lib_rel_path ${IE_EXTERNAL_DIR}/tbb/lib)
+                    set(lib_dbg_path ${lib_rel_path})
+                endif ()
             else ()
-                set(lib_rel_path ${IE_EXTERNAL_DIR}/tbb/lib)
+                set(incl_path ${TBB}/include)
+                set(lib_rel_path ${TBB}/lib)
                 set(lib_dbg_path ${lib_rel_path})
             endif ()
-        else ()
-            set(incl_path ${TBB}/include)
-            set(lib_rel_path ${TBB}/lib)
-            set(lib_dbg_path ${lib_rel_path})
-        endif ()
 
-        if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
-            find_path(TBB_INCLUDE_DIRS tbb/tbb.h ${incl_path} NO_DEFAULT_PATH)
-            find_library(TBB_LIBRARIES_RELEASE tbb ${lib_rel_path} NO_DEFAULT_PATH)
-            find_library(TBB_LIBRARIES_DEBUG tbb_debug ${lib_dbg_path} NO_DEFAULT_PATH)
-            ext_message(STATUS "TBB include: ${TBB_INCLUDE_DIRS}")
-            ext_message(STATUS "TBB Release lib: ${TBB_LIBRARIES_RELEASE}")
-            ext_message(STATUS "TBB Debug lib: ${TBB_LIBRARIES_DEBUG}")
-        endif ()
-
-        if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
-            ext_message(WARNING "TBB not found. TBB support will be disabled. ${IE_THREAD_DEFINE} is defined")
-        else ()
-            set(IE_THREAD_DEFINE "IE_THREAD_TBB")
-
-            target_include_directories(${TARGET_NAME} PUBLIC ${TBB_INCLUDE_DIRS})
-            if (WIN32)
-                target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp")
+            if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
+                find_path(TBB_INCLUDE_DIRS tbb/tbb.h ${incl_path} NO_DEFAULT_PATH)
+                find_library(TBB_LIBRARIES_RELEASE tbb ${lib_rel_path} NO_DEFAULT_PATH)
+                ext_message(STATUS "TBB include: ${TBB_INCLUDE_DIRS}")
+                ext_message(STATUS "TBB Release lib: ${TBB_LIBRARIES_RELEASE}")
+                if (NOT LINUX)
+                    find_library(TBB_LIBRARIES_DEBUG tbb_debug ${lib_dbg_path} NO_DEFAULT_PATH)
+                    if (TBB_LIBRARIES_DEBUG)
+                        ext_message(STATUS "TBB Debug lib: ${TBB_LIBRARIES_DEBUG}")
+                    else ()
+                        ext_message(WARNING "TBB Debug binaries are missing.")
+                    endif ()
+                endif ()
             endif ()
 
-            # Debug binaries are optional.
-            if (TBB_LIBRARIES_DEBUG)
+            if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
+                ext_message(WARNING "TBB not found. TBB support will be disabled. ${IE_THREAD_DEFINE} is defined")
+            else ()
+                set(IE_THREAD_DEFINE "IE_THREAD_TBB")
+
+                target_include_directories(${TARGET_NAME} PUBLIC ${TBB_INCLUDE_DIRS})
                 if (WIN32)
-                    target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:DEBUG>:${TBB_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:DEBUG>>:${TBB_LIBRARIES_RELEASE}>")
-                else ()
-                    if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
-                        target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_DEBUG})
-                    else()
-                        target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
+                    target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp")
+                endif ()
+
+                # Debug binaries are optional.
+                if (TBB_LIBRARIES_DEBUG AND NOT LINUX)
+                    if (WIN32)
+                        target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:DEBUG>:${TBB_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:DEBUG>>:${TBB_LIBRARIES_RELEASE}>")
+                    else ()
+                        if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+                            target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_DEBUG})
+                        else()
+                            target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
+                        endif ()
                     endif ()
+                else ()
+                    # Link Release library to all configurations.
+                    target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
                 endif ()
-            else ()
-                # Link Release library to all configurations.
-                ext_message(WARNING "TBB Debug binaries are missed.")
-                target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
             endif ()
-        endif ()
+        endif()
     elseif (THREADING STREQUAL "OMP")
         if (WIN32)
             set(omp_lib_name libiomp5md)
@@ -79,9 +99,15 @@ function(set_ie_threading_interface_for TARGET_NAME)
 
         if (NOT OMP_LIBRARIES_RELEASE)
             find_library(OMP_LIBRARIES_RELEASE ${omp_lib_name} ${lib_rel_path} NO_DEFAULT_PATH)
-            find_library(OMP_LIBRARIES_DEBUG ${omp_lib_name} ${lib_dbg_path} NO_DEFAULT_PATH)
             ext_message(STATUS "OMP Release lib: ${OMP_LIBRARIES_RELEASE}")
-            ext_message(STATUS "OMP Debug lib: ${OMP_LIBRARIES_DEBUG}")
+            if (NOT LINUX)
+                find_library(OMP_LIBRARIES_DEBUG ${omp_lib_name} ${lib_dbg_path} NO_DEFAULT_PATH)
+                if (OMP_LIBRARIES_DEBUG)
+                    ext_message(STATUS "OMP Debug lib: ${OMP_LIBRARIES_DEBUG}")
+                else ()
+                    ext_message(WARNING "OMP Debug binaries are missing.")
+                endif ()
+            endif ()
         endif ()
 
         if (NOT OMP_LIBRARIES_RELEASE)
@@ -98,7 +124,7 @@ function(set_ie_threading_interface_for TARGET_NAME)
             endif ()
 
             # Debug binaries are optional.
-            if (OMP_LIBRARIES_DEBUG)
+            if (OMP_LIBRARIES_DEBUG AND NOT LINUX)
                 if (WIN32)
                     target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:DEBUG>:${OMP_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:DEBUG>>:${OMP_LIBRARIES_RELEASE}>")
                 else()
@@ -110,7 +136,6 @@ function(set_ie_threading_interface_for TARGET_NAME)
                 endif ()
             else ()
                 # Link Release library to all configurations.
-                ext_message(WARNING "OMP Debug binaries are missed.")
                 target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_RELEASE})
             endif ()
         endif ()
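For context, the TBBROOT branch above lets a custom TBB installation override the bundled one: if the environment variable is set and its cmake folder carries a TBB config package, the imported targets from that package are linked; otherwise the function warns and falls back to the bundled lookup or the sequential mode. A hedged usage sketch (the /opt/intel/tbb path is an assumption):

    # Hypothetical setup before configuring, e.g.:
    #   TBBROOT=/opt/intel/tbb cmake -DTHREADING=TBB <src>
    # which makes the branch above resolve TBB roughly like this:
    find_package(TBB QUIET PATHS "$ENV{TBBROOT}/cmake")
    if(TBB_FOUND)
        message(STATUS "Custom TBB targets: ${TBB_IMPORTED_TARGETS}")
    endif()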
index 6a5442f..ad15859 100644 (file)
@@ -4,9 +4,9 @@
 
 macro(disable_deprecated_warnings)
     if(WIN32)
-        if("${CMAKE_CXX_COMPILER_ID}" MATCHES Intel)
+        if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-warning:1478")
-        elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL MSVC)
+        elseif(CMAKE_CXX_COMPILER_ID STREQUAL MSVC)
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") # disable warning on deprecated API
         endif()
     else()
@@ -29,7 +29,6 @@ if (WIN32)
         endif()
     endif()
 
-
     set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7")
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
 
@@ -38,7 +37,7 @@ if (WIN32)
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Z7")
 
         set(DEBUG_SYMBOLS_LINKER_FLAGS "/DEBUG")
-        if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
+        if (CMAKE_BUILD_TYPE STREQUAL "Release")
             # Keep default /OPT values. See /DEBUG reference for details.
             set(DEBUG_SYMBOLS_LINKER_FLAGS "${DEBUG_SYMBOLS_LINKER_FLAGS} /OPT:REF /OPT:ICF")
         endif()
@@ -51,12 +50,28 @@ else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Werror=return-type ")
     if (APPLE)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-command-line-argument")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reorder")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wswitch")
     elseif(UNIX)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self")
-        if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+        if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-switch")
         else()
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized")
         endif()
     endif()
+
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable=remark")
+    endif()
+
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden")
+
+    if(LINUX)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffunction-sections -fdata-sections")
+        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL")
+    endif()
 endif()
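
The new Linux-only lines compile every function and data object into its own section so that `--gc-sections` can drop whatever ends up unreferenced, while `--exclude-libs,ALL` keeps symbols from statically linked dependencies out of the exported ABI. A standalone project could reproduce the idea roughly as follows (sketch, assuming GCC or Clang on Linux):

```cmake
# Sketch, assuming GCC/Clang on Linux; add_link_options() needs CMake 3.13+.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    # Emit one section per function/data object so the linker can prune them.
    add_compile_options(-ffunction-sections -fdata-sections)
    # Garbage-collect unused sections and hide symbols of linked static archives.
    add_link_options(-Wl,--gc-sections -Wl,--exclude-libs,ALL)
endif()
```
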
index ee57890..b1a355f 100644 (file)
@@ -2,12 +2,16 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
+if (UNIX OR APPLE AND CMAKE_BUILD_TYPE STREQUAL "Release")
     set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fPIE -fPIC -Wformat -Wformat-security")
+    # TODO: double check if it's OK
+    if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+        string(REPLACE "-fPIE" "" CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS}")
+    endif()
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
     set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
-    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack -z relro -z now")
         set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack -z relro -z now")
         if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
@@ -17,12 +21,12 @@ if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
         endif()
         set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s -fvisibility=hidden")
         set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s -fvisibility=hidden")
-    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
         set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fstack-protector-all")
         set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fvisibility=hidden")
         set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvisibility=hidden")
-    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector")
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
         set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack -z relro -z now")
         set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack -z relro -z now")
         set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wl,--strip-all -fvisibility=hidden")
@@ -32,7 +36,7 @@ if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_CCXX_FLAGS}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CCXX_FLAGS}")
 elseif (WIN32)
-    if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
+    if (CMAKE_CXX_COMPILER_ID STREQUAL MSVC)
         set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MP /sdl")
     endif()
 endif()
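
Taken together, the flags above request a hardened Release build: position-independent code, fortified libc wrappers, stack protection (`-fstack-protector-all` or `-fstack-protector-strong` depending on the compiler), and a read-only, eagerly bound GOT. Reduced to the GCC case and rewritten in command form, the recipe looks roughly like the sketch below (`add_compile_definitions()` needs CMake 3.12+, `add_link_options()` 3.13+):

```cmake
# Hardened Release sketch for GCC; mirrors the intent of the flag strings above.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_BUILD_TYPE STREQUAL "Release")
    add_compile_options(-fPIC -Wformat -Wformat-security -fstack-protector-strong)
    add_compile_definitions(_FORTIFY_SOURCE=2)        # needs optimization, which Release enables
    add_link_options(-z noexecstack -z relro -z now)  # non-executable stack, full RELRO
endif()
```
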
index c7911a6..bdfa5f3 100644 (file)
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-set(InferenceEngine_VERSION 2.0.0)
+set(InferenceEngine_VERSION 2.1.0)
 set(PACKAGE_VERSION ${InferenceEngine_VERSION})
 
 set(PACKAGE_VERSION_EXACT False)
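
The bump to 2.1.0 is consumed through the standard `find_package()` version handshake: CMake includes this file and reads back `PACKAGE_VERSION_EXACT` / `PACKAGE_VERSION_COMPATIBLE`. As an illustration of that contract (not the repository's actual template), a minimal config-version file could look like:

```cmake
# Minimal <Package>ConfigVersion.cmake sketch; PACKAGE_FIND_VERSION holds the
# version requested by the caller of find_package().
set(PACKAGE_VERSION 2.1.0)
if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
    set(PACKAGE_VERSION_EXACT TRUE)
endif()
if(PACKAGE_FIND_VERSION VERSION_LESS_EQUAL PACKAGE_VERSION)
    set(PACKAGE_VERSION_COMPATIBLE TRUE)   # requested version is not newer than 2.1.0
else()
    set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
```
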
index 3bbb0cf..0d49227 100644 (file)
@@ -121,7 +121,8 @@ else()
         elseif (APPLE)
             set_target_properties(IE::inference_engine PROPERTIES
                     IMPORTED_LOCATION_RELEASE "${IE_RELEASE_LIBRARY}"
-                    INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
+                    INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}"
+                    INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
 
             # Debug binaries are optional
             find_library(IE_DEBUG_LIBRARY inference_engine@IE_DEBUG_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
@@ -137,7 +138,8 @@ else()
             # Only Release binaries are distributed for Linux systems
             set_target_properties(IE::inference_engine PROPERTIES
                     IMPORTED_LOCATION "${IE_RELEASE_LIBRARY}"
-                    INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
+                    INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}"
+                    INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
             target_link_libraries(IE::inference_engine INTERFACE ${CMAKE_DL_LIBS})
         endif()
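
For downstream projects, the practical effect of these properties is that linking against the imported target also propagates the Inference Engine include directory and the newly added `-Wno-error=deprecated-declarations` option. A hypothetical consumer (project and target names are placeholders) would use it like this:

```cmake
# Hypothetical consumer project.
cmake_minimum_required(VERSION 3.10)
project(ie_consumer CXX)

find_package(InferenceEngine 2.1 REQUIRED)

add_executable(ie_consumer main.cpp)
# Include dirs and compile options attached to IE::inference_engine come along automatically.
target_link_libraries(ie_consumer PRIVATE IE::inference_engine)
```
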
 
diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake
new file mode 100644 (file)
index 0000000..1550163
--- /dev/null
@@ -0,0 +1,68 @@
+# Copyright (C) 2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(VPU_SUPPORTED_SOC ma2450 ma2x8x mv0262)
+
+#
+# Default firmware packages
+#
+
+RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2450
+    ARCHIVE_UNIFIED firmware_ma2450_759W.zip
+    TARGET_PATH "${TEMP}/vpu/firmware/ma2450"
+    ENVIRONMENT "VPU_FIRMWARE_MA2450"
+    FOLDER)
+debug_message(STATUS "ma2450=" ${VPU_FIRMWARE_MA2450})
+
+RESOLVE_DEPENDENCY(VPU_FIRMWARE_MV0262
+    ARCHIVE_UNIFIED firmware_mv0262_mdk_R9.8.zip
+    TARGET_PATH "${TEMP}/vpu/firmware/mv0262"
+    ENVIRONMENT "VPU_FIRMWARE_MV0262"
+    FOLDER)
+debug_message(STATUS "mv0262=" ${VPU_FIRMWARE_MV0262})
+
+RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2X8X
+    ARCHIVE_UNIFIED firmware_ma2x8x_mdk_R9.8.zip
+    TARGET_PATH "${TEMP}/vpu/firmware/ma2x8x"
+    ENVIRONMENT "VPU_FIRMWARE_MA2X8X"
+    FOLDER)
+debug_message(STATUS "ma2x8x=" ${VPU_FIRMWARE_MA2X8X})
+
+#
+# CMake variables to override default firmware files
+#
+
+foreach(soc IN LISTS VPU_SUPPORTED_SOC)
+    string(TOUPPER "${soc}" soc_upper)
+    set(var_name VPU_FIRMWARE_${soc_upper}_FILE)
+
+    find_file(${var_name} MvNCAPI-${soc}.mvcmd "${VPU_FIRMWARE_${soc_upper}}/mvnc")
+    if(NOT ${var_name})
+        message(FATAL_ERROR "[VPU] Missing ${soc} firmware")
+    endif()
+endforeach()
+
+#
+# `vpu_copy_firmware` CMake target
+#
+
+foreach(soc IN LISTS VPU_SUPPORTED_SOC)
+    string(TOUPPER "${soc}" soc_upper)
+    set(var_name VPU_FIRMWARE_${soc_upper}_FILE)
+
+    set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/MvNCAPI-${soc}.mvcmd")
+    list(APPEND all_firmware_files ${firmware_out_file})
+
+    add_custom_command(
+        OUTPUT ${firmware_out_file}
+        COMMAND
+            ${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file}
+        MAIN_DEPENDENCY ${${var_name}}
+        COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}"
+        VERBATIM)
+endforeach()
+
+add_custom_target(vpu_copy_firmware
+    DEPENDS ${all_firmware_files}
+    COMMENT "[VPU] Copy firmware files")
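
Note that `vpu_copy_firmware` only aggregates the per-SoC copy commands; nothing runs them until another target depends on it. A hedged sketch of wiring it up (the plugin target name and source file are hypothetical; only `vpu_copy_firmware` comes from the file above):

```cmake
# Hypothetical VPU plugin target.
add_library(vpu_plugin_example SHARED plugin.cpp)

# Building the plugin now also copies the MvNCAPI-<soc>.mvcmd firmware files
# into ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}, next to the plugin binary.
add_dependencies(vpu_plugin_example vpu_copy_firmware)
```
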
index 9fca214..bba853f 100644 (file)
@@ -13,7 +13,7 @@ elseif(ARCH STREQUAL "i386")
 endif()
 
 # in case of independent python api build (out of Inference Engine root Cmake)
-if (NOT(IE_MAIN_SOURCE_DIR))
+if (NOT DEFINED IE_MAIN_SOURCE_DIR)
     if("${CMAKE_BUILD_TYPE}" STREQUAL "")
         message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used")
         set(CMAKE_BUILD_TYPE "Release")
@@ -45,7 +45,11 @@ else()
     set (PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/${PYTHON_VERSION}/openvino)
 endif()
 
-find_package (InferenceEngine REQUIRED)
+if(DEFINED IE_MAIN_SOURCE_DIR)
+    find_package(InferenceEngine REQUIRED)
+else()
+    find_package(InferenceEngineDeveloperPackage REQUIRED)
+endif()
 
 set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 add_subdirectory (src/openvino/inference_engine)
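
The switch from `NOT(IE_MAIN_SOURCE_DIR)` to `NOT DEFINED IE_MAIN_SOURCE_DIR` is not cosmetic: plain `if(NOT <var>)` also fires when the variable is defined but empty, `OFF`, or `0`, whereas `NOT DEFINED` only tests whether the variable exists at all. A tiny illustration (the variable name is arbitrary):

```cmake
set(SOME_DIR "")                 # defined, but evaluates to false as a boolean
if(NOT SOME_DIR)
    message(STATUS "taken: empty/OFF/0 values count as false")
endif()
if(NOT DEFINED SOME_DIR)
    message(STATUS "not taken: the variable exists, whatever its value")
endif()
```
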
index e874177..9cee626 100644 (file)
@@ -260,7 +260,7 @@ This class stores main information about the layer and allow to modify some laye
 
 * `weights`- Dictionary with layer weights, biases or custom blobs if any
 * `params` - Layer specific parameters. Provides getter and setter interfaces to get and modify layer parameters.
-             Please note that some modifications can be ignored and\or overwriten by target plugin (e.g. modification of
+             Please note that some modifications can be ignored and/or overwritten by the target plugin (e.g. modification of
              convolution kernel size will be reflected in layer parameters but finally the plugin will ignore it and will
              use initial kernel size)
 
@@ -280,9 +280,7 @@ layers affinity and output layers.
    * `init_from_buffer` - Defines the way of how `model` and `weights` attributes are interpreted. 
    If  `True`, attributes are interpreted as strings with paths to .xml and .bin files of IR. If `False`, they are 
    interpreted as Python `bytes` object with .xml and .bin files content.
-   * `ngrpah_compatibility` - Default value: `False`. If `IENetwork` initializes from 
-                              [experimental IR V7](./docs/OperationsSpecification-V7.md), set to `True`
-
+  
 * Usage examples:
     
    * Initializing `IENetwork` object from IR files:
@@ -506,7 +504,7 @@ This class is the main plugin interface and serves to initialize and configure t
     * Description: Loads extensions library to the plugin. Applicable only for a CPU device and a HETERO device with CPU  
     * Parameters:
         * `extension_path` - A full path to CPU extensions library    
-     * Return value: None
+    * Return value: None
     * Usage example:
 ```py
 >>> plugin = IEPlugin(device="CPU")
diff --git a/inference-engine/ie_bridges/python/sample/affinity_setting_sample/affinity_setting_sample.py b/inference-engine/ie_bridges/python/sample/affinity_setting_sample/affinity_setting_sample.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/README.md b/inference-engine/ie_bridges/python/sample/benchmark_app/README.md
deleted file mode 100644 (file)
index 7bb4b20..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-# Benchmark Python* Application
-
-This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks.
-
-## How It Works
-
-Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
-plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
-on the mode defined with the `-api` command-line parameter.
-
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
-
-### Synchronous API
-
-For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. A number of executions is defined by one of the two values:
-* Number of iterations defined with the `-niter` command-line argument
-* Time duration specified with the `-t` command-line argument
-* Both of them (execution will continue until both conditions are met)
-* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device.
-
-During the execution, the application collects two types of metrics:
-* Latency for each infer request executed with `Infer` method
-* Duration of all executions
-
-Reported latency value is calculated as mean value of all collected latencies. Reported throughput value is a derivative from reported latency and additionally depends on batch size.
-
-### Asynchronous API
-For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. A number of infer is specified with the `-nireq` command-line parameter. A number of executions is defined by one of the two values:
-* Number of iterations defined with the `-niter` command-line argument
-* Time duration specified with the `-t` command-line argument
-* Both of them (execution will continue until both conditions are met)
-* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device.
-
-The infer requests are executed asynchronously. Callback is used to wait for previous execution to complete. The application measures all infer requests executions and reports the throughput metric based on batch size and total execution duration.
-
-## Running
-Notice that the benchmark_app usually produces optimal performance for any device out of the box.
-
-**So in most cases you don't need to play the app options explicitly and the plain device name is enough**, e.g.:
-```
-$benchmark_app -m <model> -i <input> -d CPU
-```
-
-Running the application with the `-h` or `--help`' option yields the following usage message:
-
-```
-usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL
-                        [-pp PLUGIN_DIR] [-d TARGET_DEVICE]
-                        [-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG]
-                        [-api {sync,async}] [-niter NUMBER_ITERATIONS]
-                        [-nireq NUMBER_INFER_REQUESTS] [-b BATCH_SIZE]
-                        [-stream_output [STREAM_OUTPUT]] [-t TIME]
-                        [-progress [PROGRESS]] [-nstreams NUMBER_STREAMS]
-                        [-nthreads NUMBER_THREADS] [-pin {YES,NO}]
-                        [--exec_graph_path EXEC_GRAPH_PATH]
-                        [-pc [PERF_COUNTS]]
-
-Options:
-  -h, --help            Show this help message and exit.
-  -i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT
-                        Optional. Path to a folder with images and/or binaries
-                        or to specific image or binary file.
-  -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL
-                        Required. Path to an .xml file with a trained model.
-  -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
-                        Optional. Path to a plugin folder.
-  -d TARGET_DEVICE, --target_device TARGET_DEVICE
-                        Optional. Specify a target device to infer on: CPU,
-                        GPU, FPGA, HDDL or MYRIAD.
-                        Use "-d HETERO:<comma separated devices list>" format to specify HETERO plugin.
-  -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION
-                        Optional. Required for CPU custom layers. Absolute
-                        path to a shared library with the kernels
-                        implementations.
-  -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG
-                        Optional. Required for GPU custom kernels. Absolute
-                        path to an .xml file with the kernels description.
-  -api {sync,async}, --api_type {sync,async}
-                        Optional. Enable using sync/async API. Default value
-                        is async.
-  -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS
-                        Optional. Number of iterations. If not specified, the
-                        number of iterations is calculated depending on a
-                        device.
-  -nireq NUMBER_INFER_REQUESTS, --number_infer_requests NUMBER_INFER_REQUESTS
-                        Optional. Number of infer requests. Default value is
-                        determined automatically for device.
-  -b BATCH_SIZE, --batch_size BATCH_SIZE
-                        Optional. Batch size value. If not specified, the
-                        batch size value is determined from IR
-  -stream_output [STREAM_OUTPUT]
-                        Optional. Print progress as a plain text. When
-                        specified, an interactive progress bar is replaced
-                        with a multiline output.
-  -t TIME, --time TIME  Optional. Time in seconds to execute topology.
-  -progress [PROGRESS]  Optional. Show progress bar (can affect performance
-                        measurement). Default values is "False".
-  -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
-                       Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
-                       (for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
-  -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
-                        Number of threads to use for inference on the CPU
-                        (including HETERO case).
-  -pin {YES,NO}, --infer_threads_pinning {YES,NO}
-                        Optional. Enable ("YES" is default value) or disable
-                        ("NO")CPU threads pinning for CPU-involved inference.
-  --exec_graph_path EXEC_GRAPH_PATH
-                        Optional. Path to a file where to store executable
-                        graph information serialized.
-  -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
-                        Optional. Report performance counters.
-
-```
-
-Running the application with the empty list of options yields the usage message given above and an error message.
-
-Application supports topologies with one or more inputs. If a topology is not data sensitive, you can skip the input parameter. In this case, inputs are filled with random values.
-If a model has only image input(s), please a provide folder with images or a path to an image as input.
-If a model has some specific input(s) (not images), please prepare a binary file(s), which is filled with data of appropriate precision and provide a path to them as input.
-If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one.
-
-To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
-
-> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-For example, to do inference of an image using a trained network with multiple outputs on CPU, run the following command:
-
-```
-python3 benchmark_app.py -i <path_to_image>/inputImage.bmp -m <path_to_model>/multiple-output.xml -d CPU
-```
-
-## Demo Output
-
-The application outputs number of executed iterations, total duration of execution, latency and throughput.
-Additionally, if you set the `-pc` parameter, the application outputs performance counters.
-If you set `-exec_graph_path`, the application reports executable graph information serialized.
-
-```
-[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
-Progress: |................................| 100.00%
-
-[Step 9/9] Dumping statistics report
-Progress: |................................| 100.00%
-
-Count:      4408 iterations
-Duration:   60153.52 ms
-Latency:    51.8244 ms
-Throughput: 73.28 FPS
-
-```
-
-## See Also
-* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
-* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py
deleted file mode 100644 (file)
index ccee155..0000000
+++ /dev/null
@@ -1,343 +0,0 @@
-"""
- Copyright (C) 2018-2019 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-from statistics import median
-from openvino.inference_engine import IENetwork, IECore, get_version
-
-from .utils.parameters import *
-from .utils.inputs_filling import *
-from .utils.utils import *
-from .utils.infer_request_wrap import *
-from .utils.progress_bar import *
-
-def getDurationInMilliseconds(duration):
-    return duration * 1000
-
-def static_vars(**kwargs):
-    def decorate(func):
-        for k in kwargs:
-            setattr(func, k, kwargs[k])
-        return func
-    return decorate
-
-@static_vars(step_id = 0)
-def next_step(additional_info = ""):
-    step_names = {
-        1  : "Parsing and validating input arguments",
-        2  : "Loading Inference Engine",
-        3  : "Read the Intermediate Representation of the network",
-        4  : "Resizing network to match image sizes and given batch",
-        5  : "Configuring input of the model",
-        6  : "Setting device configuration",
-        7  : "Loading the model to the device",
-        8  : "Setting optimal runtime parameters",
-        9  : "Creating infer requests and filling input blobs with images",
-        10 : "Measuring performance",
-        11 : "Dumping statistics report",
-    }
-
-    next_step.step_id += 1
-    if (next_step.step_id not in step_names.keys()):
-        raise Exception("Step ID " + str(next_step.step_id) + " is out of total steps number " + len(step_names))
-
-    print("[Step {}/{}] {}".format(next_step.step_id, len(step_names), step_names[next_step.step_id]) + (" (" + additional_info + ")" if len(additional_info) else ""))
-
-def main(args=None):
-    try:
-        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
-        next_step()
-
-        if not args:
-            args = parse_args()
-
-        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
-        next_step()
-
-        device_name = args.target_device.upper()
-
-        ie = IECore()
-
-        if CPU_DEVICE_NAME in device_name:
-            if args.path_to_extension:
-                ie.add_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
-        if GPU_DEVICE_NAME in device_name:
-            if args.path_to_cldnn_config:
-                ie.set_config({'CONFIG_FILE' : args.path_to_cldnn_config}, GPU_DEVICE_NAME)
-                logger.info("GPU extensions is loaded {}".format(args.path_to_cldnn_config))
-
-        logger.info("InferenceEngine:\n{: <9}{}".format("",get_version()))
-        version_string = "Device is {}\n".format(device_name)
-        for device, version in ie.get_versions(device_name).items():
-          version_string += "{: <9}{}\n".format("", device)
-          version_string += "{: <9}{:.<24}{} {}.{}\n".format("",version.description," version", version.major, version.minor)
-          version_string += "{: <9}{:.<24} {}\n".format("","Build", version.build_number)
-        logger.info(version_string)
-
-        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
-        next_step()
-
-        xml_filename = os.path.abspath(args.path_to_model)
-        head, tail = os.path.splitext(xml_filename)
-        bin_filename = os.path.abspath(head + BIN_EXTENSION)
-
-        ie_network = IENetwork(xml_filename, bin_filename)
-
-        input_info = ie_network.inputs
-
-        if len(input_info) == 0:
-            raise AttributeError('No inputs info is provided')
-
-        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
-        next_step()
-
-        batch_size = ie_network.batch_size
-        precision = ie_network.precision
-
-        if args.batch_size and args.batch_size != ie_network.batch_size:
-            new_shapes = {}
-            for key in input_info.keys():
-                shape = input_info[key].shape
-                layout = input_info[key].layout
-
-                batchIndex = -1
-                if ((layout == 'NCHW') or (layout == 'NCDHW') or
-                    (layout == 'NHWC') or (layout == 'NDHWC') or
-                    (layout == 'NC')):
-                    batchIndex = 0
-                elif (layout == 'CN'):
-                    batchIndex = 1
-
-                if ((batchIndex != -1) and (shape[batchIndex] != args.batch_size)):
-                    shape[batchIndex] = args.batch_size
-                    new_shapes[key] = shape
-
-            if (len(new_shapes) > 0):
-                logger.info("Resizing network to batch = {}".format(args.batch_size))
-                ie_network.reshape(new_shapes)
-
-            batch_size = args.batch_size
-
-        logger.info("Network batch size: {}, precision {}".format(batch_size, precision))
-
-        # --------------------- 5. Configuring input of the model ------------------------------------------------------
-        next_step()
-
-        for key in input_info.keys():
-            if (isImage(input_info[key])):
-                # Set the precision of input data provided by the user
-                # Should be called before load of the network to the plugin
-                input_info[key].precision = 'U8'
-
-        # --------------------- 6. Setting device configuration --------------------------------------------------------
-        next_step()
-
-        devices = parseDevices(device_name)
-        device_nstreams = parseValuePerDevice(devices, args.number_streams)
-        for device in devices:
-          if device == CPU_DEVICE_NAME: ## CPU supports few special performance-oriented keys
-            ## limit threading for CPU portion of inference
-            if args.number_threads:
-              ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)
-
-            # pin threads for CPU portion of inference
-            ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)
-
-            ## for CPU execution, more throughput-oriented execution via streams
-            # for pure CPU execution, more throughput-oriented execution via streams
-            if args.api_type == 'async':
-                ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
-                                                         if device in device_nstreams.keys()
-                                                         else 'CPU_THROUGHPUT_AUTO' }, device)
-            device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))
-
-          elif device == GPU_DEVICE_NAME:
-            if args.api_type == 'async':
-                ie.set_config({'GPU_THROUGHPUT_STREAMS' : str(device_nstreams.get(device))
-                                                          if device in device_nstreams.keys()
-                                                          else 'GPU_THROUGHPUT_AUTO'}, device)
-            device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))
-
-          elif device == MYRIAD_DEVICE_NAME:
-            ie.set_config({'LOG_LEVEL': 'LOG_INFO',
-                           'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)
-
-        # --------------------- 7. Loading the model to the device -----------------------------------------------------
-        next_step()
-
-        config = { 'PERF_COUNT' : ('YES' if args.perf_counts else 'NO')}
-
-        exe_network = ie.load_network(ie_network,
-                                      device_name,
-                                      config=config,
-                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)
-
-        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
-        next_step()
-
-        ## Number of requests
-        infer_requests = exe_network.requests
-        nireq = len(infer_requests)
-
-        ## Iteration limit
-        niter = args.number_iterations
-        if niter and args.api_type == 'async':
-          niter = (int)((niter + nireq - 1)/nireq)*nireq
-          if (args.number_iterations != niter):
-            logger.warn("Number of iterations was aligned by request number "
-                        "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))
-
-        ## Time limit
-        duration_seconds = 0
-        if args.time:
-          ## time limit
-          duration_seconds = args.time
-        elif not args.number_iterations:
-          ## default time limit
-          duration_seconds = get_duration_in_secs(device)
-
-        # ------------------------------------ 8. Creating infer requests and filling input blobs ----------------------
-        next_step()
-
-        request_queue = InferRequestsQueue(infer_requests)
-
-        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
-        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)
-
-        # ------------------------------------ 9. Measuring performance ------------------------------------------------
-
-        progress_count = 0
-        progress_bar_total_count = 10000
-
-        output_string = "Start inference {}ronously".format(args.api_type)
-        if (args.api_type == "async"):
-            if output_string != "":
-                output_string += ", "
-
-            output_string += str(nireq) + " inference requests"
-            device_ss = ''
-            for device, nstreams in device_nstreams.items():
-                if device_ss != '':
-                    device_ss += ', '
-                device_ss += "{} streams for {}".format(str(nstreams), device)
-            if device_ss != '':
-                output_string += " using " + device_ss
-
-        output_string += ", limits: "
-        if niter:
-            if not duration_seconds:
-                progress_bar_total_count = niter
-            output_string += str(niter) + " iterations"
-
-        if duration_seconds:
-            if niter:
-                output_string += ", "
-            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"
-
-        next_step(output_string)
-
-        ## warming up - out of scope
-        infer_request = request_queue.getIdleRequest()
-        if not infer_request:
-            raise Exception("No idle Infer Requests!")
-
-        if (args.api_type == 'sync'):
-            infer_request.infer(requests_input_data[infer_request.id])
-        else:
-            infer_request.startAsync(requests_input_data[infer_request.id])
-
-        request_queue.waitAll()
-        request_queue.resetTimes()
-
-        start_time = datetime.now()
-        exec_time = (datetime.now() - start_time).total_seconds()
-        iteration = 0
-
-        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)
-
-        ## Start inference & calculate performance
-        ## to align number if iterations to guarantee that last infer requests are executed in the same conditions **/
-        while ((niter and iteration < niter) or
-               (duration_seconds and exec_time < duration_seconds) or
-               (args.api_type == "async" and iteration % nireq != 0)):
-            infer_request = request_queue.getIdleRequest()
-            if not infer_request:
-                raise Exception("No idle Infer Requests!")
-
-            if (args.api_type == 'sync'):
-                infer_request.infer(requests_input_data[infer_request.id])
-            else:
-                infer_request.startAsync(requests_input_data[infer_request.id])
-            iteration += 1
-
-            exec_time = (datetime.now() - start_time).total_seconds()
-
-            if niter:
-                progress_bar.add_progress(1)
-            else:
-                ## calculate how many progress intervals are covered by current iteration.
-                ## depends on the current iteration time and time of each progress interval.
-                ## Previously covered progress intervals must be skipped.
-                progress_interval_time = duration_seconds / progress_bar_total_count
-                new_progress = (int) (exec_time / progress_interval_time - progress_count)
-                progress_bar.add_progress(new_progress)
-                progress_count += new_progress
-
-        ## wait the latest inference executions
-        request_queue.waitAll()
-
-        total_duration_sec = request_queue.getDurationInSeconds()
-        times = request_queue.times
-        times.sort()
-        latency_ms = median(times)
-        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec
-
-        progress_bar.finish()
-
-        # ------------------------------------ 10. Dumping statistics report -------------------------------------------
-        next_step()
-
-        if args.exec_graph_path:
-            try:
-              exec_graph_info = exe_network.get_exec_graph_info()
-              exec_graph_info.serialize(args.exec_graph_path)
-              logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
-              del exec_graph_info
-            except Exception as e:
-                logging.exception(e)
-
-        if args.perf_counts:
-            for ni in range(int(nireq)):
-                perf_counts = exe_network.requests[ni].get_perf_counts()
-                logger.info("Pefrormance counts for {}-th infer request".format(ni))
-                for layer, stats in perf_counts.items():
-                    max_layer_name = 30
-                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
-                                                                        stats['status'],
-                                                                        'layerType: ' + str(stats['layer_type']),
-                                                                        'realTime: ' + str(stats['real_time']),
-                                                                        'cpu: ' + str(stats['cpu_time']),
-                                                                        'execType: ' + str(stats['exec_type'])))
-
-        print("Count:      {} iterations".format(iteration))
-        print("Duration:   {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
-        print("Latency:    {:.4f} ms".format(latency_ms))
-        print("Throughput: {:.2f} FPS".format(fps))
-
-        del exe_network
-        del ie
-        next_step.step_id = 0
-    except Exception as e:
-        logging.exception(e)
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/infer_request_wrap.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/infer_request_wrap.py
deleted file mode 100644 (file)
index cf801fe..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
- Copyright (C) 2018-2019 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-from ctypes import *
-from datetime import datetime
-import threading
-
-class InferReqWrap:
-    def __init__(self, request, id, callbackQueue):
-        self.id = id
-        self.request = request
-        self.request.set_completion_callback(self.callback, self.id)
-        self.callbackQueue = callbackQueue
-
-    def callback(self, statusCode, userdata):
-        if (userdata != self.id):
-            print("Request ID {} does not correspond to user data {}".format(self.id, userdata))
-        elif statusCode != 0:
-            print("Request {} failed with status code {}".format(self.id, statusCode))
-        self.callbackQueue(self.id, self.request.latency)
-
-    def startAsync(self, input_data):
-        self.request.async_infer(input_data)
-
-    def infer(self, input_data):
-        self.request.infer(input_data)
-        self.callbackQueue(self.id, self.request.latency);
-
-class InferRequestsQueue:
-    def __init__(self, requests):
-      self.idleIds = []
-      self.requests = []
-      self.times = []
-      for id in range(0, len(requests)):
-          self.requests.append(InferReqWrap(requests[id], id, self.putIdleRequest))
-          self.idleIds.append(id)
-      self.startTime = datetime.max
-      self.endTime = datetime.min
-      self.cv = threading.Condition()
-
-    def resetTimes(self):
-      self.times.clear()
-
-    def getDurationInSeconds(self):
-      return (self.endTime - self.startTime).total_seconds()
-
-    def putIdleRequest(self, id, latency):
-      self.cv.acquire()
-      self.times.append(latency)
-      self.idleIds.append(id)
-      self.endTime = max(self.endTime, datetime.now())
-      self.cv.notify()
-      self.cv.release()
-
-    def getIdleRequest(self):
-        self.cv.acquire()
-        while len(self.idleIds) == 0:
-            self.cv.wait()
-        id = self.idleIds.pop();
-        self.startTime = min(datetime.now(), self.startTime);
-        self.cv.release()
-        return self.requests[id]
-
-    def waitAll(self):
-        self.cv.acquire()
-        while len(self.idleIds) != len(self.requests):
-            self.cv.wait()
-        self.cv.release()
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/inputs_filling.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/inputs_filling.py
deleted file mode 100644 (file)
index 00a2945..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-"""
- Copyright (C) 2018-2019 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import logging
-import os
-import cv2
-import numpy as np
-import sys
-
-from glob import glob
-from random import choice
-
-from .logging import logger
-
-IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP']
-BINARY_EXTENSIONS = ['BIN']
-
-def isImage(blob):
-    if (blob.layout != "NCHW"):
-        return False
-    channels = blob.shape[1]
-    return (channels == 3)
-
-def isImageInfo(blob):
-    if (blob.layout != "NC"):
-        return False
-    channels = blob.shape[1]
-    return (channels >= 2)
-
-def getInputs(path_to_input, batch_size, input_info, requests):
-  input_image_sizes = {}
-  for key in input_info.keys():
-      if (isImage(input_info[key])):
-          input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3])
-      logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key,
-                                                                         input_info[key].precision,
-                                                                         input_info[key].layout,
-                                                                         " ".join(str(x) for x in input_info[key].shape)))
-
-  images_count = len(input_image_sizes.keys())
-  binaries_count = len(input_info) - images_count
-
-  image_files = list()
-  binary_files = list()
-
-  if (path_to_input):
-    image_files = get_files_by_extensions(path_to_input, IMAGE_EXTENSIONS)
-    image_files.sort()
-    binary_files = get_files_by_extensions(path_to_input, BINARY_EXTENSIONS)
-    binary_files.sort()
-
-  if (len(image_files) == 0) and (len(binary_files) == 0):
-      logger.warn("No input files were given: all inputs will be filled with random values!")
-  else:
-      binary_to_be_used = binaries_count*batch_size*len(requests)
-      if binary_to_be_used > 0 and len(binary_files) == 0:
-          logger.warn("No supported binary inputs found! Please check your file extensions: {}".format(",".join(BINARY_EXTENSIONS)))
-      elif binary_to_be_used > len(binary_files):
-          logger.warn("Some binary input files will be duplicated: {} files are required, but only {} were provided".format(binary_to_be_used, len(binary_files)))
-      elif binary_to_be_used < len(binary_files):
-          logger.warn("Some binary input files will be ignored: only {} files are required from {}".format(binary_to_be_used, len(binary_files)))
-
-      images_to_be_used = images_count*batch_size*len(requests)
-      if images_to_be_used > 0 and len(image_files) == 0:
-          logger.warn("No supported image inputs found! Please check your file extensions: {}".format(",".join(IMAGE_EXTENSIONS)))
-      elif images_to_be_used > len(image_files):
-          logger.warn("Some image input files will be duplicated: {} files are required, but only {} were provided".format(images_to_be_used, len(image_files)))
-      elif images_to_be_used < len(image_files):
-          logger.warn("Some image input files will be ignored: only {} files are required from {}".format(images_to_be_used, len(image_files)))
-
-  requests_input_data = []
-  for request_id in range(0, len(requests)):
-      logger.info("Infer Request {} filling".format(request_id))
-      input_data = {}
-      keys = list(input_info.keys())
-      for key in keys:
-          if isImage(input_info[key]):
-              # input is image
-              if (len(image_files) > 0):
-                  input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), len(keys), input_info[key].shape)
-                  continue
-
-          # input is binary
-          if (len(binary_files) > 0):
-              input_data[key] = fill_blob_with_binary(binary_files, input_info[key].shape)
-              continue
-
-          # most likely input is image info
-          if isImageInfo(input_info[key]) and len(input_image_sizes) == 1:
-              image_size = input_image_sizes[list(input_image_sizes.keys()).pop()]
-              logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" +
-                                                                        str(image_size[1]))
-              input_data[key] = fill_blob_with_image_info(image_size, input_info[key].shape)
-              continue
-
-          # fill with random data
-          logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if isImage(input_info[key]) else "some binary data"))
-          input_data[key] = fill_blob_with_random(input_info[key].precision, input_info[key].shape)
-
-      requests_input_data.append(input_data)
-
-  return requests_input_data
-
-def get_files_by_extensions(path_to_input, extensions):
-    input_files = list()
-    if os.path.isfile(path_to_input):
-        input_files.append(path_to_input)
-    else:
-        path = os.path.join(path_to_input, '*')
-        files = glob(path, recursive=True)
-        for file in files:
-            file_extension = file.rsplit('.').pop().upper()
-            if file_extension in extensions:
-                input_files.append(file)
-    return input_files
-
-def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, shape):
-    images = np.ndarray(shape)
-    image_index = request_id*batch_size*input_size + input_id
-    for b in range(batch_size):
-        image_index %= len(image_paths)
-        image_filename = image_paths[image_index]
-        image = cv2.imread(image_filename)
-
-        new_im_size = tuple(shape[2:])
-        if image.shape[:-1] != new_im_size:
-            logger.warn("Image {} is resized from ({}) to ({})".format(image_filename, image.shape[:-1], new_im_size))
-            image = cv2.resize(image, new_im_size)
-
-        image = image.transpose((2, 1, 0))
-        images[b] = image
-
-        image_index += input_size
-    return images
-
-def fill_blob_with_binary(binary_paths, request_id, batch_size, input_id, input_size, shape):
-  binaries = np.ndarray(shape)
-  binary_index = request_id*batch_size*input_size + input_id
-  for b in range(batch_size):
-      binary_index %= len(image_paths)
-      binary_filename = binary_paths[binary_index]
-
-      binary_file_size = os.path.getsize(binary_file)
-      input_size = np.prod(shape)/batch_size
-      if (input_size != binary_file_size):
-          raise Exception("File " + binary_filename + " contains " << str(binary_file_size) + " bytes " +
-                          "but network expects " + str(input_size))
-
-      with open(binary_file, 'r') as f:
-          binary_data = f.read()
-
-      binaries[b] = binary_data
-      binary_index += input_size
-
-  return binaries
-
-def fill_blob_with_image_info(image_size, shape):
-    im_info = np.ndarray(shape)
-    for b in range(shape[0]):
-      for i in range(shape[1]):
-        im_info[b][i] = image_size[i] if i in [0, 1] else 1
-
-    return im_info
-
-def fill_blob_with_random(precision, shape):
-    if precision == "FP32":
-      return np.random.rand(*shape).astype(np.float32)
-    elif precision == "FP16":
-      return np.random.rand(*shape).astype(np.float16)
-    elif precision == "I32":
-      return np.random.rand(*shape).astype(np.int32)
-    elif precision == "U8":
-      return np.random.rand(*shape).astype(np.uint8)
-    elif precision == "I8":
-      return np.random.rand(*shape).astype(np.int8)
-    elif precision == "U16":
-      return np.random.rand(*shape).astype(np.uint16)
-    elif precision == "I16":
-      return np.random.rand(*shape).astype(np.int16)
-    else:
-      raise Exception("Input precision is not supported: " + precision)
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/parameters.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/parameters.py
deleted file mode 100644 (file)
index 3e8b59b..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
- Copyright (C) 2018-2019 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import argparse
-from fnmatch import fnmatch
-
-XML_EXTENSION = ".xml"
-BIN_EXTENSION = ".bin"
-
-XML_EXTENSION_PATTERN = '*' + XML_EXTENSION
-
-def validate_args(args):
-    if args.number_iterations is not None and args.number_iterations < 0:
-        raise Exception("Number of iterations should be positive (invalid -niter option value)")
-    if args.number_infer_requests and args.number_infer_requests < 0:
-        raise Exception("Number of inference requests should be positive (invalid -nireq option value)")
-    if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN):
-        raise Exception('Path {} is not xml file.')
-
-def str2bool(v):
-  if v.lower() in ('yes', 'true', 't', 'y', '1'):
-    return True
-  elif v.lower() in ('no', 'false', 'f', 'n', '0'):
-    return False
-  else:
-    raise argparse.ArgumentTypeError('Boolean value expected.')
-
-def parse_args():
-    parser = argparse.ArgumentParser(add_help=False)
-    args = parser.add_argument_group('Options')
-    args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS,
-                      help="Show this help message and exit.")
-    args.add_argument('-i', '--path_to_input', type=str, required=False,
-                      help="Optional. Path to a folder with images and/or binaries or to specific image or binary file.")
-    args.add_argument('-m', '--path_to_model', type=str, required=True,
-                      help="Required. Path to an .xml file with a trained model.")
-    args.add_argument('-d', '--target_device', type=str, required=False, default="CPU",
-                      help="Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. "
-                           "Use \"-d HETERO:<comma separated devices list>\" format to specify HETERO plugin. ")
-    args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None,
-                      help="Optional. Required for CPU custom layers. "
-                           "Absolute path to a shared library with the kernels implementations.")
-    args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False,
-                      help="Optional. Required for GPU custom kernels. Absolute path to an .xml file with the "
-                           "kernels description.")
-    args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
-                      help="Optional. Enable using sync/async API. Default value is async.")
-    args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None,
-                      help="Optional. Number of iterations. "
-                           "If not specified, the number of iterations is calculated depending on a device.")
-    args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=None,
-                      help="Optional. Number of infer requests. Default value is determined automatically for device.")
-    args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
-                      help="Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation")
-    args.add_argument('-stream_output', type=str2bool, required=False, default=False, nargs='?', const=True,
-                      help="Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a "
-                           "multiline output.")
-    args.add_argument('-t', '--time', type=int, required=False, default=None,
-                      help="Optional. Time in seconds to execute topology.")
-    args.add_argument('-progress', type=str2bool, required=False, default=False, nargs='?', const=True,
-                      help="Optional. Show progress bar (can affect performance measurement). Default values is \"False\".")
-    args.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None,
-                      help="Optional. Number of streams to use for inference on the CPU/GPU in throughput mode "
-                           "(for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).")
-    args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
-                      help="Number of threads to use for inference on the CPU "
-                           "(including HETERO case).")
-    args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', choices=['YES', 'NO'],
-                      help="Optional. Enable (\"YES\" is default value) or disable (\"NO\")"
-                      "CPU threads pinning for CPU-involved inference.")
-    args.add_argument('--exec_graph_path', type=str, required=False,
-                      help="Optional. Path to a file where to store executable graph information serialized.")
-    args.add_argument("-pc", "--perf_counts", type=str2bool, required=False, default=False, nargs='?', const=True,
-                      help="Optional. Report performance counters.", )
-    parsed_args = parser.parse_args()
-
-    validate_args(parsed_args)
-
-    return parsed_args
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/utils.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/utils.py
deleted file mode 100644 (file)
index c1f0afe..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-"""
- Copyright (C) 2018-2019 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import multiprocessing
-from .logging import logger
-
-VPU_DEVICE_NAME = "VPU"
-MYRIAD_DEVICE_NAME = "MYRIAD"
-HDDL_DEVICE_NAME = "HDDL"
-FPGA_DEVICE_NAME = "FPGA"
-CPU_DEVICE_NAME = "CPU"
-GPU_DEVICE_NAME = "GPU"
-HETERO_DEVICE_NAME = "HETERO"
-UNKNOWN_DEVICE_TYPE = "UNKNOWN"
-
-DEVICE_DURATION_IN_SECS = {
-    CPU_DEVICE_NAME: 60,
-    GPU_DEVICE_NAME: 60,
-    VPU_DEVICE_NAME: 60,
-    MYRIAD_DEVICE_NAME: 60,
-    HDDL_DEVICE_NAME: 60,
-    FPGA_DEVICE_NAME: 120,
-    UNKNOWN_DEVICE_TYPE: 120
-}
-
-DEVICE_NIREQ_ASYNC = {
-    CPU_DEVICE_NAME: 2,
-    GPU_DEVICE_NAME: 2,
-    VPU_DEVICE_NAME: 4,
-    MYRIAD_DEVICE_NAME: 4,
-    HDDL_DEVICE_NAME: 100,
-    FPGA_DEVICE_NAME: 3,
-    UNKNOWN_DEVICE_TYPE: 1
-}
-
-def get_duration_in_secs(target_device):
-    duration = 0
-    for device in DEVICE_DURATION_IN_SECS:
-        if device in target_device:
-            duration = max(duration, DEVICE_DURATION_IN_SECS[device])
-
-    if duration == 0:
-        duration = DEVICE_DURATION_IN_SECS[UNKNOWN_DEVICE_TYPE]
-        logger.warn("Default duration {} seconds is used for unknown device {}".format(duration, target_device))
-
-    return duration
-
-def get_nireq(target_device):
-    nireq = 0
-    for device in DEVICE_NIREQ_ASYNC:
-        if device in target_device:
-            nireq = max(nireq, DEVICE_NIREQ_ASYNC[device])
-
-    if nireq == 0:
-        nireq = DEVICE_NIREQ_ASYNC[UNKNOWN_DEVICE_TYPE]
-        logger.warn("Default number of requests {} is used for unknown device {}".format(duration, target_device))
-
-    return nireq
-
-def parseDevices(device_string):
-    devices = device_string
-    if ':' in devices:
-        devices = devices.partition(':')[2]
-    return [ d[:d.index('(')] if '(' in d else d for d in devices.split(',') ]
-
-def parseValuePerDevice(devices, values_string):
-    ## Format: <device1>:<value1>,<device2>:<value2> or just <value>
-    result = {}
-    if not values_string:
-      return result
-    device_value_strings = values_string.upper().split(',')
-    for device_value_string in device_value_strings:
-        device_value_vec = device_value_string.split(':')
-        if len(device_value_vec) == 2:
-            for device in devices:
-                if device == device_value_vec[0]:
-                    value = int(device_value_vec[1])
-                    result[device_value_vec[0]] = value
-                    break
-        elif len(device_value_vec) == 1:
-            value = int(device_value_vec[0])
-            for device in devices:
-                result[device] = value
-        elif not device_value_vec:
-            raise Exception("Unknown string format: " + values_string)
-    return result
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py
deleted file mode 100644 (file)
index cf1139a..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-import benchmark
-
-if __name__ == "__main__":
-    benchmark.main()
index 98691c7..7812e07 100644 (file)
@@ -20,7 +20,7 @@ python3 classification_sample.py -h
 The command yields the following usage message:
 ```
 usage: classification_sample.py [-h] -m MODEL -i INPUT [INPUT ...]
-                                [-l CPU_EXTENSION] [-pp PLUGIN_DIR]
+                                [-l CPU_EXTENSION]
                                 [-d DEVICE] [--labels LABELS] [-nt NUMBER_TOP]
 
 Options:
@@ -34,8 +34,6 @@ Options:
                         Optional. Required for CPU custom layers. MKLDNN (CPU)-targeted custom layers.
                         Absolute path to a shared library with the kernels
                         implementations.
-  -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
-                        Optional. Path to a plugin folder
   -d DEVICE, --device DEVICE
                         Optional. Specify the target device to infer on; CPU,
                         GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
index b409f5d..18ada6f 100644 (file)
@@ -32,7 +32,7 @@ python3 classification_sample_async.py -h
 The command yields the following usage message:
 ```
 usage: classification_sample_async.py [-h] -m MODEL -i INPUT [INPUT ...]
-                                      [-l CPU_EXTENSION] [-pp PLUGIN_DIR]
+                                      [-l CPU_EXTENSION]
                                       [-d DEVICE] [--labels LABELS]
                                       [-nt NUMBER_TOP]
 
@@ -47,8 +47,6 @@ Options:
                         Optional. Required for CPU custom layers. Absolute
                         path to a shared library with the kernels
                         implementations.
-  -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
-                        Optional. Path to a plugin folder
   -d DEVICE, --device DEVICE
                         Optional. Specify the target device to infer on; CPU,
                         GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
@@ -68,7 +66,7 @@ To run the sample, you can use AlexNet and GoogLeNet or other image classificati
 
 You can do inference of an image using a trained AlexNet network on FPGA with fallback to CPU using the following command:
 ```
-    python3 classification_sample_async.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU -nireq 2 -ni 200
+    python3 classification_sample_async.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU
 ```
 
 ## Sample Output
index 9d78438..c0178e9 100644 (file)
@@ -22,14 +22,19 @@ def main():
         print("\tDevice: {}".format(device))
         print("\tMetrics:")
         for metric in ie.get_metric(device, "SUPPORTED_METRICS"):
-            metric_val = ie.get_metric(device, metric)
-            print("\t\t{}: {}".format(metric, param_to_string(metric_val)))
+            try:
+              metric_val = ie.get_metric(device, metric)
+              print("\t\t{}: {}".format(metric, param_to_string(metric_val)))
+            except TypeError:
+              print("\t\t{}: UNSUPPORTED TYPE".format(metric))
 
         print("\n\tDefault values for device configuration keys:")
         for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"):
-            cfg_val = ie.get_config(device, cfg)
-            print("\t\t{}: {}".format(cfg, param_to_string(cfg_val)))
-
+            try:
+              cfg_val = ie.get_config(device, cfg)
+              print("\t\t{}: {}".format(cfg, param_to_string(cfg_val)))
+            except TypeError:
+              print("\t\t{}: UNSUPPORTED TYPE".format(cfg))
 
 if __name__ == '__main__':
     sys.exit(main() or 0)
diff --git a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md
new file mode 100644 (file)
index 0000000..7b91825
--- /dev/null
@@ -0,0 +1,73 @@
+# Object Detection Python* Sample SSD
+
+This sample demonstrates how to run the Object Detection sample application.
+
+It shows how to use the new Infer Request API of the Inference Engine in applications.
+Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
+It also shows how to build and execute an inference request, using object detection networks as an example.
+
+Due to properties of SSD networks, this sample works correctly only with a batch size of 1. To process more images per batch, the network must be reshaped first.
+
+## How It Works
+
+Upon start-up, the sample application reads command-line parameters and loads the specified network and input images (or a
+folder with images) into the Inference Engine plugin.
+
+Then, the sample creates an inference request object and executes inference on it.
+
+When inference is done, the application outputs data to the standard output stream and creates an output image with bounding boxes drawn atop the initial image.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+## Running
+
+Run the application with the <code>-h</code> option to see the usage message:
+```
+python3 object_detection_sample_ssd.py -h
+```
+The command yields the following usage message:
+```
+usage: object_detection_sample_ssd.py [-h] -m MODEL -i INPUT [INPUT ...]
+                                      [-l CPU_EXTENSION]
+                                      [-d DEVICE] [--labels LABELS]
+                                      [-nt NUMBER_TOP]
+
+Options:
+  -h, --help            Show this help message and exit
+  -m MODEL, --model MODEL
+                        Required. Path to an .xml file with a trained model
+  -i INPUT [INPUT ...], --input INPUT [INPUT ...]
+                        Required. Path to a folder with images or path to an
+                        image file
+  -l CPU_EXTENSION, --cpu_extension CPU_EXTENSION
+                        Optional. Required for CPU custom layers. Absolute
+                        path to a shared library with the kernels
+                        implementations
+  -d DEVICE, --device DEVICE
+                        Optional. Specify the target device to infer on; CPU,
+                        GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
+                        will look for a suitable plugin for device specified
+                        Default value is CPU
+  --labels LABELS       Optional. Labels mapping file
+  -nt NUMBER_TOP, --number_top NUMBER_TOP
+                        Optional. Number of top results
+```
+
+Running the application with an empty list of options yields the usage message given above and an error message.
+
+To run the sample, you can use RMNet_SSD or other object-detection models. You can download the pre-trained models with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or from [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+
+You can perform inference on an image using a trained RMNet_SSD network on FPGA with a fallback to CPU using the following command:
+```
+    python3 object_detection_sample_ssd.py -i <path_to_image>/cat.bmp -m <path_to_model>/rmnet_ssd.xml -nt 5 -d HETERO:FPGA,CPU
+```
+
+## Sample Output
+
+By default, the application outputs all inference results and draws bounding boxes for detections with confidence above 50%.
+
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
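
Editorial note: the README above states that the sample assumes a batch size of 1 and that a larger batch requires a network reshape first. Below is a minimal, hedged sketch of how that could be done with the `openvino.inference_engine` Python API of this release, assuming `IENetwork.reshape()` behaves as documented for shape inference; the model paths and the batch of 4 are illustrative only.

```python
# Hedged sketch: reshaping an SSD network to a larger batch before loading it.
# Assumes IENetwork.reshape() from this release's Python API; paths are placeholders.
from openvino.inference_engine import IENetwork, IECore

ie = IECore()
net = IENetwork(model="rmnet_ssd.xml", weights="rmnet_ssd.bin")

input_blob = next(iter(net.inputs))
n, c, h, w = net.inputs[input_blob].shape

# Reshape the input (and, through shape inference, the whole network) to batch 4.
net.reshape({input_blob: (4, c, h, w)})

exec_net = ie.load_network(network=net, device_name="CPU")
```

Setting `net.batch_size` may also work for simple topologies, but the README above explicitly calls for a reshape for SSD networks.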
diff --git a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py
new file mode 100644 (file)
index 0000000..2027469
--- /dev/null
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+"""
+ Copyright (c) 2018 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from __future__ import print_function
+import sys
+import os
+from argparse import ArgumentParser, SUPPRESS
+import cv2
+import numpy as np
+import logging as log
+from time import time
+from openvino.inference_engine import IENetwork, IECore
+
+
+def build_argparser():
+    parser = ArgumentParser(add_help=False)
+    args = parser.add_argument_group("Options")
+    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
+    args.add_argument("-m", "--model", help="Required. Path to an .xml file with a trained model.",
+        required=True, type=str)
+    args.add_argument("-i", "--input", help="Required. Path to image file.",
+        required=True, type=str, nargs="+")
+    args.add_argument("-l", "--cpu_extension",
+        help="Optional. Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.",
+        type=str, default=None)
+    args.add_argument("-d", "--device",
+        help="Optional. Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified (CPU by default)",
+        default="CPU", type=str)
+    args.add_argument("--labels", help="Optional. Labels mapping file", default=None, type=str)
+    args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int)
+    
+    return parser
+
+
+def main():
+    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
+    args = build_argparser().parse_args()
+    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
+    model_xml = args.model
+    model_bin = os.path.splitext(model_xml)[0] + ".bin"
+    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
+    net = IENetwork(model=model_xml, weights=model_bin)
+    # -----------------------------------------------------------------------------------------------------
+
+    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
+    log.info("Loading Inference Engine")
+    ie = IECore()
+    log.info("Device info:")
+    versions = ie.get_versions(args.device)
+    print("{}{}".format(" "*8, args.device))
+    print("{}MKLDNNPlugin version ......... {}.{}".format(" "*8, versions[args.device].major, versions[args.device].minor))
+    print("{}Build ........... {}".format(" "*8, versions[args.device].build_number))
+    
+    if args.cpu_extension and "CPU" in args.device:
+        ie.add_extension(args.cpu_extension, "CPU")
+        log.info("CPU extension loaded: {}".format(args.cpu_extension))
+
+    if "CPU" in args.device:
+        supported_layers = ie.query_network(net, "CPU")
+        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
+        if len(not_supported_layers) != 0:
+            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
+                      format(args.device, ', '.join(not_supported_layers)))
+            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
+                      "or --cpu_extension command line argument")
+            sys.exit(1)
+    # -----------------------------------------------------------------------------------------------------
+
+    # --------------------------- 3. Read and preprocess input --------------------------------------------
+    input_blob = next(iter(net.inputs))
+    n, c, h, w = net.inputs[input_blob].shape
+    images = np.ndarray(shape=(n, c, h, w))
+    images_hw = []
+    for i in range(n):
+        image = cv2.imread(args.input[i])
+        ih, iw = image.shape[:-1]
+        images_hw.append((ih, iw))
+        log.info("File was added: ")
+        log.info("        {}".format(args.input[i]))
+        if (ih, iw) != (h, w):
+            image = cv2.resize(image, (w, h))
+            log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
+        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
+        images[i] = image
+    # -----------------------------------------------------------------------------------------------------
+
+    # --------------------------- 4. Configure input & output ---------------------------------------------
+    # --------------------------- Prepare input blobs -----------------------------------------------------
+    log.info("Preparing input blobs")
+    assert (len(net.inputs.keys()) == 1 or len(net.inputs.keys()) == 2), "Sample supports topologies only with 1 or 2 inputs"
+    input_blob = next(iter(net.inputs))
+    out_blob = next(iter(net.outputs))
+    input_name, input_info_name = "", ""
+
+    for input_key in net.inputs:
+        if len(net.inputs[input_key].layout) == 4:
+            input_name = input_key
+            log.info("Batch size is {}".format(net.batch_size))
+            net.inputs[input_key].precision = 'U8'
+        elif len(net.inputs[input_key].layout) == 2:
+            input_info_name = input_key
+            net.inputs[input_key].precision = 'FP32'
+            if net.inputs[input_key].shape[1] != 3 and net.inputs[input_key].shape[1] != 6 or net.inputs[input_key].shape[0] != 1:
+                log.error('Invalid input info. Should be 3 or 6 values length.')
+
+    # --------------------------- Prepare output blobs ----------------------------------------------------
+    log.info('Preparing output blobs')
+
+    output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
+    for output_key in net.outputs:
+        if net.layers[output_key].type == "DetectionOutput":
+            output_name, output_info = output_key, net.outputs[output_key]
+
+    if output_name == "":
+        log.error("Can't find a DetectionOutput layer in the topology")
+
+    output_dims = output_info.shape
+    if len(output_dims) != 4:
+        log.error("Incorrect output dimensions for SSD model")
+    max_proposal_count, object_size = output_dims[2], output_dims[3]
+
+    if object_size != 7:
+        log.error("Output item should have 7 as a last dimension")
+
+    output_info.precision = "FP32"
+    # -----------------------------------------------------------------------------------------------------
+
+    # --------------------------- Performing inference ----------------------------------------------------
+    log.info("Loading model to the device")
+    exec_net = ie.load_network(network=net, device_name=args.device)
+    log.info("Creating infer request and starting inference")
+    res = exec_net.infer(inputs={input_blob: images})
+    # -----------------------------------------------------------------------------------------------------
+
+    # --------------------------- Read and postprocess output ---------------------------------------------
+    log.info("Processing output blobs")
+    res = res[out_blob]
+    boxes, classes = {}, {}
+    data = res[0][0]
+    for number, proposal in enumerate(data):
+        if proposal[2] > 0:
+            imid = np.int(proposal[0])
+            ih, iw = images_hw[imid]
+            label = np.int(proposal[1])
+            confidence = proposal[2]
+            xmin = np.int(iw * proposal[3])
+            ymin = np.int(ih * proposal[4])
+            xmax = np.int(iw * proposal[5])
+            ymax = np.int(ih * proposal[6])
+            print("[{},{}] element, prob = {:.6}    ({},{})-({},{}) batch id : {}"\
+                .format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
+            if proposal[2] > 0.5:
+                print(" WILL BE PRINTED!")
+                if not imid in boxes.keys():
+                    boxes[imid] = []
+                boxes[imid].append([xmin, ymin, xmax, ymax])
+                if not imid in classes.keys():
+                    classes[imid] = []
+                classes[imid].append(label)
+            else:
+                print()
+
+    for imid in classes:
+        tmp_image = cv2.imread(args.input[imid])
+        for box in boxes[imid]:
+            cv2.rectangle(tmp_image, (box[0], box[1]), (box[2], box[3]), (232, 35, 244), 2)
+        cv2.imwrite("out.bmp", tmp_image)
+        log.info("Image out.bmp created!")
+    # -----------------------------------------------------------------------------------------------------
+
+    log.info("Execution successful\n")
+    log.info("This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool")
+
+
+if __name__ == '__main__':
+    sys.exit(main() or 0)
\ No newline at end of file
index 272432b..9671006 100644 (file)
@@ -19,7 +19,7 @@ The command yields the following usage message:
 ```
 usage: style_transfer_sample.py [-h] -m MODEL -i INPUT [INPUT ...]
                                 [-l CPU_EXTENSION] [-d DEVICE] 
-                                [-nt NUMBER_TOP] [-ni NUMBER_ITER]
+                                [-nt NUMBER_TOP]
                                 [--mean_val_r MEAN_VAL_R]
                                 [--mean_val_g MEAN_VAL_G]
                                 [--mean_val_b MEAN_VAL_B]
index d79a32a..12fd329 100644 (file)
@@ -44,7 +44,7 @@ cdef c_map_to_dict(map[string, string] c_map):
 supported_precisions = ["FP32", "FP16", "Q78", "I32", "I16", "I8", "U32", "U16", "U8"]
 
 supported_layouts = ["NCHW", "NHWC", "OIHW", "C", "CHW", "HW", "NC", "CN", "BLOCKED", "NCDHW"]
-known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL']
+known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL', 'MULTI']
 
 ctypedef enum StatusCode:
     OK = 0
@@ -336,7 +336,7 @@ cdef class InferRequest:
             # TODO: add execution index. Check if unsigned int is properly converted to int in python.
             profile[l.first.decode()] = {"status": info.status.decode(), "exec_type": info.exec_type.decode(),
                                          "layer_type": info.layer_type.decode(), "real_time": info.real_time,
-                                         "cpu_time": info.cpu_time}
+                                         "cpu_time": info.cpu_time, "execution_index": info.execution_index}
         return profile
 
     @property
@@ -493,18 +493,14 @@ cdef class IENetwork:
         cdef IENetwork net = IENetwork(model, weights)
         return net
 
-        # TODO: Use enum with precision type instead of srting parameter when python2 support will not be required.
-    def add_outputs(self, outputs, precision="FP32"):
-        if precision.upper() not in supported_precisions:
-            raise AttributeError(
-                "Unsupported precision {}! List of supported precisions: {}".format(precision, supported_precisions))
+    def add_outputs(self, outputs):
         if not isinstance(outputs, list):
             outputs = [outputs]
         for i, l in enumerate(outputs):
             if isinstance(l, str):
-                self.impl.addOutput(l.encode(), 0, precision.upper().encode())
+                self.impl.addOutput(l.encode(), 0)
             elif isinstance(l, tuple) and len(l) == 2:
-                self.impl.addOutput(l[0].encode(), l[1], precision.upper().encode())
+                self.impl.addOutput(l[0].encode(), l[1])
             else:
                 raise TypeError("Incorrect type {type} for layer to add at index {ind}. "
                                 "Expected string with layer name or tuple with two elements: layer name as "
index 371ffcf..c9a1ad3 100644 (file)
@@ -69,6 +69,11 @@ PyObject* parse_parameter(const InferenceEngine::Parameter & param){
         auto val = param.as<int>();
         return PyLong_FromLong((long)val);
     }
+        // Check for unsigned int
+    else if (param.is<unsigned int>()) {
+        auto val = param.as<unsigned int>();
+        return PyLong_FromLong((unsigned long)val);
+    }
         // Check for float
     else if (param.is<float>()) {
         auto val = param.as<float>();
@@ -98,6 +103,15 @@ PyObject* parse_parameter(const InferenceEngine::Parameter & param){
         }
         return list;
     }
+        // Check for std::vector<unsigned int>
+    else if (param.is<std::vector<unsigned int>>()){
+        auto val = param.as<std::vector<unsigned int>>();
+        PyObject *list = PyList_New(0);
+        for (const auto & it : val){
+            PyList_Append(list, PyLong_FromLong(it));
+        }
+        return list;
+    }
         // Check for std::vector<float>
     else if (param.is<std::vector<float>>()){
         auto val = param.as<std::vector<float>>();
@@ -243,7 +257,7 @@ const std::map<std::string, InferenceEnginePython::InputInfo> InferenceEnginePyt
     const InferenceEngine::InputsDataMap &inputsInfo = actual.getInputsInfo();
     for (auto &in : inputsInfo) {
         InferenceEnginePython::InputInfo info;
-        info.actual = *in.second;
+        info.actual = in.second;
         const InferenceEngine::TensorDesc &inputTensorDesc = in.second->getTensorDesc();
         info.dims = inputTensorDesc.getDims();
         for (auto it : precision_map)
@@ -277,16 +291,8 @@ const std::map<std::string, InferenceEnginePython::OutputInfo> InferenceEnginePy
 }
 
 void
-InferenceEnginePython::IENetwork::addOutput(const std::string &out_layer, size_t port_id, const std::string &precision) {
+InferenceEnginePython::IENetwork::addOutput(const std::string &out_layer, size_t port_id) {
     actual.addOutput(out_layer, port_id);
-    InferenceEngine::OutputsDataMap outputsDataMapUpd = actual.getOutputsInfo();
-    if (outputsDataMapUpd.count(out_layer)) {
-        outputsDataMapUpd[out_layer]->setPrecision(precision_map[precision]);
-    } else if (outputsDataMapUpd.count(out_layer + "." + std::to_string(port_id))){
-        outputsDataMapUpd[out_layer + "." + std::to_string(port_id)]->setPrecision(precision_map[precision]);
-    } else {
-        THROW_IE_EXCEPTION << "Failed to set precision for layer " << out_layer;
-    }
 }
 
 void InferenceEnginePython::IENetwork::setBatch(const size_t size) {
@@ -329,11 +335,11 @@ void InferenceEnginePython::IENetwork::setStats(const std::map<std::string, std:
 }
 
 void InferenceEnginePython::InputInfo::setPrecision(std::string precision) {
-    actual.setPrecision(precision_map[precision]);
+    actual->setPrecision(precision_map[precision]);
 }
 
 void InferenceEnginePython::InputInfo::setLayout(std::string layout) {
-    actual.setLayout(layout_map[layout]);
+    actual->setLayout(layout_map[layout]);
 }
 
 void InferenceEnginePython::OutputInfo::setPrecision(std::string precision) {
@@ -567,6 +573,7 @@ InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
         profile_info.layer_type = it.second.layer_type;
         profile_info.cpu_time = it.second.cpu_uSec;
         profile_info.real_time = it.second.realTime_uSec;
+        profile_info.execution_index = it.second.execution_index;
         perf_map[it.first] = profile_info;
     }
     return perf_map;
index 59e6320..08cb8cb 100644 (file)
@@ -49,7 +49,7 @@ struct IENetLayer {
 };
 
 struct InputInfo {
-    InferenceEngine::InputInfo actual;
+    InferenceEngine::InputInfo::Ptr actual;
     std::vector<size_t> dims;
     std::string precision;
     std::string layout;
@@ -85,7 +85,7 @@ struct IENetwork {
 
     void setBatch(const size_t size);
 
-    void addOutput(const std::string &out_layer, size_t port_id, const std::string &precision);
+    void addOutput(const std::string &out_layer, size_t port_id);
 
     const std::vector<std::pair<std::string, InferenceEnginePython::IENetLayer>> getLayers();
 
index 95db6d9..e838a26 100644 (file)
@@ -91,7 +91,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
         const vector[pair[string, IENetLayer]] getLayers() except +
         map[string, InputInfo] getInputs() except +
         map[string, OutputInfo] getOutputs() except +
-        void addOutput(string &, size_t, string &) except +
+        void addOutput(string &, size_t) except +
         void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except +
         void setBatch(size_t size) except +
         void setLayerParams(map[string, map[string, string]] params_map) except +
index a649bd2..8e4871d 100644 (file)
@@ -23,7 +23,13 @@ endif()
 cython_add_module (${TARGET_NAME} ${SOURCE})
 
 set_target_properties (${TARGET_NAME} PROPERTIES CXX_STANDARD 11 LINKER_LANGUAGE CXX)
-target_link_libraries (${TARGET_NAME} PRIVATE statistics_collector_s)
+target_link_libraries (${TARGET_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
+
+if(TARGET IE::statistics_collector_s)
+       target_link_libraries(${TARGET_NAME} PRIVATE IE::statistics_collector_s)
+else()
+       target_link_libraries(${TARGET_NAME} PRIVATE statistics_collector_s)
+endif()
 
 # perform copy
 ADD_CUSTOM_COMMAND (TARGET ${TARGET_NAME}
index 5746467..0df940f 100644 (file)
@@ -72,9 +72,6 @@ public:
      * @return reference to layer builder
      */
     ConcatLayer& setAxis(size_t axis);
-
-private:
-    size_t axis = 1;
 };
 
 }  // namespace Builder
index 9f4f5bb..cef99ef 100644 (file)
@@ -89,7 +89,7 @@ public:
     virtual OutputsDataMap getOutputsInfo() const {
         OutputsDataMap outputs;
         actual->getOutputsInfo(outputs);
-        return std::move(outputs);
+        return outputs;
     }
 
     /**
@@ -99,7 +99,7 @@ public:
     virtual InputsDataMap getInputsInfo() const {
         InputsDataMap inputs;
         actual->getInputsInfo(inputs);
-        return std::move(inputs);
+        return inputs;
     }
 
     /**
@@ -223,7 +223,7 @@ public:
                 }
             }
         }
-        return std::move(shapes);
+        return shapes;
     }
 
     /**
index 2eb235a..6a3e716 100644 (file)
@@ -30,17 +30,31 @@ class ExecutableNetwork {
     InferenceEnginePluginPtr plg;
 
 public:
+    /**
+     * @brief Default constructor
+     */
     ExecutableNetwork() = default;
+
+    /**
+     * @brief Destructor
+     */
     ~ExecutableNetwork() {
         actual = nullptr;
     }
 
+    /**
+     * @brief Constructs ExecutableNetwork from the initialized shared_pointer
+     * @param actual Initialized shared pointer
+     * @param plg Plugin to use
+     */
     explicit ExecutableNetwork(IExecutableNetwork::Ptr actual, InferenceEnginePluginPtr plg = {})
     : actual(actual), plg(plg) {}
 
     /**
-     * @brief Wraps original method
-     * IExecutableNetwork::getOutputsInfo
+     * @copybrief IExecutableNetwork::GetOutputsInfo
+     * 
+     * Wraps IExecutableNetwork::GetOutputsInfo.
+     * @return A collection that contains string as key, and const Data smart pointer as value
      */
     ConstOutputsDataMap GetOutputsInfo() const {
         ConstOutputsDataMap data;
@@ -49,8 +63,10 @@ public:
     }
 
     /**
-     * @brief Wraps original method
-     * IExecutableNetwork::getInputsInfo
+     * @copybrief IExecutableNetwork::GetInputsInfo
+     * 
+     * Wraps IExecutableNetwork::GetInputsInfo
+     * @return A collection that contains string as key, and const InputInfo smart pointer as value
      */
     ConstInputsDataMap GetInputsInfo() const {
         ConstInputsDataMap info;
@@ -59,16 +75,20 @@ public:
     }
 
     /**
-     * @brief reset owned object to new pointer, essential for cases when simultaneously loaded networks not expected
-     * @param actual actual pointed object
+     * @brief Resets the owned object to a new pointer.
+     *
+     * Essential for cases when simultaneously loaded networks are not expected.
+     * @param newActual actual pointed object
      */
     void reset(IExecutableNetwork::Ptr newActual) {
         this->actual.swap(newActual);
     }
 
     /**
-     * @brief Wraps original method
-     * IExecutableNetwork::CreateInferRequest
+     * @copybrief IExecutableNetwork::CreateInferRequest
+     * 
+     * Wraps IExecutableNetwork::CreateInferRequest.
+     * @return InferRequest object
      */
     InferRequest CreateInferRequest() {
         IInferRequest::Ptr req;
@@ -78,9 +98,10 @@ public:
     }
 
     /**
-     * @brief Wraps original method
-     * IExecutableNetwork::CreateInferRequestPtr
-     * @return shared pointer on InferRequest object
+     * @copybrief IExecutableNetwork::CreateInferRequest
+     * 
+     * Wraps IExecutableNetwork::CreateInferRequest.
+     * @return shared pointer on InferenceEngine::InferRequest object
      */
     InferRequest::Ptr CreateInferRequestPtr() {
         IInferRequest::Ptr req;
@@ -89,18 +110,24 @@ public:
     }
 
     /**
-    * @brief Exports the current executable network so it can be used later in the Import() main API
+    * @copybrief IExecutableNetwork::Export
+    * 
+    * Wraps IExecutableNetwork::Export.
+    * 
+    * @see Core::ImportNetwork
+    * @see InferencePlugin::ImportNetwork
+    * 
     * @param modelFileName Full path to the location of the exported file
-    * @param resp Optional: pointer to an already allocated object to contain information in case of failure
     */
     void Export(const std::string &modelFileName) {
         CALL_STATUS_FNC(Export, modelFileName);
     }
 
     /**
-    * @brief Gets the mapping of IR layer names to implemented kernels
+    * @copybrief IExecutableNetwork::GetMappedTopology
+    * 
+    * Wraps IExecutableNetwork::GetMappedTopology.
     * @param deployedTopology Map of PrimitiveInfo objects that represent the deployed topology
-    * @param resp Optional: pointer to an already allocated object to contain information in case of failure
     */
     void GetMappedTopology(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &deployedTopology) {
         CALL_STATUS_FNC(GetMappedTopology, deployedTopology);
@@ -115,7 +142,9 @@ public:
     }
 
     /**
-    * @brief Get executable graph information from a plugin represented as CNNNetwork
+    * @copybrief IExecutableNetwork::GetExecGraphInfo
+    * 
+    * Wraps IExecutableNetwork::GetExecGraphInfo.
     * @return CNNetwork containing Executable Graph Info
     */
     CNNNetwork GetExecGraphInfo() {
@@ -125,7 +154,10 @@ public:
     }
 
     /**
-     *@brief see original function InferenceEngine::IExecutableNetwork::QueryState
+     * @copybrief IExecutableNetwork::QueryState
+     * 
+     * Wraps IExecutableNetwork::QueryState
+     * @return A vector of Memory State objects
      */
     std::vector<MemoryState> QueryState() {
         IMemoryState::Ptr pState = nullptr;
@@ -146,20 +178,21 @@ public:
     }
 
     /**
-     * @brief Sets configuration for current executable network
+     * @copybrief IExecutableNetwork::SetConfig
+     * 
+     * Wraps IExecutableNetwork::SetConfig.
      * @param config Map of pairs: (config parameter name, config parameter value)
-     * @param resp Pointer to the response message that holds a description of an error if any occurred
      */
     void SetConfig(const std::map<std::string, Parameter> &config) {
         CALL_STATUS_FNC(SetConfig, config);
     }
 
-    /** @brief Gets configuration dedicated to plugin behaviour
-        * @param name - config key, can be found in ie_plugin_config.hpp
-        * @param options - configuration details for coonfig value
-        * @param result - value of config corresponding to config key
-        * @param resp Pointer to the response message that holds a description of an error if any occurred
-    */
+    /** @copybrief IExecutableNetwork::GetConfig
+     * 
+     * Wraps IExecutableNetwork::GetConfig
+     * @param name - config key, can be found in ie_plugin_config.hpp
+     * @return Configuration parameter value
+     */
     Parameter GetConfig(const std::string &name) const {
         Parameter configValue;
         CALL_STATUS_FNC(GetConfig, name, configValue);
@@ -167,13 +200,11 @@ public:
     }
 
     /**
-     * @brief Gets general runtime metric for dedicated hardware
+     * @copybrief IExecutableNetwork::GetMetric
+     * 
+     * Wraps IExecutableNetwork::GetMetric
      * @param name  - metric name to request
-     * @param options - configuration details for metric
-     * @param result - metric value corresponding to metric key
-     * @param resp - Pointer to the response message that holds a description of an error if any
-     *             occurred
-     * @return code of the operation. OK if succeeded
+     * @return Metric parameter value
      */
     Parameter GetMetric(const std::string &name) const {
         Parameter metricValue;
@@ -181,6 +212,9 @@ public:
         return metricValue;
     }
 
+    /**
+     * @brief A smart pointer to the ExecutableNetwork object
+     */
     using Ptr = std::shared_ptr<ExecutableNetwork>;
 };
 
index 5d1eeb4..9e42226 100644 (file)
@@ -69,8 +69,14 @@ class InferRequest {
     }
 
 public:
+    /**
+     * @brief Default constructor
+     */
     InferRequest() = default;
 
+    /**
+     * @brief Destructor
+     */
     ~InferRequest() {
         actual = nullptr;
     }
@@ -150,8 +156,9 @@ public:
     }
 
     /**
-     * constructs InferRequest from initialised shared_pointer
-     * @param actual
+     * constructs InferRequest from the initialized shared_pointer
+     * @param request Initialized shared pointer
+     * @param plg Plugin to use
      */
     explicit InferRequest(IInferRequest::Ptr request, InferenceEnginePluginPtr plg = {})
     : actual(request), plg(plg) {}
@@ -192,14 +199,25 @@ public:
         return actual;
     }
 
+    /**
+     * @brief Checks if current InferRequest object is not initialized
+     * @return true if current InferRequest object is not initialized, false - otherwise
+     */
     bool operator!() const noexcept {
         return !actual;
     }
 
+    /**
+     * @brief Checks if current InferRequest object is initialized
+     * @return true if current InferRequest object is initialized, false - otherwise
+     */
     explicit operator bool() const noexcept {
         return !!actual;
     }
 
+    /**
+     * @brief A smart pointer to the InferRequest object
+     */
     using Ptr = std::shared_ptr<InferRequest>;
 };
 
index d20fcae..d1867c8 100644 (file)
@@ -14,40 +14,47 @@ class MemoryState {
     IMemoryState::Ptr actual = nullptr;
 
  public:
+    /**
+     * constructs MemoryState from the initialized shared_pointer
+     * @param pState Initialized shared pointer
+     */
     explicit MemoryState(IMemoryState::Ptr pState) : actual(pState) {}
 
     /**
      * @brief Wraps original method
      * IMemoryState::Reset
      */
-     void Reset() {
+    void Reset() {
         CALL_STATUS_FNC_NO_ARGS(Reset);
-     }
+    }
+
     /**
      * @brief Wraps original method
      * IMemoryState::GetName
      */
-     std::string GetName() const {
-         char name[256];
-         CALL_STATUS_FNC(GetName, name, sizeof(name));
-         return name;
-     }
+    std::string GetName() const {
+        char name[256];
+        CALL_STATUS_FNC(GetName, name, sizeof(name));
+        return name;
+    }
+
     /**
      * @brief Wraps original method
      * IMemoryState::GetLastState
      */
-      Blob::CPtr GetLastState() const {
-         Blob::CPtr stateBlob;
-         CALL_STATUS_FNC(GetLastState, stateBlob);
-         return stateBlob;
-     }
+    Blob::CPtr GetLastState() const {
+        Blob::CPtr stateBlob;
+        CALL_STATUS_FNC(GetLastState, stateBlob);
+        return stateBlob;
+    }
+
     /**
      * @brief Wraps original method
      * IMemoryState::SetState
      */
-     void SetState(Blob::Ptr state) {
-         CALL_STATUS_FNC(SetState, state);
-     }
+    void SetState(Blob::Ptr state) {
+        CALL_STATUS_FNC(SetState, state);
+    }
 };
 
 }  // namespace InferenceEngine
\ No newline at end of file
index 8d5744a..093b16c 100644 (file)
@@ -34,6 +34,7 @@ public:
 
     /**
      * @brief Constructs a plugin instance from the given pointer.
+     * @param pointer Initialized Plugin pointer
      */
     explicit InferencePlugin(const InferenceEnginePluginPtr &pointer) : actual(pointer) {}
 
@@ -53,6 +54,7 @@ public:
     /**
      * @deprecated Use InferencePlugin::LoadNetwork(ICNNNetwork &, const std::map<std::string, std::string> &)
      * @brief Wraps original method IInferencePlugin::LoadNetwork(ICNNNetwork &, ResponseDesc *)
+     * @param network A network object to load
      */
     INFERENCE_ENGINE_DEPRECATED
     void LoadNetwork(ICNNNetwork &network) {
@@ -64,6 +66,9 @@ public:
     /**
      * @brief Wraps original method
      * IInferencePlugin::LoadNetwork(IExecutableNetwork::Ptr&, ICNNNetwork&, const std::map<std::string, std::string> &, ResponseDesc*).
+     * @param network A network object to load
+     * @param config A map of configuration options
+     * @return Created Executable Network object
      */
     ExecutableNetwork LoadNetwork(ICNNNetwork &network, const std::map<std::string, std::string> &config) {
         IExecutableNetwork::Ptr ret;
@@ -74,6 +79,9 @@ public:
     /**
      * @brief Wraps original method
      * IInferencePlugin::LoadNetwork(IExecutableNetwork::Ptr&, ICNNNetwork&, const std::map<std::string, std::string> &, ResponseDesc*).
+     * @param network A network object to load
+     * @param config A map of configuration options
+     * @return Created Executable Network object
      */
     ExecutableNetwork LoadNetwork(CNNNetwork network, const std::map<std::string, std::string> &config) {
         IExecutableNetwork::Ptr ret;
@@ -85,6 +93,8 @@ public:
     /**
      * @deprecated Use IExecutableNetwork to create IInferRequest.
      * @brief Wraps original method IInferencePlugin::Infer(const BlobMap&, BlobMap&, ResponseDesc *)
+     * @param input A map of input blobs accessed by input names
+     * @param result A map of output blobs accessed by output names
      */
     INFERENCE_ENGINE_DEPRECATED
     void Infer(const BlobMap &input, BlobMap &result) {
@@ -96,6 +106,7 @@ public:
     /**
      * @deprecated Use IInferRequest to get performance counters
      * @brief Wraps original method IInferencePlugin::GetPerformanceCounts
+     * @return Map of layer names to profiling information for those layers
      */
     INFERENCE_ENGINE_DEPRECATED
     std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const {
@@ -109,6 +120,7 @@ public:
     /**
      * @brief Wraps original method
      * IInferencePlugin::AddExtension
+     * @param extension Pointer to loaded Extension
      */
     void AddExtension(InferenceEngine::IExtensionPtr extension) {
         CALL_STATUS_FNC(AddExtension, extension);
@@ -117,6 +129,7 @@ public:
     /**
      * @brief Wraps original method
      * IInferencePlugin::SetConfig
+     * @param config A configuration map
      */
     void SetConfig(const std::map<std::string, std::string> &config) {
         CALL_STATUS_FNC(SetConfig, config);
@@ -125,7 +138,10 @@ public:
     /**
      * @brief Wraps original method
      * IInferencePlugin::ImportNetwork
-    */
+     * @param modelFileName A path to the imported network
+     * @param config A configuration map
+     * @return Created Executable Network object
+     */
     ExecutableNetwork ImportNetwork(const std::string &modelFileName, const std::map<std::string, std::string> &config) {
         IExecutableNetwork::Ptr ret;
         CALL_STATUS_FNC(ImportNetwork, ret, modelFileName, config);
@@ -136,6 +152,8 @@ public:
      * @deprecated Use InferencePlugin::QueryNetwork(const ICNNNetwork &, const std::map<std::string, std::string> &, QueryNetworkResult &) const
      * @brief Wraps original method
      * IInferencePlugin::QueryNetwork(const ICNNNetwork&, QueryNetworkResult& ) const
+     * @param network A network object to query
+     * @param res Query results
      */
     INFERENCE_ENGINE_DEPRECATED
     void QueryNetwork(const ICNNNetwork &network, QueryNetworkResult &res) const {
@@ -145,6 +163,9 @@ public:
     /**
      * @brief Wraps original method
      * IInferencePlugin::QueryNetwork(const ICNNNetwork&, const std::map<std::string, std::string> &, QueryNetworkResult&) const
+     * @param network A network object to query
+     * @param config A configuration map
+     * @param res Query results
      */
     void QueryNetwork(const ICNNNetwork &network, const std::map<std::string, std::string> &config, QueryNetworkResult &res) const {
         actual->QueryNetwork(network, config, res);
@@ -153,7 +174,7 @@ public:
 
     /**
      * @brief Converts InferenceEngine to InferenceEnginePluginPtr pointer
-     * @brief Returns wrapped object
+     * @return Wrapped object
      */
     operator InferenceEngine::InferenceEnginePluginPtr() {
         return actual;
@@ -162,7 +183,7 @@ public:
     /**
      * @deprecated Deprecated since HeteroPluginPtr is deprecated
      * @brief Converts InferenceEngine to HeteroPluginPtr pointer
-     * @return wrapped Hetero object if underlined object is HeteroPlugin instance, nullptr otherwise
+     * @return Wrapped Hetero object if the underlying object is a HeteroPlugin instance, nullptr otherwise
      */
     IE_SUPPRESS_DEPRECATED_START
     operator InferenceEngine::HeteroPluginPtr() {
index 70b5050..e7be114 100644 (file)
@@ -71,8 +71,8 @@ public:
 private:
     std::vector<std::shared_ptr<LT>> sortedLayers;
     std::shared_ptr<LT> currentLayer;
-    size_t currentIdx;
     NT *network = nullptr;
+    size_t currentIdx;
 
     std::shared_ptr<LT> getNextLayer() {
         return (sortedLayers.size() > currentIdx) ? sortedLayers[currentIdx++] : nullptr;
index d4801ba..850f905 100644 (file)
@@ -49,7 +49,7 @@ class PreAllocator : public IAllocator {
             return _actualData;
         }
 
-        return this;
+        return nullptr;
     }
     /**
      * @brief The PreAllocator class cannot release the handle
@@ -83,4 +83,4 @@ std::shared_ptr<IAllocator>  make_pre_allocator(T *ptr, size_t size) {
 }
 
 }  // namespace details
-}  // namespace InferenceEngine
\ No newline at end of file
+}  // namespace InferenceEngine
diff --git a/inference-engine/include/dlia/dlia_config.hpp b/inference-engine/include/dlia/dlia_config.hpp
new file mode 100644 (file)
index 0000000..1adca7e
--- /dev/null
@@ -0,0 +1,81 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief A header that defines advanced device-specific properties for DLIA plugins.
+ * These properties should be used in SetConfig() and LoadNetwork() methods of plugins
+ *
+ * @file dlia_config.hpp
+ */
+
+#pragma once
+
+#include <string>
+#include "ie_plugin_config.hpp"
+
+namespace InferenceEngine {
+
+namespace DliaMetrics {
+
+/**
+ * @def DLIA_METRIC_VALUE(name)
+ * @brief Shortcut for defining FPGA metric values
+ */
+#define DLIA_METRIC_VALUE(name) InferenceEngine::DliaMetrics::name
+#define DECLARE_DLIA_METRIC_VALUE(name) static constexpr auto name = #name
+
+/**
+ * @brief FP11 optimization capability. It is specific to the FPGA device, which can perform computations in the FP11 data type.
+ */
+DECLARE_DLIA_METRIC_VALUE(FP11);
+
+/**
+ * @brief Input streaming capability. It is specific to the FPGA device, which can perform input streaming.
+ */
+DECLARE_DLIA_METRIC_VALUE(INPUT_STREAMING);
+
+}  // namespace DliaMetrics
+
+namespace DLIAConfigParams {
+
+/**
+ * @def DLIA_CONFIG_KEY(name)
+ * @brief Shortcut for defining FPGA configuration keys
+ */
+#define DLIA_CONFIG_KEY(name) InferenceEngine::DLIAConfigParams::_CONFIG_KEY(DLIA_##name)
+
+#define DECLARE_DLIA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(DLIA_##name)
+#define DECLARE_DLIA_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(DLIA_##name)
+
+/**
+ * @brief The key to define the type of transformations for DLIA inputs and outputs.
+ * DLIA uses a custom data layout for input and output blobs. The IE DLIA plugin provides
+ * optimized versions of the transformation functions that do not use OpenMP and are much faster
+ * than the native DLIA functions. Values: "NO" - optimized plugin transformations
+ * are used, "YES" - native DLIA transformations are used.
+ */
+DECLARE_DLIA_CONFIG_KEY(IO_TRANSFORMATIONS_NATIVE);
+
+/**
+ * @brief The key to define the path to the folder with DLA bitstream architectures
+ */
+DECLARE_DLIA_CONFIG_KEY(ARCH_ROOT_DIR);
+
+/**
+ * @brief The bool key to define whether theoretical performance estimation should be performed.
+ * If true, the estimated performance is dumped via performance counters as "FPGA theoretical execute time"
+ */
+DECLARE_DLIA_CONFIG_KEY(PERF_ESTIMATION);
+
+// TODO: Temporarily adding dlia config to test streaming feature
+// Values - "YES" or "NO"
+DECLARE_DLIA_CONFIG_KEY(ENABLE_STREAMING);
+
+/**
+ * @brief The bool key to define whether information messages with a reason are printed in case the layer is unsupported by DLA
+ */
+DECLARE_DLIA_CONFIG_KEY(DUMP_SUPPORTED_LAYERS_INFORMATION);
+
+}  // namespace DLIAConfigParams
+}  // namespace InferenceEngine
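
Editorial note: the new header only declares the keys; they are consumed through the regular configuration path. A hedged sketch of passing them from Python at network load time follows. It assumes the `DECLARE_DLIA_CONFIG_KEY` macros expand to their literal string names (as other Inference Engine config macros do) and that an FPGA plugin and a bitstream architecture folder are actually present; the paths are placeholders.

```python
# Hedged sketch: passing DLIA/FPGA config keys via load_network(); values are placeholders.
from openvino.inference_engine import IENetwork, IECore

ie = IECore()
net = IENetwork(model="model.xml", weights="model.bin")

exec_net = ie.load_network(
    network=net,
    device_name="FPGA",
    config={
        "DLIA_PERF_ESTIMATION": "YES",              # dump theoretical execute time in perf counters
        "DLIA_ARCH_ROOT_DIR": "/path/to/dla_arch",  # placeholder path to bitstream architectures
    })
```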
index fb17d07..2eb3621 100644 (file)
@@ -20,6 +20,10 @@ namespace InferenceEngine {
 
 namespace HeteroConfigParams {
 
+/**
+ * @def HETERO_CONFIG_KEY(name)
+ * @brief Shortcut for defining HETERO configuration keys
+ */
 #define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name)
 #define DECLARE_HETERO_CONFIG_KEY(name) DECLARE_CONFIG_KEY(HETERO_##name)
 #define DECLARE_HETERO_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(HETERO_##name)
index 08b6838..af2e0d1 100644 (file)
@@ -28,6 +28,8 @@ class IAllocator  : public details::IRelease {
 public:
     /**
      * @brief Maps handle to heap memory accessible by any memory manipulation routines.
+     * @param handle Handle to the allocated memory to be locked
+     * @param LockOp Operation to lock memory for
      * @return Generic pointer to memory
      */
     virtual void * lock(void * handle, LockOp = LOCK_FOR_WRITE)  noexcept = 0;
@@ -35,6 +37,7 @@ public:
      * @brief Unmaps memory by handle with multiple sequential mappings of the same handle.
      * The multiple sequential mappings of the same handle are suppose to get the same
      * result while there isn't a ref counter supported.
+     * @param handle Handle to the locked memory to unlock
      */
     virtual void  unlock(void * handle) noexcept = 0;
     /**
index 2394c0c..e9132f1 100644 (file)
     #define IE_DO_PRAGMA(x)
 #endif
 
-#ifdef _MSC_VER
+#if defined (_MSC_VER) && !defined (__clang__)
 #define IE_SUPPRESS_DEPRECATED_START \
     IE_DO_PRAGMA(warning(push)) \
     IE_DO_PRAGMA(warning(disable: 4996))
 #define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
+#elif defined(__INTEL_COMPILER)
+#define IE_SUPPRESS_DEPRECATED_START \
+    IE_DO_PRAGMA(warning(push)) \
+    IE_DO_PRAGMA(warning(disable:1478))
+#define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
 #elif defined(__clang__) || ((__GNUC__)  && (__GNUC__*100 + __GNUC_MINOR__ > 405))
 #define IE_SUPPRESS_DEPRECATED_START \
     IE_DO_PRAGMA(GCC diagnostic push) \
 #define IE_SUPPRESS_DEPRECATED_START
 #define IE_SUPPRESS_DEPRECATED_END
 #endif
+
+#ifndef ENABLE_UNICODE_PATH_SUPPORT
+    #if defined(_WIN32)
+        #define ENABLE_UNICODE_PATH_SUPPORT
+    #elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2))
+        #define ENABLE_UNICODE_PATH_SUPPORT
+    #endif
+#endif
index d2628ad..12d341d 100644 (file)
@@ -678,7 +678,7 @@ public:
         if (_handle != nullptr) {
             getAllocator()->free(_handle);
         }
-        _handle = getAllocator()->alloc(byteSize());
+        _handle = getAllocator()->alloc(size() * sizeof(T));
     }
 
     /**
@@ -779,10 +779,7 @@ protected:
      * @brief Frees handler and cleans up the stored data.
      */
     virtual bool free() {
-        bool bCanRelease = true;
-        if (_handle == nullptr) return bCanRelease;
-
-        bCanRelease = getAllocator()->free(_handle);
+        bool bCanRelease = getAllocator()->free(_handle);
         _handle = nullptr;
         return bCanRelease;
     }
index 9e7241d..581b877 100644 (file)
@@ -109,6 +109,8 @@ inline std::ostream & operator << (std::ostream &out, const Layout & p) {
             PRINT_LAYOUT(ANY);
             PRINT_LAYOUT(NCHW);
             PRINT_LAYOUT(NHWC);
+            PRINT_LAYOUT(NCDHW);
+            PRINT_LAYOUT(NDHWC);
             PRINT_LAYOUT(OIHW);
             PRINT_LAYOUT(C);
             PRINT_LAYOUT(CHW);
@@ -125,7 +127,7 @@ inline std::ostream & operator << (std::ostream &out, const Layout & p) {
     }
 
 /**
- * @enum Color format
+ * @enum ColorFormat
  * @brief Extra information about input color format for preprocessing
  */
 enum ColorFormat : uint32_t {
index b473572..d0d2660 100644 (file)
@@ -35,7 +35,7 @@ public:
 
     /**
      * @brief Returns plugins version information
-     * @param Device name to indentify plugin
+     * @param deviceName Device name to identify plugin
      * @return A vector of versions
      */
     std::map<std::string, Version> GetVersions(const std::string & deviceName) const;
@@ -133,7 +133,8 @@ public:
 
     /** @brief Registers plugin to Inference Engine Core instance using XML configuration file with
      * plugins description. XML file has the following structure:
-     *
+     * 
+     * ```xml
      * <ie>
      *     <plugins>
      *         <plugin name="" location="">
@@ -144,14 +145,16 @@ public:
      *                 <property key="" value=""/>
      *             </properties>
      *         </plugin>
-     *     </plugin>
+     *     </plugins>
      * </ie>
-     *
+     * ```
+     * 
      * - `name` identifies name of device enabled by plugin
      * - `location` specifies absolute path to dynamic library with plugin. A path can also be relative to inference engine shared library.
      *   It allows to have common config for different systems with different configurations.
      * - Properties are set to plugin via the `SetConfig` method.
      * - Extensions are set to plugin via the `AddExtension` method.
+     * @param xmlConfigFile A path to .xml file with plugins to register.
      */
     void RegisterPlugins(const std::string & xmlConfigFile);
 };
index 75e906f..d43824d 100644 (file)
@@ -74,6 +74,7 @@ public:
      * @brief An empty constructor (dimensionless)
      * @param name Name of the data node
      * @param _precision Precision of the data
+     * @param layout Data layout
      */
     Data(const std::string &name, Precision _precision, Layout layout = NCHW);
 
@@ -82,6 +83,7 @@ public:
      * @param name Name of the data node
      * @param a_dims Data tensor dimensions
      * @param _precision Precision of the data
+     * @param layout Data layout
      */
     Data(const std::string &name, const SizeVector &a_dims, Precision _precision, Layout layout = NCHW);
     /**
@@ -183,7 +185,7 @@ public:
 
     /**
      * @brief Sets a name the Data object
-     * @param name Name of the data node
+     * @param newName Name of the data node
      */
 
     void setName(const std::string& newName);
index c166021..fef50c2 100644 (file)
@@ -28,8 +28,10 @@ enum class TargetDevice : uint8_t {
     eGPU = 3,
     eFPGA = 4,
     eMYRIAD = 5,
+    eHDDL = 6,
     eGNA = 7,
     eHETERO = 8,
+    eMULTI = 10,
 };
 
 /**
@@ -53,8 +55,10 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
             DECL_DEVICE(GPU),
             DECL_DEVICE(FPGA),
             DECL_DEVICE(MYRIAD),
+            DECL_DEVICE(HDDL),
             DECL_DEVICE(GNA),
-            DECL_DEVICE(HETERO)
+            DECL_DEVICE(HETERO),
+            DECL_DEVICE(MULTI)
         };
 #undef DECLARE
         return g_allDeviceInfos;
@@ -64,6 +68,8 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
     /**
      * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated
      * @brief Converts string representation of device to InferenceEngine::TargetDevice enum value
+     * @param deviceName A string representation of a device name
+     * @return An instance of InferenceEngine::TargetDevice
      */
     INFERENCE_ENGINE_DEPRECATED
     static TargetDevice fromStr(const std::string &deviceName) {
@@ -72,9 +78,11 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
             { "GPU", InferenceEngine::TargetDevice::eGPU },
             { "FPGA", InferenceEngine::TargetDevice::eFPGA },
             { "MYRIAD", InferenceEngine::TargetDevice::eMYRIAD },
+            { "HDDL", InferenceEngine::TargetDevice::eHDDL },
             { "GNA", InferenceEngine::TargetDevice::eGNA },
             { "BALANCED", InferenceEngine::TargetDevice::eBalanced },
-            { "HETERO", InferenceEngine::TargetDevice::eHETERO }
+            { "HETERO", InferenceEngine::TargetDevice::eHETERO },
+            { "MULTI", InferenceEngine::TargetDevice::eMULTI}
         };
         auto val = deviceFromNameMap.find(deviceName);
         return val != deviceFromNameMap.end() ? val->second : InferenceEngine::TargetDevice::eDefault;
@@ -82,7 +90,9 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
 
     /**
      * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated
-     * @brief Converts InferenceEngine::TargetDevice enum value to string representation
+     * @brief Converts an instance of InferenceEngine::TargetDevice to string representation
+     * @param device Instance of InferenceEngine::TargetDevice
+     * @return A c-string with the name
      */
     INFERENCE_ENGINE_DEPRECATED
     static const char * name(TargetDevice device) {
index 2547fb6..fb4847d 100644 (file)
@@ -4,6 +4,7 @@
 
 /**
  * @brief This is a header file for the ICNNNetworkStats class
+ *
  * @file ie_icnn_network_stats.hpp
  */
 #pragma once
 namespace InferenceEngine {
 
 class NetworkNodeStats;
-
+/**
+ * @brief A shared pointer to the NetworkNodeStats object
+ */
 using NetworkNodeStatsPtr = std::shared_ptr<NetworkNodeStats>;
+/**
+ * @brief A smart pointer to the NetworkNodeStats object
+ */
 using NetworkNodeStatsWeakPtr = std::weak_ptr<NetworkNodeStats>;
+/**
+ * @brief A map of pairs: name of a layer and related statistics
+ */
 using NetworkStatsMap = std::map<std::string, NetworkNodeStatsPtr>;
 /**
  * @class ICNNNetworkStats
@@ -28,16 +37,44 @@ using NetworkStatsMap = std::map<std::string, NetworkNodeStatsPtr>;
  */
 class ICNNNetworkStats : public details::IRelease {
 public:
+    /**
+     * @brief Sets a map which contains layers with statistics
+     *
+     * Abstract method
+     * @param stats A map of layer names to statistics to set
+     */
     virtual void setNodesStats(const NetworkStatsMap& stats) = 0;
+    /**
+     * @brief Gets a map which contains layers with statistics
+     *
+     * Abstract method
+     * @return A NetworkStatsMap object
+     */
     virtual const NetworkStatsMap& getNodesStats() const = 0;
-
+    /**
+     * @brief Checks if a container is empty
+     *
+     * Abstract method
+     * @return A bool value which shows whether a container is empty
+     */
     virtual bool isEmpty() const = 0;
 };
 
-
+/**
+ * @class NetworkNodeStats
+ * @brief This class implements a container which stores statistics for a layer
+ */
 class NetworkNodeStats {
 public:
+    /**
+     * @brief The constructor which creates a NetworkNodeStats object
+     */
     NetworkNodeStats() { }
+    /**
+     * @brief The constructor which creates a NetworkNodeStats object with statistics vectors of the given size
+     *
+     * @param statCount The number of minimum/maximum values in statistics
+     */
     explicit NetworkNodeStats(int statCount) {
         float mn = (std::numeric_limits<float>::max)();
         float mx = (std::numeric_limits<float>::min)();
@@ -49,7 +86,13 @@ public:
     }
 
 public:
+    /**
+     * @brief Vector of floats which contains minimum values of layer activations
+     */
     std::vector<float> _minOutputs;
+    /**
+     * @brief Vector of floats which contains maximum values of layer activations
+     */
     std::vector<float> _maxOutputs;
 };
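A minimal sketch of how a statistics map can be assembled from these types; the layer name "conv1" and the min/max values are illustrative only, and the resulting map would typically be passed to ICNNNetworkStats::setNodesStats:

```cpp
#include <ie_icnn_network_stats.hpp>
#include <memory>

InferenceEngine::NetworkStatsMap makeStats() {
    // One min/max pair per channel; a single channel is used here for brevity
    auto nodeStats = std::make_shared<InferenceEngine::NetworkNodeStats>(1);
    nodeStats->_minOutputs[0] = 0.0f;   // minimum observed activation
    nodeStats->_maxOutputs[0] = 6.0f;   // maximum observed activation

    InferenceEngine::NetworkStatsMap statsMap;
    statsMap["conv1"] = nodeStats;      // key is the layer name
    return statsMap;
}
```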
 
index 7dafa40..a644038 100644 (file)
@@ -38,39 +38,51 @@ public:
     using Ptr = std::shared_ptr<IExecutableNetwork>;
 
     /**
-     * @brief Gets the Executable network output Data node information. The received info is stored in the given ConstOutputsDataMap node.
+     * @brief Gets the Executable network output Data node information.
+     * 
+     * The received info is stored in the given ::ConstOutputsDataMap node.
      * This method needs to be called to find output names for using them later during filling of a map
      * of blobs passed to InferenceEngine::IInferencePlugin::Infer()
-     * @param out Reference to the ConstOutputsDataMap object
+     * 
+     * @param out Reference to the ::ConstOutputsDataMap object
      * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-     * @return Status code of the operation: OK (0) for success
+     * @return Status code of the operation: InferenceEngine::OK (0) for success
      */
     virtual StatusCode GetOutputsInfo(ConstOutputsDataMap &out, ResponseDesc *resp) const noexcept  = 0;
 
     /**
-     * @brief Gets the Executable network input Data node information. The received info is stored in the given ConstInputsDataMap object.
+     * @brief Gets the executable network input Data node information.
+     * 
+     * The received info is stored in the given ::ConstInputsDataMap object.
      * This method needs to be called to find out input names for using them later during filling of a map
      * of blobs passed to InferenceEngine::IInferencePlugin::Infer()
-     * @param inputs Reference to ConstInputsDataMap object.
+     * 
+     * @param inputs Reference to ::ConstInputsDataMap object.
      * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-     * @return Status code of the operation: OK (0) for success
+     * @return Status code of the operation: InferenceEngine::OK (0) for success
      */
     virtual StatusCode GetInputsInfo(ConstInputsDataMap &inputs, ResponseDesc *resp) const noexcept  = 0;
 
     /**
     * @brief Creates an inference request object used to infer the network.
+    * 
     * The created request has allocated input and output blobs (that can be changed later).
+    * 
     * @param req Shared pointer to the created request object
     * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-    * @return Status code of the operation: OK (0) for success
+    * @return Status code of the operation: InferenceEngine::OK (0) for success
     */
     virtual StatusCode CreateInferRequest(IInferRequest::Ptr& req, ResponseDesc *resp) noexcept = 0;
 
     /**
-    * @brief Exports the current executable network so it can be used later in the Import() main API
+    * @brief Exports the current executable network.
+    * 
+    * @see Core::ImportNetwork
+    * @see IInferencePlugin::ImportNetwork
+    * 
     * @param modelFileName Full path to the location of the exported file
     * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-    * @return Status code of the operation: OK (0) for success
+    * @return Status code of the operation: InferenceEngine::OK (0) for success
     */
     virtual StatusCode Export(const std::string& modelFileName, ResponseDesc *resp) noexcept = 0;
 
@@ -78,53 +90,64 @@ public:
     * @brief Get the mapping of IR layer names to implemented kernels
     * @param deployedTopology Map of PrimitiveInfo objects that represent the deployed topology
     * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-    * @return Status code of the operation: OK (0) for success
+    * @return Status code of the operation: InferenceEngine::OK (0) for success
     */
     virtual StatusCode GetMappedTopology(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &deployedTopology, ResponseDesc *resp) noexcept = 0;
 
     /**
     * @brief Get executable graph information from a device
+    * 
     * @param graphPtr network ptr to store executable graph information
     * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-    * @return Status code of the operation: OK (0) for success
+    * @return Status code of the operation: InferenceEngine::OK (0) for success
     */
     virtual StatusCode GetExecGraphInfo(ICNNNetwork::Ptr &graphPtr, ResponseDesc *resp) noexcept = 0;
 
     /**
-     * @brief Gets state control interface for given executable network, State control essential for recurrent networks
+     * @brief Gets state control interface for given executable network.
+     * 
+     * State control is essential for recurrent networks
+     * 
      * @param pState reference to a pointer that receives internal states
      * @param idx requested index for receiving memory state
      * @param resp Optional: pointer to an already allocated object to contain information in case of failure
-     * @return Status code of the operation: OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for given index
+     * @return Status code of the operation: InferenceEngine::OK (0) for success; InferenceEngine::OUT_OF_BOUNDS (-6) if there is no memory state for the given index
      */
     virtual StatusCode QueryState(IMemoryState::Ptr & pState, size_t  idx, ResponseDesc *resp) noexcept = 0;
 
     /**
      * @brief Sets configuration for current executable network
+     * 
      * @param config Map of pairs: (config parameter name, config parameter value)
      * @param resp Pointer to the response message that holds a description of an error if any occurred
-     * @return code of the operation. OK if succeeded
+     * @return code of the operation. InferenceEngine::OK if succeeded
      */
     virtual StatusCode SetConfig(const std::map<std::string, Parameter> &config, ResponseDesc *resp) noexcept = 0;
 
-    /** @brief Gets configuration for current executable network. The method is responsible to extract information
-      * which affects executable network execution. The list of supported configuration values can be extracted via
-      * ExecutableNetwork::GetMetric with the SUPPORTED_CONFIG_KEYS key, but some of these keys cannot be changed dymanically, 
-      * e.g. DEVICE_ID cannot changed if an executable network has already been compiled for particular device.
-      * @param name - config key, can be found in ie_plugin_config.hpp
-      * @param result - value of config corresponding to config key
-      * @param resp - Pointer to the response message that holds a description of an error if any occurred
-      * @return code of the operation. OK if succeeded
-      */
+    /** @brief Gets configuration for current executable network.
+     * 
+     * The method is responsible for extracting information
+     * which affects executable network execution. The list of supported configuration values can be extracted via
+     * ExecutableNetwork::GetMetric with the SUPPORTED_CONFIG_KEYS key, but some of these keys cannot be changed dynamically,
+     * e.g. DEVICE_ID cannot be changed if an executable network has already been compiled for a particular device.
+     * 
+     * @param name config key, can be found in ie_plugin_config.hpp
+     * @param result value of config corresponding to config key
+     * @param resp Pointer to the response message that holds a description of an error if any occurred
+     * @return code of the operation. InferenceEngine::OK if succeeded
+     */
     virtual StatusCode GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const noexcept = 0;
 
     /**
-     * @brief Gets general runtime metric for an executable network. It can be network name, actual device ID on
+     * @brief Gets general runtime metric for an executable network.
+     * 
+     * It can be a network name, the actual device ID on
      * which executable network is running or all other properties which cannot be changed dynamically.
-     * @param name  - metric name to request
-     * @param result - metric value corresponding to metric key
-     * @param resp - Pointer to the response message that holds a description of an error if any occurred
-     * @return code of the operation. OK if succeeded
+     * 
+     * @param name metric name to request
+     * @param result metric value corresponding to metric key
+     * @param resp Pointer to the response message that holds a description of an error if any occurred
+     * @return code of the operation. InferenceEngine::OK if succeeded
      */
     virtual StatusCode GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const noexcept = 0;
 };
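A hedged sketch of calling this interface directly; `exeNetwork` is assumed to be an already created `IExecutableNetwork::Ptr`, and `METRIC_KEY` comes from `ie_plugin_config.hpp`:

```cpp
#include <ie_iexecutable_network.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>
#include <string>

void inspect(InferenceEngine::IExecutableNetwork::Ptr exeNetwork) {
    InferenceEngine::ResponseDesc resp;

    // Query a general runtime metric, for example the network name
    InferenceEngine::Parameter name;
    if (exeNetwork->GetMetric(METRIC_KEY(NETWORK_NAME), name, &resp) == InferenceEngine::OK) {
        std::cout << "Network name: " << name.as<std::string>() << std::endl;
    }

    // Create an inference request with preallocated input and output blobs
    InferenceEngine::IInferRequest::Ptr request;
    if (exeNetwork->CreateInferRequest(request, &resp) != InferenceEngine::OK) {
        std::cerr << "CreateInferRequest failed: " << resp.msg << std::endl;
    }
}
```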
index e1510d9..22c9633 100644 (file)
 
 #include "details/ie_no_copy.hpp"
 
+/**
+ * @def INFERENCE_EXTENSION_API(TYPE)
+ * @brief Defines Inference Engine Extension API method
+ */
 
 #if defined(_WIN32) && defined(IMPLEMENT_INFERENCE_EXTENSION_API)
-#define INFERENCE_EXTENSION_API(TYPE) extern "C"  __declspec(dllexport) TYPE
+# define INFERENCE_EXTENSION_API(TYPE) extern "C"  __declspec(dllexport) TYPE
 #else
-#define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
+# define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
 #endif
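A minimal sketch of the typical use of this macro: declaring the `CreateExtension` entry point that a custom extension library exports (the signature mirrors the one declared in `ie_iextension.h`):

```cpp
#include <ie_iextension.h>

// On Windows the macro expands to extern "C" __declspec(dllexport),
// elsewhere it falls back to INFERENCE_ENGINE_API
INFERENCE_EXTENSION_API(InferenceEngine::StatusCode)
CreateExtension(InferenceEngine::IExtension*& ext, InferenceEngine::ResponseDesc* resp) noexcept;
```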
 
 
index c1e3fae..6ef3506 100644 (file)
@@ -23,8 +23,10 @@ namespace InferenceEngine {
  * plugin during setting of affinity and loading of split sub-network to the plugins
 * The custom loader can define additional settings for the plugins or network loading
  * Examples of cases when this interface should be implemented in the application:
+ * 
  * 1. add custom layers to existing plugins if it is not pointed to the heterogeneous plugin
  *  or registration of custom layer is different than supported in available public plugins
+ * 
  * 2. set affinity manually for the same plugin being initialized by different parameters,
  *  e.g different device id
  *  In this case there will be mapping of
@@ -89,18 +91,29 @@ public:
     INFERENCE_ENGINE_DEPRECATED
     virtual void QueryNetwork(const std::string &device,
                               const ICNNNetwork &network,
-                              const std::map<std::string, std::string>& /*config*/,
+                              const std::map<std::string, std::string>& config,
                               QueryNetworkResult &res) noexcept = 0;
 
-    INFERENCE_ENGINE_DEPRECATED
+
+    /**
+     * @deprecated Use InferenceEngine::Core with HETERO device in InferenceEngine::Core::QueryNetwork.
+     * @brief Sets log callback
+     * @param listener A reference to IErrorListener object
+     */
     virtual void SetLogCallback(IErrorListener &listener) = 0;
 
     IE_SUPPRESS_DEPRECATED_START
+    /**
+     * @brief Shared pointer to IHeteroDeviceLoader instance
+     */
     using Ptr = std::shared_ptr<IHeteroDeviceLoader>;
     IE_SUPPRESS_DEPRECATED_END
 };
 
 IE_SUPPRESS_DEPRECATED_START
+/**
+ * @brief Represents map from device name to device-specific loader
+ */
 using MapDeviceLoaders = std::map<std::string, InferenceEngine::IHeteroDeviceLoader::Ptr>;
 IE_SUPPRESS_DEPRECATED_END
 
index d922f5b..e8dc9ee 100644 (file)
@@ -33,12 +33,18 @@ public:
         /** IInferRequest doesn't block or interrupt current thread and immediately returns inference status */
         STATUS_ONLY = 0,
     };
-
+    /**
+     * @brief A shared pointer to the IInferRequest object
+     */
     using Ptr = std::shared_ptr<IInferRequest>;
+    /**
+     * @brief A smart pointer to the IInferRequest object
+     */
     using WeakPtr = std::weak_ptr<IInferRequest>;
 
     /**
      * @brief Sets input/output data to infer
+     *
      * @note: Memory allocation does not happen
      * @param name Name of input or output blob.
      * @param data Reference to input or output blob. The type of a blob must match the network input precision and size.
@@ -49,6 +55,7 @@ public:
 
     /**
      * @brief Gets input/output data for inference
+     *
      * @note: Memory allocation does not happen
      * @param name Name of input or output blob.
      * @param data Reference to input or output blob. The type of Blob must match the network input precision and size.
@@ -59,6 +66,7 @@ public:
 
     /**
      * @brief Infers specified input(s) in synchronous mode
+     *
      * @note blocks all methods of IInferRequest while request is ongoing (running or waiting in queue)
      * @param resp Optional: pointer to an already allocated object to contain information in case of failure
      * @return Status code of the operation: OK (0) for success
@@ -67,6 +75,7 @@ public:
 
     /**
      * @brief Queries performance measures per layer to get feedback of what is the most time consuming layer
+     *
      * @note: not all plugins provide meaningful data
      * @param perfMap Map of layer names to profiling information for that layer
      * @param resp Optional: pointer to an already allocated object to contain information in case of failure
@@ -77,6 +86,7 @@ public:
 
     /**
      * @brief Waits for the result to become available. Blocks until specified millis_timeout has elapsed or the result becomes available, whichever comes first.
+     *
      * @param millis_timeout Maximum duration in milliseconds to block for
      * @note There are special cases when millis_timeout is equal some value of the WaitMode enum:
      * * STATUS_ONLY - immediately returns inference status (IInferRequest::RequestStatus). It does not block or interrupt current thread
@@ -88,6 +98,7 @@ public:
 
     /**
      * @brief Starts inference of specified input(s) in asynchronous mode
+     *
      * @note: It returns immediately. Inference starts also immediately
      * @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
      * @return Enumeration of the resulted action: OK (0) for success
@@ -96,6 +107,7 @@ public:
 
     /**
      * @brief Completion callback definition as pointer to a function
+     *
      * @param context Pointer to request for providing context inside callback
      * @param code Completion result status: OK (0) for success
      */
@@ -104,6 +116,7 @@ public:
 
     /**
      * @brief Sets a callback function that will be called on success or failure of asynchronous request
+     *
      * @param callback A function to be called
      * @return Enumeration of the resulted action: OK (0) for success
      */
@@ -111,6 +124,7 @@ public:
 
     /**
      * @brief Gets arbitrary data for the request and stores a pointer to a pointer to the obtained data
+     *
      * @param data Pointer to a pointer to the gotten arbitrary data
      * @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
      * @return Enumeration of the resulted action: OK (0) for success
@@ -119,6 +133,7 @@ public:
 
     /**
      * @brief Sets arbitrary data for the request
+     *
      * @param data Pointer to a pointer to arbitrary data to set
      * @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
      * @return Enumeration of the resulted action: OK (0) for success
@@ -127,6 +142,7 @@ public:
 
     /**
     * @brief Sets new batch size when dynamic batching is enabled in executable network that created this request.
+     *
     * @param batch_size new batch size to be used by all the following inference calls for this request.
     * @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
     * @return Enumeration of the resulted action: OK (0) for success
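A hedged sketch of the asynchronous flow described above; `request` is assumed to be obtained from `IExecutableNetwork::CreateInferRequest`:

```cpp
#include <ie_iinfer_request.hpp>
#include <iostream>

void runAsync(InferenceEngine::IInferRequest::Ptr request) {
    InferenceEngine::ResponseDesc resp;

    // StartAsync returns immediately; inference begins in the background
    if (request->StartAsync(&resp) != InferenceEngine::OK) {
        std::cerr << "StartAsync failed: " << resp.msg << std::endl;
        return;
    }

    // RESULT_READY blocks until the result becomes available
    InferenceEngine::StatusCode status =
        request->Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY, &resp);
    if (status == InferenceEngine::OK) {
        std::cout << "Inference completed" << std::endl;
    }
}
```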
index 66f43fa..25941b7 100644 (file)
@@ -139,6 +139,16 @@ public:
             return res;
         }
     }
+    /**
+      * @brief Serializes a float value with the C locale formatting;
+      * used for serializing default values
+      */
+    static std::string ie_serialize_float(float value) {
+        std::stringstream val_stream;
+        val_stream.imbue(std::locale("C"));
+        val_stream << value;
+        return val_stream.str();
+    }
 
     /**
      * @brief Gets float value for the given parameter
@@ -147,7 +157,7 @@ public:
      * @return float value
      */
     float GetParamAsFloat(const char* param, float def) const {
-        std::string val = GetParamAsString(param, std::to_string(def).c_str());
+        std::string val = GetParamAsString(param, ie_serialize_float(def).c_str());
         try {
             return ie_parse_float(val);
         } catch (...) {
@@ -391,11 +401,11 @@ public:
         return result;
     }
     /**
-     * @brief Returns an boolean value for the given parameter.
+     * @brief Returns a boolean value for the given parameter.
      * The valid values are (true, false, 1, 0).
      * @param param Name of the layer parameter
      * @param def Default value of the parameter if not found
-     * @return An bool value for the specified parameter
+     * @return A bool value for the specified parameter
      */
     bool GetParamAsBool(const char *param, bool def) const {
         std::string val = GetParamAsString(param, std::to_string(def).c_str());
@@ -414,7 +424,29 @@ public:
         return result;
     }
     /**
-     * @deprecated Use CNNLayer::GetParamAsBool
+     * @brief Returns a boolean value for the given parameter
+     * @param param Name of the layer parameter
+     * @return A bool value for the specified parameter
+     */
+    bool GetParamAsBool(const char *param) const {
+        std::string val = GetParamAsString(param);
+        std::string loweredCaseValue;
+        std::transform(val.begin(), val.end(), std::back_inserter(loweredCaseValue), [](char value) {
+            return std::tolower(value);
+        });
+
+        bool result = false;
+
+        if (!(std::istringstream(loweredCaseValue) >> std::boolalpha >> result)) {
+            // attempt to parse as a numeric (non-alpha) bool
+            return (GetParamAsInt(param) != 0);
+        }
+
+        return result;
+    }
+
+    /**
+     * @deprecated Use GetParamAsBool function for that functionality
      */
     INFERENCE_ENGINE_DEPRECATED
     bool GetParamsAsBool(const char *param, bool def) const {
@@ -589,10 +621,6 @@ public:
         return *this;
     }
     /**
-     * @brief move assignment operator
-     */
-    ConvolutionLayer& operator = (ConvolutionLayer &&) = default;
-    /**
      * @brief copy constructor
      */
     ConvolutionLayer(const ConvolutionLayer & that) : WeightableLayer(that) {
@@ -697,11 +725,6 @@ public:
         return *this;
     }
     /**
-     * @brief move assignment operator
-     */
-    PoolingLayer& operator = (PoolingLayer &&) = default;
-
-    /**
      * @brief copy constructor
      */
     PoolingLayer(const PoolingLayer & that) : CNNLayer(that) {
@@ -800,10 +823,6 @@ public:
         return *this;
     }
     /**
-     * @brief move assignment operator
-     */
-    BinaryConvolutionLayer& operator = (BinaryConvolutionLayer &&) = default;
-    /**
      * @brief copy constructor
      */
     BinaryConvolutionLayer(const BinaryConvolutionLayer & that) : WeightableLayer(that) {
@@ -1020,7 +1039,7 @@ public:
     enum eOperation {
         Sum = 0, Prod, Max, Sub, Min, Div, Squared_diff, Floor_mod, Pow,
         Equal, Not_equal, Less, Less_equal, Greater, Greater_equal,
-        Logical_AND, Logical_OR, Logical_XOR, Logical_NOT, Mean, Select
+        Logical_AND, Logical_OR, Logical_XOR, Logical_NOT, Mean
     };
 
     /**
@@ -1249,7 +1268,11 @@ public:
  * - Ct = ft (.) Ct-1 + it (.) ct
  * - Ht = ot (.) _h(Ct)
  */
-using LSTMCell = RNNCellBase;
+class LSTMCell : public RNNCellBase {
+ public:
+    using RNNCellBase::RNNCellBase;
+    using RNNCellBase::operator=;
+};
 
 /**
  * @brief GRU Cell layer
@@ -1284,7 +1307,11 @@ using LSTMCell = RNNCellBase;
  * - ht = _g(Wh*[rt (.) Ht-1, Xt] + Bh)
  * - Ht = (1 - zt) (.) ht + zt (.) Ht-1
  */
-using GRUCell  = RNNCellBase;
+class GRUCell : public RNNCellBase {
+ public:
+    using RNNCellBase::RNNCellBase;
+    using RNNCellBase::operator=;
+};
 
 /**
  * @brief RNN Cell layer
@@ -1314,7 +1341,12 @@ using GRUCell  = RNNCellBase;
  *
  * - Ht = _f(Wi*[Ht-1, Xt] + Bi)
  */
-using RNNCell  = RNNCellBase;
+class RNNCell : public RNNCellBase {
+ public:
+    using RNNCellBase::RNNCellBase;
+    using RNNCellBase::operator=;
+};
+
 
 /**
  * @brief Sequence of recurrent cells
@@ -1604,6 +1636,19 @@ public:
 
 
 /**
+ * @brief This class represents a SparseFillEmptyRows layer
+ * SparseFillEmptyRows fills empty rows in a sparse tensor
+ */
+class SparseFillEmptyRowsLayer : public CNNLayer {
+public:
+    /**
+    * @brief Creates a new SparseFillEmptyRowsLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
 * @brief This class represents a standard Reverse Sequence layer
 * Reverse Sequence modifies input tensor according to parameters
 */
@@ -1787,4 +1832,61 @@ public:
 };
 
 
+/**
+ * @brief This class represents a Unique layer.
+ * The Unique operation searches for unique elements in 1-D input
+ */
+class UniqueLayer : public CNNLayer {
+public:
+    /**
+    * @brief A flag indicating whether to sort unique elements
+    */
+    bool sorted;
+    /**
+    * @brief A flag indicating whether to return indices of input data elements in the output of uniques
+    */
+    bool return_inverse;
+    /**
+    * @brief A flag indicating whether to return the number of occurrences for each unique element
+    */
+    bool return_counts;
+
+    /**
+    * @brief Creates a new UniqueLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+ * @brief This class represents a standard NonMaxSuppression layer
+ */
+class NonMaxSuppressionLayer : public CNNLayer {
+public:
+    /**
+    * @brief The 'center_point_box' indicates the format of the box data
+    */
+    bool center_point_box = false;
+    /**
+    * @brief Creates a new NonMaxSuppressionLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+ * @brief This class represents a standard Scatter layer
+ */
+class ScatterLayer : public CNNLayer {
+public:
+    /**
+    * @brief The axis in Dictionary to scatter Indexes from
+    */
+    int axis = 0;
+    /**
+    * @brief Creates a new ScatterLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
 }  // namespace InferenceEngine
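A minimal sketch of reading typed parameters from one of the new layer classes with the `GetParamAs*` helpers shown earlier in this header; the boolean names follow the IR attributes of the Unique operation, while the `threshold` parameter and its default are purely illustrative:

```cpp
#include <ie_layers.h>

void readUniqueParams(const InferenceEngine::UniqueLayer& layer) {
    // GetParamAsBool accepts "true"/"false" as well as numeric 0/1 values
    bool sorted = layer.GetParamAsBool("sorted", false);
    bool returnInverse = layer.GetParamAsBool("return_inverse", false);
    // The default value is serialized with the C locale via ie_serialize_float
    float threshold = layer.GetParamAsFloat("threshold", 0.5f);  // illustrative parameter
    (void)sorted; (void)returnInverse; (void)threshold;
}
```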
index 38901c0..45e07ba 100644 (file)
@@ -226,6 +226,7 @@ public:
                 inconsistentLayout = dims.size() != 1;
                 break;
             case Layout::BLOCKED:
+            case Layout::ANY:
                 inconsistentLayout = false;
                 break;
             case Layout::NCDHW:
index a214b10..4d9ada8 100644 (file)
@@ -24,6 +24,7 @@
 #include "tbb/parallel_for.h"
 #include "tbb/task_arena.h"
 
+#include "tbb/parallel_sort.h"
 #include "tbb/parallel_reduce.h"
 #include "tbb/blocked_range.h"
 #include "tbb/blocked_range2d.h"
@@ -40,6 +41,7 @@ inline int  parallel_get_env_threads() { return 0; }
     #define PARTITIONING
 #endif
 #elif IE_THREAD == IE_THREAD_OMP
+#include <algorithm>
 #include <cstdlib>
 #include <string>
 #include <omp.h>
@@ -66,6 +68,7 @@ inline int  parallel_get_env_threads() {
 }
 
 #elif IE_THREAD == IE_THREAD_SEQ
+#include <algorithm>  // NOLINT
 inline int  parallel_get_env_threads() { return 1; }
 inline int  parallel_get_max_threads() { return 1; }
 inline int  parallel_get_num_threads() { return 1; }
@@ -130,6 +133,18 @@ void parallel_nt_static(int nthr, const F &func) {
 #endif
 }
 
+template <typename I, typename F>
+void parallel_sort(I begin, I end, const F &comparator) {
+#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
+    tbb::parallel_sort(begin, end, comparator);
+#elif IE_THREAD == IE_THREAD_OMP
+    // TODO: propose OpenMP version
+    std::sort(begin, end, comparator);
+#elif IE_THREAD == IE_THREAD_SEQ
+    std::sort(begin, end, comparator);
+#endif
+}
+
 template <typename T0, typename R, typename F>
 R parallel_sum(const T0 &D0, const R &input, const F &func) {
 #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
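A minimal usage sketch of the new `parallel_sort` helper, which dispatches to `tbb::parallel_sort` under the TBB backends and to `std::sort` otherwise:

```cpp
#include <ie_parallel.hpp>
#include <vector>

void sortDescending(std::vector<float>& values) {
    // The comparator is the same predicate std::sort would take
    InferenceEngine::parallel_sort(values.begin(), values.end(),
                                   [](float a, float b) { return a > b; });
}
```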
index 9114118..67a044c 100644 (file)
@@ -35,7 +35,9 @@ public:
      * @brief Move constructor
      * @param parameter Parameter object
      */
-    Parameter(Parameter &&parameter) noexcept: ptr(std::move(parameter.ptr)) {}
+    Parameter(Parameter &&parameter) noexcept {
+        std::swap(ptr, parameter.ptr);
+    }
 
     /**
      * @brief Copy constructor
@@ -233,11 +235,11 @@ private:
         }
 
         T& get() & {
-            return std::get<0>(*this);
+            return std::get<0>(*static_cast<std::tuple<T>*>(this));
         }
 
         const T& get() const & {
-            return std::get<0>(*this);
+            return std::get<0>(*static_cast<const std::tuple<T>*>(this));
         }
 
         template <class U>
index 9229a74..6caf1cf 100644 (file)
 #include <map>
 #include <set>
 
+/**
+ * @def INFERENCE_PLUGIN_API(type)
+ * @brief Defines Inference Engine Plugin API method
+ */
 
 #if defined(_WIN32)
     #ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
@@ -82,12 +86,14 @@ struct INFERENCE_ENGINE_API_CLASS(QueryNetworkResult) {
     /**
      * @brief A copy assignment operator
      * @param q A value to copy from
+     * @return A copied object
      */
     const QueryNetworkResult & operator= (const QueryNetworkResult & q);
 
     /**
      * @brief A move assignment operator
      * @param q A value to move from
+     * @return A moved object
      */
     QueryNetworkResult & operator= (QueryNetworkResult && q);
 
@@ -220,7 +226,8 @@ public:
      * @param res Reference to query network result
      */
     INFERENCE_ENGINE_DEPRECATED
-    virtual void QueryNetwork(const ICNNNetwork& /*network*/, QueryNetworkResult& res) const noexcept {
+    virtual void QueryNetwork(const ICNNNetwork& network, QueryNetworkResult& res) const noexcept {
+        (void)network;
         res.rc = InferenceEngine::NOT_IMPLEMENTED;
     }
 
@@ -230,8 +237,10 @@ public:
      * @param config Map of pairs: (config parameter name, config parameter value)
      * @param res Reference to query network result
      */
-    virtual void QueryNetwork(const ICNNNetwork& /*network*/,
-                              const std::map<std::string, std::string> &/*config*/, QueryNetworkResult& res) const noexcept {
+    virtual void QueryNetwork(const ICNNNetwork& network,
+                              const std::map<std::string, std::string> & config, QueryNetworkResult& res) const noexcept {
+        (void)network;
+        (void)config;
         res.rc = InferenceEngine::NOT_IMPLEMENTED;
     }
 };
index a3764e8..2d14316 100644 (file)
@@ -24,10 +24,15 @@ namespace Metrics {
 #endif
 
 /**
-* @brief shortcut for defining common Inference Engine metrics
-*/
-
+ * @def METRIC_KEY(name)
+ * @brief shortcut for defining common Inference Engine metrics
+ */
 #define METRIC_KEY(name) InferenceEngine::Metrics::METRIC_##name
+
+/**
+ * @def EXEC_NETWORK_METRIC_KEY(name)
+ * @brief shortcut for defining common Inference Engine ExecutableNetwork metrics
+ */
 #define EXEC_NETWORK_METRIC_KEY(name) METRIC_KEY(name)
 
 #define DECLARE_METRIC_KEY(name, ...)               \
@@ -37,8 +42,9 @@ namespace Metrics {
 #define DECLARE_EXEC_NETWORK_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(name, __VA_ARGS__)
 
 /**
-* @brief shortcut for defining metric values
-*/
+ * @def METRIC_VALUE(name)
+ * @brief shortcut for defining metric values
+ */
 #define METRIC_VALUE(name) InferenceEngine::Metrics::name
 #define DECLARE_METRIC_VALUE(name) static constexpr auto name = #name
 
@@ -141,15 +147,17 @@ DECLARE_EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS, unsigned int);
 namespace PluginConfigParams {
 
 /**
-* @brief shortcut for defining configuration keys
-*/
+ * @def CONFIG_KEY(name)
+ * @brief shortcut for defining configuration keys
+ */
 #define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name)
 #define _CONFIG_KEY(name) KEY_##name
 #define DECLARE_CONFIG_KEY(name) static constexpr auto _CONFIG_KEY(name) = #name
 
 /**
-* @brief shortcut for defining configuration values
-*/
+ * @def CONFIG_VALUE(name)
+ * @brief shortcut for defining configuration values
+ */
 #define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
 #define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name
 
index 41b4e41..67730fc 100644 (file)
@@ -12,6 +12,7 @@
 #include <string>
 #include <vector>
 #include <cpp/ie_plugin_cpp.hpp>
+#include <multi-device/multi_device_config.hpp>
 
 namespace InferenceEngine {
 /**
@@ -35,6 +36,7 @@ public:
     /**
      * @deprecated Use InferenceEngine::Core to work with devices by name
      * @brief Loads a plugin from directories that is suitable for the device string
+     * @param deviceName A string value representing target device
      * @return A pointer to the plugin
      */
     INFERENCE_ENGINE_DEPRECATED
@@ -43,6 +45,7 @@ public:
     /**
      * @deprecated Use InferenceEngine::Core to work with devices by name
      * @brief Loads a plugin from directories that is suitable for the device
+     * @param device An instance of InferenceEngine::TargetDevice
      * @return A pointer to the plugin
      */
     INFERENCE_ENGINE_DEPRECATED
index c4d63a5..cbfc19b 100644 (file)
@@ -59,7 +59,8 @@ public:
 
     /**
      * @brief Custom precision constructor
-     * @param byteSize size of elements
+     *
+     * @param bitsSize Size of an element in bits
      * @param name optional name string, used in serialisation
      */
     explicit Precision(size_t bitsSize, const char * name = nullptr) {
@@ -179,8 +180,9 @@ public:
     }
 
     /**
-     * @brief Returns size in bytes of single element of that precision
-     * @deprecated : size of precision will be reported in bits in future releases
+     * @brief Returns size of a single element of that precision in bits
+     *
+     * @returns Number of bits per element
      */
     size_t size() const {
         if (precisionInfo.bitsSize == 0) {
@@ -195,9 +197,21 @@ public:
     }
 
  protected:
+    /**
+     * @brief Returns PrecisionInfo by its name
+     *
+     * @param name Name of precision
+     */
     template<Precision::ePrecision precision>
     static PrecisionInfo makePrecisionInfo(const char * name);
 
+    /**
+     * @brief Compares two C-strings
+     *
+     * @param l Const pointer to first string
+     * @param r Const pointer to another string
+     * @returns True if strings are the same
+     */
     static bool areSameStrings(const char *l, const char *r) noexcept {
         if (l == r)
             return true;
@@ -211,6 +225,9 @@ public:
         return *l == *r;
     }
 
+    /**
+     * @brief Returns PrecisionInfo for the given InferenceEngine::Precision::ePrecision value
+     */
     static PrecisionInfo getPrecisionInfo(ePrecision v) {
 #define CASE(x) case x: return makePrecisionInfo<x>(#x);
         switch (v) {
@@ -334,6 +351,13 @@ inline std::ostream & operator << (std::ostream &out, const InferenceEngine::Pre
     return out << Precision(p).name();
 }
 
+inline constexpr uint32_t getPrecisionMask(InferenceEngine::Precision::ePrecision precision1,
+                                           InferenceEngine::Precision::ePrecision precision2,
+                                           InferenceEngine::Precision::ePrecision precision3 = InferenceEngine::Precision::MIXED,
+                                           InferenceEngine::Precision::ePrecision precision4 = InferenceEngine::Precision::MIXED) {
+    return (precision1) | (precision2 << 8) | (precision3 << 16) | (precision4 << 24);
+}
+
 /** @endcond */
 
 }  // namespace InferenceEngine
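A minimal sketch of using `getPrecisionMask` to match an input/output precision pair against a compile-time constant; the FP32/FP16 pair is illustrative:

```cpp
#include <ie_precision.hpp>
#include <cstdint>

constexpr uint32_t kFp32ToFp16 =
    InferenceEngine::getPrecisionMask(InferenceEngine::Precision::FP32,
                                      InferenceEngine::Precision::FP16);

bool isFp32ToFp16(InferenceEngine::Precision::ePrecision in,
                  InferenceEngine::Precision::ePrecision out) {
    return InferenceEngine::getPrecisionMask(in, out) == kFp32ToFp16;
}
```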
index 31afb20..cf745f3 100644 (file)
 
 namespace InferenceEngine {
 
+/**
+* @brief Structure with information about Primitive
+*/
 struct PrimitiveInfo {
+    /**
+    * @brief A shared pointer to PrimitiveInfo object
+    */
     using Ptr = std::shared_ptr<PrimitiveInfo>;
 
-    std::string sId;          // some internal id, could be used as a name
-    std::string sType;        // implementation type of this kernel
-    int iPreAllocatedMemory;  // mainly the allocation of the output tensor
+    /**
+    * @brief Some internal id, could be used as a name
+    */
+    std::string sId;
+
+    /**
+    * @brief Implementation type of this kernel
+    */
+    std::string sType;
 
+    /**
+    * @brief Mainly the allocation of the output tensor
+    */
+    int iPreAllocatedMemory;
+
+    /**
+    * @brief Vector of TensorInfo objects that are related to input tensors
+    */
     std::vector<TensorInfo::Ptr> inputs;
+
+    /**
+    * @brief Vector of TensorInfo objects that are related to output tensors
+    */
     std::vector<TensorInfo::Ptr> outputs;
 
-    std::map<std::string, std::string> extraInfo;  // any other important textual information user might find interesting about this kernel
+    /**
+    * @brief Any other important textual information user might find interesting about this kernel
+    */
+    std::map<std::string, std::string> extraInfo;
 };
 
 }  // namespace InferenceEngine
index ccbf3e8..69d092a 100644 (file)
 
 namespace InferenceEngine {
 
+/**
+* @struct TensorInfo
+* @brief This structure describes tensor information
+*/
 struct TensorInfo {
+    /**
+    * @brief A shared pointer to the TensorInfo object
+    */
     using Ptr = std::shared_ptr<TensorInfo>;
 
-    // memory layout BFYX, BXYF (enum)
-    // size
-    // precision
+    /**
+    * @brief A map of extra info:
+    * - memory layout BFYX, BXYF (enum)
+    * - size
+    * - precision
+    */
     std::map<std::string, std::string> extraInfo;
 };
 
diff --git a/inference-engine/include/multi-device/multi_device_config.hpp b/inference-engine/include/multi-device/multi_device_config.hpp
new file mode 100644 (file)
index 0000000..a5f037a
--- /dev/null
@@ -0,0 +1,36 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief A header that defines advanced properties related to the Multi-Device plugin.
+ * These properties should be used in SetConfig() and LoadNetwork() methods
+ *
+ * @file multi_device_config.hpp
+ */
+
+#pragma once
+
+#include <string>
+#include "ie_plugin_config.hpp"
+
+namespace InferenceEngine {
+
+namespace MultiDeviceConfigParams {
+
+/**
+ * @def MULTI_CONFIG_KEY(name)
+ * @brief A macro which provides a MULTI-mangled name for configuration key with name `name`
+ */
+#define MULTI_CONFIG_KEY(name) InferenceEngine::MultiDeviceConfigParams::_CONFIG_KEY(MULTI_##name)
+
+#define DECLARE_MULTI_CONFIG_KEY(name) DECLARE_CONFIG_KEY(MULTI_##name)
+#define DECLARE_MULTI_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(MULTI_##name)
+
+/**
+ * @brief Device Priorities config option, with comma-separated devices listed in the desired priority
+ */
+DECLARE_MULTI_CONFIG_KEY(DEVICE_PRIORITIES);
+
+}  // namespace MultiDeviceConfigParams
+}  // namespace InferenceEngine
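A hedged sketch of passing the device priorities option when loading a network on the MULTI device; `core` and `network` are assumed to be prepared beforehand, and the GPU,CPU priority list is illustrative:

```cpp
#include <ie_core.hpp>
#include <multi-device/multi_device_config.hpp>

InferenceEngine::ExecutableNetwork loadOnMulti(InferenceEngine::Core& core,
                                               InferenceEngine::CNNNetwork& network) {
    // Comma-separated list: GPU is tried first, CPU serves as the fallback
    return core.LoadNetwork(network, "MULTI",
        {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES), "GPU,CPU" }});
}
```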
diff --git a/inference-engine/include/vpu/hddl_plugin_config.hpp b/inference-engine/include/vpu/hddl_plugin_config.hpp
new file mode 100644 (file)
index 0000000..d2a87c1
--- /dev/null
@@ -0,0 +1,184 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief A header that defines advanced properties related to the HDDL plugin.
+ * These properties should be used in SetConfig() and LoadNetwork() methods of plugins
+ *
+ * @file hddl_plugin_config.hpp
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "ie_plugin_config.hpp"
+#include "ie_api.h"
+
+//
+// Options
+//
+
+#define VPU_HDDL_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_HDDL_##name)
+#define VPU_HDDL_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_HDDL_##name
+
+#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name)
+#define DECLARE_VPU_HDDL_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_HDDL_##name)
+
+//
+// Metrics
+//
+
+#define VPU_HDDL_METRIC(name) METRIC_KEY(VPU_HDDL_##name)
+#define DECLARE_VPU_HDDL_METRIC(name, ...)  DECLARE_METRIC_KEY(VPU_HDDL_##name, __VA_ARGS__)
+
+namespace InferenceEngine {
+
+namespace Metrics {
+
+/**
+* @brief Metric to get an int with the number of devices, String value is METRIC_VPU_HDDL_DEVICE_NUM
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_NUM, int);
+
+/**
+* @brief Metric to get a std::vector<std::string> of device names, String value is METRIC_VPU_HDDL_DEVICE_NAME
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_NAME, std::vector<std::string>);
+
+/**
+* @brief  Metric to get a std::vector<std::string> of device models, String value is METRIC_VPU_HDDL_DEVICE_MODEL
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_MODEL, std::vector<std::string>);
+
+/**
+* @brief  Metric to get a std::vector<float> of device thermal, String value is METRIC_VPU_HDDL_DEVICE_THERMAL
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_THERMAL, std::vector<float>);
+
+/**
+* @brief  Metric to get a std::vector<uint32> of device ids, String value is METRIC_VPU_HDDL_DEVICE_ID
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_ID, std::vector<unsigned int>);
+
+/**
+* @brief  Metric to get a std::vector<int> of device subclasses, String value is METRIC_VPU_HDDL_DEVICE_SUBCLASS
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_SUBCLASS, std::vector<int>);
+
+/**
+* @brief  Metric to get a std::vector<uint32> of device total memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_TOTAL
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_TOTAL, std::vector<unsigned int>);
+
+/**
+* @brief  Metric to get a std::vector<uint32> of device used memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_USED
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_USED, std::vector<unsigned int>);
+
+/**
+* @brief  Metric to get a std::vector<float> of device utilization, String value is METRIC_VPU_HDDL_DEVICE_UTILIZATION
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_UTILIZATION, std::vector<float>);
+
+/**
+* @brief  Metric to get a std::vector<std::string> of stream ids, String value is METRIC_VPU_HDDL_DEVICE_STREAM_ID
+*/
+DECLARE_VPU_HDDL_METRIC(STREAM_ID, std::vector<std::string>);
+
+
+/**
+* @brief  Metric to get a std::vector<std::string> of device tags, String value is METRIC_VPU_HDDL_DEVICE_TAG
+*/
+DECLARE_VPU_HDDL_METRIC(DEVICE_TAG, std::vector<std::string>);
+
+}  // namespace Metrics
+
+namespace VPUConfigParams {
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: Arbitrary non-empty string. An empty string ("") means the option is not set. Default: "";
+ * This option allows you to specify the number of MYX devices used for inference of a specific executable network.
+ * Note: Only one network would be allocated to one device.
+ * The number of devices for the tag is specified in the hddl_service.config file.
+ * Example:
+ * "service_settings":
+ * {
+ *     "graph_tag_map":
+ *     {
+ *         "tagA":3
+ *     }
+ * }
+ * It means that an executable network marked with tagA will be executed on 3 devices
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(GRAPH_TAG);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: Arbitrary non-empty string. An empty string ("") means the option is not set. Default: "";
+ * This config forces the executable network to be allocated on one certain device (instead of multiple devices).
+ * All inference through this executable network will be done on this device.
+ * Note: Only one network would be allocated to one device.
+ * The number of devices which will be used for stream-affinity must be specified in hddl_service.config file.
+ * Example:
+ * "service_settings":
+ * {
+ *     "stream_device_number":5
+ * }
+ * It means that 5 devices will be used for stream-affinity
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(STREAM_ID);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: Arbitrary non-empty string. An empty string ("") means the option is not set. Default: "";
+ * This config allows the user to control devices flexibly. It gives a "tag" to a certain device while
+ * allocating a network to it. Afterwards, the user can allocate/deallocate networks to this device with this "tag".
+ * Devices used for such a use case are controlled by a so-called "Bypass Scheduler" in the HDDL backend, and the number
+ * of such devices needs to be specified in the hddl_service.config file.
+ * Example:
+ * "service_settings":
+ * {
+ *     "bypass_device_number": 5
+ * }
+ * It means that 5 devices will be used for the Bypass scheduler.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(DEVICE_TAG);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: "YES/NO", default is "NO".
+ * This config is a sub-config of DEVICE_TAG, and is only available when "DEVICE_TAG" is set. After a user loads a
+ * network, the user gets a handle to the network.
+ * If "YES", the allocated network is bound to the device (with the specified "DEVICE_TAG"), which means all subsequent
+ * inference through this network handle will be executed on this device only.
+ * If "NO", the allocated network is not bound to the device (with the specified "DEVICE_TAG"). If the same network
+ * is allocated on multiple other devices (also with BIND_DEVICE set to "NO"), then inference through any handle of these
+ * networks may be executed on any of these devices that have the network loaded.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(BIND_DEVICE);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: A signed int wrapped in a string, default is "0".
+ * This config is a sub-config of DEVICE_TAG, and is only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "NO".
+ * When there are multiple devices running a certain network (the same network running on multiple devices under the Bypass Scheduler),
+ * a device with a larger value has a higher priority, and more inference tasks will be fed to it.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(RUNTIME_PRIORITY);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: "YES/NO", default is "NO".
+ * SGAD is short for "Single Graph All Device". With this scheduler, once the application allocates one network, all devices
+ * (managed by the SGAD scheduler) will be loaded with this graph. The number of networks that can be loaded to one device
+ * can exceed one. Once the application deallocates one network from a device, all devices will unload the network.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(USE_SGAD);
+
+}  // namespace VPUConfigParams
+
+}  // namespace InferenceEngine
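A hedged sketch of passing the HDDL-specific options when loading a network; the tag name "tagA" mirrors the hddl_service.config example above and is illustrative only, and `core`/`network` are assumed to be prepared beforehand:

```cpp
#include <ie_core.hpp>
#include <vpu/hddl_plugin_config.hpp>

InferenceEngine::ExecutableNetwork loadOnHddl(InferenceEngine::Core& core,
                                              InferenceEngine::CNNNetwork& network) {
    return core.LoadNetwork(network, "HDDL", {
        { VPU_HDDL_CONFIG_KEY(DEVICE_TAG),  "tagA" },            // tag defined in hddl_service.config
        { VPU_HDDL_CONFIG_KEY(BIND_DEVICE), CONFIG_VALUE(NO) },  // do not bind the network to one device
    });
}
```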
index 69d04f1..5462acf 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "ie_plugin_config.hpp"
 #include "myriad_plugin_config.hpp"
+#include "hddl_plugin_config.hpp"
 #include "ie_api.h"
 
 //
@@ -105,6 +106,8 @@ DECLARE_VPU_CONFIG_KEY(COMPUTE_LAYOUT);
 DECLARE_VPU_CONFIG_VALUE(AUTO);
 DECLARE_VPU_CONFIG_VALUE(NCHW);
 DECLARE_VPU_CONFIG_VALUE(NHWC);
+DECLARE_VPU_CONFIG_VALUE(NCDHW);
+DECLARE_VPU_CONFIG_VALUE(NDHWC);
 
 /**
  * @brief This option allows to pass custom layers binding xml.
index d354f64..d3a094c 100644 (file)
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8.11)
+cmake_minimum_required (VERSION 2.8.12)
 
 project(Samples)
 
@@ -150,8 +150,6 @@ macro(ie_add_sample)
         if(NOT OpenCV_FOUND)
             message(WARNING "OPENCV is disabled or not found, " ${IE_SAMPLE_NAME} " skipped")
             return()
-        else()
-            add_definitions(-DUSE_OPENCV)
         endif()
     endif()
 
@@ -164,6 +162,9 @@ macro(ie_add_sample)
 
     # Create executable file from sources
     add_executable(${IE_SAMPLE_NAME} ${IE_SAMPLE_SOURCES} ${IE_SAMPLES_HEADERS})
+    if(IE_SAMPLE_OPENCV_DEPENDENCIES)
+        target_compile_definitions(${IE_SAMPLE_NAME} PRIVATE USE_OPENCV)
+    endif()
 
     if(WIN32)
         set_target_properties(${IE_SAMPLE_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_SAMPLE_NAME})
@@ -176,7 +177,6 @@ macro(ie_add_sample)
 
     target_link_libraries(${IE_SAMPLE_NAME} PRIVATE ${OpenCV_LIBRARIES} ${InferenceEngine_LIBRARIES}
                                                     ${IE_SAMPLE_DEPENDENCIES} IE::ie_cpu_extension gflags)
-
     if(UNIX)
         target_link_libraries(${IE_SAMPLE_NAME} PRIVATE pthread)
     endif()
@@ -195,12 +195,12 @@ endmacro()
 
 # use this flag if you need to throw custom message in case if the IE package is not found.
 if (IE_NOT_FOUND_MESSAGE)
-    find_package(InferenceEngine 2.0 QUIET)
+    find_package(InferenceEngine 2.1 QUIET)
     if (NOT(InferenceEngine_FOUND))
         message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
     endif()
 else()
-    find_package(InferenceEngine 2.0 REQUIRED)
+    find_package(InferenceEngine 2.1 REQUIRED)
 endif()
 
 # collect all samples subdirectories
index b1bde47..9e4e9d5 100644 (file)
@@ -1,21 +1,18 @@
-# Benchmark C++ Application
+# Benchmark C++ Tool
 
-This topic demonstrates how to use the Benchmark Application to estimate deep learning inference performance on
-supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
+This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
 
-> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Application. For the Python* implementation, refer to [Benchmark Application (Python*)](./inference-engine/ie_bridges/python/sample/benchmark_app/README.md).
+> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](./inference-engine/tools/benchmark_tool/README.md).
 
 
 ## How It Works
 
-Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
-plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
-on the mode defined with the `-api` command-line parameter.
+Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend on the mode defined with the `-api` command-line parameter.
 
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+> **NOTE**: By default, Inference Engine samples, tools and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 If you run the application in the synchronous mode, it creates one infer request and executes the `Infer` method.
-If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq` command-line parameter and executes the `StartAsync` method for each of them. If `-nireq` is not set, the demo will use the default value for specified device.
+If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq` command-line parameter and executes the `StartAsync` method for each of them. If `-nireq` is not set, the application will use the default value for specified device.
 
 The number of execution steps is defined by one of the following parameters:
 * Number of iterations specified with the `-niter` command-line argument
@@ -45,14 +42,19 @@ The application also saves executable graph information serialized to a XML file
 `-exec_graph_path` parameter.
 
 
-## Running
+## Run the Tool
 Notice that the benchmark_app usually produces optimal performance for any device out of the box.
 
+**So in most cases you don't need to play with the app options explicitly and the plain device name is enough**, for example, for CPU:
-```
-$benchmark_app -m <model> -i <input> -d CPU
+**So in most cases you don't need to play the app options explicitly and the plain device name is enough**, for example, for CPU:
+```sh
+./benchmark_app -m <model> -i <input> -d CPU
 ```
-As explained in the  [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) section, it is preferable to use the FP16 IR for the model.
+
+However, it still may be non-optimal for some cases, especially for very small networks. You can read more details in [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md).
+
+As explained in the [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) section, for all devices, including the new [MULTI device](./docs/IE_DG/supported_plugins/MULTI.md), it is preferable to use the FP16 IR for the model.
+Also, if latency of CPU inference on multi-socket machines is a concern, please refer to the same
+[Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) document.
 
 Running the application with the `-h` option yields the following usage message:
 ```
@@ -70,6 +72,7 @@ Options:
     -m "<path>"               Required. Path to an .xml file with a trained model.
     -d "<device>"             Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
                               Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin.
+                              Use "-d MULTI:<comma-separated_devices_list>" format to specify MULTI plugin. 
     The application looks for a suitable plugin for the specified device.
     -l "<absolute_path>"      Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
           Or
@@ -84,8 +87,11 @@ Options:
 
   CPU-specific performance options:
     -nstreams "<integer>"     Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode
-                              (for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
-    -nthreads "<integer>"     Optional. Number of threads to use for inference on the CPU (including HETERO case).
+                              (for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
+                              Default value is determined automatically for a device. 
+                              Please note that although the automatic selection usually provides a reasonable performance, 
+                              it still may be non-optimal for some cases, especially for very small networks.
+    -nthreads "<integer>"     Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
     -pin "YES"/"NO"           Optional. Enable ("YES" is default value) or disable ("NO") CPU threads pinning for CPU-involved inference.
 
   Statistics dumping options:
 If a model has only image input(s), please provide a folder with images or a path to an image as input.
 If a model has some specific input(s) (not images), please prepare a binary file(s), which is filled with data of appropriate precision and provide a path to them as input.
 If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one.
 
-To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
-
-> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-For example, to perform inference on CPU in the synchronous mode and get estimated performance metrics for AlexNet model,
-run the following command:
-
-```sh
-./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/alexnet_fp32.xml -d CPU -api sync
-```
-
-For the asynchronous mode:
-```sh
-./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/alexnet_fp32.xml -d CPU -api async
-```
-
-## Demo Output
+To run the tool, you can use public or Intel's pre-trained models. To download the models, use the OpenVINO [Model Downloader](./tools/downloader/README.md) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+## Examples of Running the Tool
+
+This section provides step-by-step instructions on how to run the Benchmark Tool with the `googlenet-v1` public model on CPU or FPGA devices. As an input, the `car.png` file from the `<INSTALL_DIR>/deployment_tools/demo/` directory is used.  
+
+> **NOTE:** The Internet access is required to execute the following steps successfully. If you have access to the Internet through the proxy server only, please make sure that it is configured in your OS environment.
+
+1. Download the model. Go to the Model Downloader directory and run the `downloader.py` script, specifying the model name and the directory to download the model to:
+   ```sh
+   cd <INSTALL_DIR>/deployment_tools/open_model_zoo/tools/downloader
+   ```
+   ```sh
+   python3 downloader.py --name googlenet-v1 -o <models_dir>
+   ```
+2. Convert the model to the Inference Engine IR format. Go to the Model Optimizer directory and run the `mo.py` script, specifying the path to the model, the data type (which must be FP32 for CPU and FPGA) and the output directory to generate the IR files:
+   ```sh
+   cd <INSTALL_DIR>/deployment_tools/model_optimizer
+   ```
+   ```sh
+   python3 mo.py --input_model <models_dir>/public/googlenet-v1/googlenet-v1.caffemodel --data_type FP32 --output_dir <ir_dir>
+   ```     
+3. Run the tool, specifying the `<INSTALL_DIR>/deployment_tools/demo/car.png` file as an input image, the IR of the `googlenet-v1` model and a device to perform inference on. The following commands demonstrate running the Benchmark Tool in the asynchronous mode on CPU and FPGA devices:
+   
+   * On CPU:
+   ```sh
+   ./benchmark_app -m <ir_dir>/googlenet-v1.xml -d CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true
+   ```
+   * On FPGA:
+   ```sh
+   ./benchmark_app -m <ir_dir>/googlenet-v1.xml -d HETERO:FPGA,CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true
+   ```
 
 The application outputs the number of executed iterations, total duration of execution, latency and throughput.
-Additionally, if you set the `-report_type` parameter, the application outputs statistics report.
-If you set the `-pc` parameter, the application outputs performance counters.
-If you set `-exec_graph_path`, the application reports executable graph information serialized.
+Additionally, if you set the `-report_type` parameter, the application outputs a statistics report. If you set the `-pc` parameter, the application outputs performance counters. If you set `-exec_graph_path`, the application reports the serialized executable graph information. All measurements, including per-layer PM counters, are reported in milliseconds.
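+
+For example, a command along the following lines (the `<report_dir>` folder and the `exec_graph.xml` file name are placeholders) requests a detailed statistics report, per-layer performance counters, and the serialized executable graph in a single run; the `detailed_counters` value is one of the report types defined in `statistics_report.hpp`:
+```sh
+./benchmark_app -m <ir_dir>/googlenet-v1.xml -d CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png \
+    -report_type detailed_counters -report_folder <report_dir> -pc -exec_graph_path <report_dir>/exec_graph.xml
+```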
 
-```
-[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
-Progress: [....................] 100.00% done
+Below are fragments of sample output for CPU and FPGA devices: 
 
-[Step 9/9] Dumping statistics report
-[ INFO ] Statistics collecting was not requested. No reports are dumped.
-Progress: [....................] 100.00% done
+* For CPU:
+   ```
+   [Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
+   Progress: [....................] 100.00% done
 
-Count:      4612 iterations
-Duration:   60110.04 ms
-Latency:    50.99 ms
-Throughput: 76.73 FPS
+   [Step 9/9] Dumping statistics report
+   [ INFO ] Statistics collecting was not requested. No reports are dumped.
+   Progress: [....................] 100.00% done
 
-```
+   Count:      4612 iterations
+   Duration:   60110.04 ms
+   Latency:    50.99 ms
+   Throughput: 76.73 FPS
+   ```
 
-All measurements including per-layer PM counters are reported in milliseconds.
+* For FPGA:
+   ```
+   [Step 10/11] Measuring performance (Start inference asynchronously, 5 inference requests using 4 streams for CPU, limits: 120000 ms duration)
+   Progress: [....................] 100% done
 
+   [Step 11/11] Dumping statistics report
+   Count:      102515 iterations
+   Duration:   120007.38 ms
+   Latency:    5.84 ms
+   Throughput: 854.24 FPS
+   ```
 
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
 * [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
+* [Model Downloader](./tools/downloader/README.md)
\ No newline at end of file
index 6b6991f..c9e2da2 100644 (file)
@@ -23,7 +23,9 @@ static const char api_message[] = "Optional. Enable Sync/Async API. Default valu
 
 /// @brief message for assigning cnn calculation to device
 static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). " \
-"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. ";
+"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. " \
+"Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. " \
+"The application looks for a suitable plugin for the specified device.";
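+// Example device strings (assumed): "CPU", "HETERO:FPGA,CPU", "MULTI:CPU,GPU"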
 
 /// @brief message for iterations count
 static const char iterations_count_message[] = "Optional. Number of iterations. " \
@@ -37,11 +39,14 @@ static const char execution_time_message[] = "Optional. Time in seconds to execu
 
 /// @brief message for #threads for CPU inference
 static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
-                                                "(including HETERO case).";
+                                                "(including HETERO and MULTI cases).";
 
 /// @brief message for #streams for CPU inference
 static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode "
-                                                "(for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>)";
+                                                "(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just <nstreams>). "
+                                                "Default value is determined automatically for a device. Please note that although the automatic selection "
+                                                "usually provides a reasonable performance, it still may be non-optimal for some cases, especially for "
+                                                "very small networks. See the sample's README for more details.";
 
 /// @brief message for user library argument
 static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.";
index 892bd5d..ca71319 100644 (file)
@@ -62,6 +62,10 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
         throw std::logic_error(err);
     }
 
+    if ((FLAGS_report_type == averageCntReport) && ((FLAGS_d.find("MULTI") != std::string::npos))) {
+        throw std::logic_error("only " + std::string(detailedCntReport) + " report type is supported for MULTI device");
+    }
+
     return true;
 }
 
@@ -89,10 +93,20 @@ static void next_step(const std::string additional_info = "") {
               << (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl;
 }
 
+template <typename T>
+T getMedianValue(const std::vector<T> &vec) {
+    std::vector<T> sortedVec(vec);
+    std::sort(sortedVec.begin(), sortedVec.end());
+    return (sortedVec.size() % 2 != 0) ?
+           sortedVec[sortedVec.size() / 2ULL] :
+           (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
+}
+
 /**
 * @brief The entry point of the benchmark application
 */
 int main(int argc, char *argv[]) {
+    std::shared_ptr<StatisticsReport> statistics;
     try {
         // ----------------- 1. Parsing and validating input arguments -------------------------------------------------
         next_step();
@@ -101,10 +115,30 @@ int main(int argc, char *argv[]) {
             return 0;
         }
 
+        if (!FLAGS_report_type.empty()) {
+            std::vector<gflags::CommandLineFlagInfo> flags;
+            StatisticsReport::Parameters command_line_arguments;
+            gflags::GetAllFlags(&flags);
+
+            for (auto &flag : flags) {
+                if (!flag.is_default) {
+                    command_line_arguments.push_back({ flag.name, flag.current_value });
+                }
+            }
+            statistics = std::make_shared<StatisticsReport>(StatisticsReport::Config{FLAGS_report_type, FLAGS_report_folder});
+            statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments);
+        }
+
         /** This vector stores paths to the processed images **/
         std::vector<std::string> inputFiles;
         parseInputFilesArguments(inputFiles);
 
+        if (FLAGS_nstreams.empty()) {
+            slog::warn << "-nstreams default value is determined automatically for a device. "
+                "Although the automatic selection usually provides a reasonable performance, "
+                "it still may be non-optimal for some cases; for more information, look at the README." << slog::endl << slog::endl;
+        }
+
         // ----------------- 2. Loading the Inference Engine -----------------------------------------------------------
         next_step();
 
@@ -141,9 +175,25 @@ int main(int argc, char *argv[]) {
         slog::info << "Loading network files" << slog::endl;
 
         CNNNetReader netBuilder;
+        auto startTime = Time::now();
         netBuilder.ReadNetwork(FLAGS_m);
         const std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
         netBuilder.ReadWeights(binFileName);
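+        // Helper lambdas: float_to_string formats a value with two decimal places;
+        // get_total_ms_time returns the milliseconds elapsed since startTime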
+        auto float_to_string = [] (const float number) {
+            std::stringstream ss;
+            ss << std::fixed << std::setprecision(2) << number;
+            return ss.str();
+        };
+        auto get_total_ms_time = [ &startTime ] () {
+            return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
+        };
+        auto duration_ms = float_to_string(get_total_ms_time());
+        slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
+        if (statistics)
+            statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+                                      {
+                                          {"read network time (ms)", duration_ms}
+                                      });
 
         CNNNetwork cnnNetwork = netBuilder.getNetwork();
         const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
@@ -180,8 +230,9 @@ int main(int argc, char *argv[]) {
         }
 
         const size_t batchSize = cnnNetwork.getBatchSize();
+        const Precision precision = cnnNetwork.getPrecision();
         slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize <<
-            ", precision: " << cnnNetwork.getPrecision() << slog::endl;
+            ", precision: " << precision << slog::endl;
 
         // ----------------- 5. Configuring input ----------------------------------------------------------------------
         next_step();
@@ -198,7 +249,8 @@ int main(int argc, char *argv[]) {
 
         bool perf_counts = (FLAGS_report_type == detailedCntReport ||
                             FLAGS_report_type == averageCntReport ||
-                            FLAGS_pc);
+                            FLAGS_pc ||
+                            !FLAGS_exec_graph_path.empty());
 
         auto devices = parseDevices(device_name);
         std::map<std::string, uint32_t> device_nstreams = parseValuePerDevice(devices, FLAGS_nstreams);
@@ -208,8 +260,13 @@ int main(int argc, char *argv[]) {
                 if (FLAGS_nthreads != 0)
                     ie.SetConfig({{ CONFIG_KEY(CPU_THREADS_NUM), std::to_string(FLAGS_nthreads) }}, device);
 
-                // pin threads for CPU portion of inference
-                ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), FLAGS_pin }}, device);
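+                // when MULTI execution includes a GPU, disable CPU thread pinning
+                // (binding is assumed to interfere with the GPU portion of the multi-device execution)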
+                if ((device_name.find("MULTI") != std::string::npos) &&
+                    (device_name.find("GPU") != std::string::npos)) {
+                    ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), CONFIG_VALUE(NO) }}, device);
+                } else {
+                    // pin threads for CPU portion of inference
+                    ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), FLAGS_pin }}, device);
+                }
 
                 // for CPU execution, more throughput-oriented execution via streams
                 if (FLAGS_api == "async")
@@ -223,6 +280,13 @@ int main(int argc, char *argv[]) {
                                     (device_nstreams.count(device) > 0 ? std::to_string(device_nstreams.at(device)) :
                                                                          "GPU_THROUGHPUT_AUTO") }}, device);
                 device_nstreams[device] = std::stoi(ie.GetConfig(device, CONFIG_KEY(GPU_THROUGHPUT_STREAMS)).as<std::string>());
+
+                if ((device_name.find("MULTI") != std::string::npos) &&
+                    (device_name.find("CPU") != std::string::npos)) {
+                    // multi-device execution with the CPU + GPU performs best with the GPU throttling hint,
+                    // which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
+                    ie.SetConfig({{ CLDNN_CONFIG_KEY(PLUGIN_THROTTLE), "1" }}, "GPU");
+                }
             } else if (device == "MYRIAD") {
                 ie.SetConfig({{ CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_NONE) },
                               { VPU_CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_WARNING) }}, device);
@@ -234,7 +298,15 @@ int main(int argc, char *argv[]) {
 
         std::map<std::string, std::string> config = {{ CONFIG_KEY(PERF_COUNT), perf_counts ? CONFIG_VALUE(YES) :
                                                                                              CONFIG_VALUE(NO) }};
+        startTime = Time::now();
         ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, device_name, config);
+        duration_ms = float_to_string(get_total_ms_time());
+        slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
+        if (statistics)
+            statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+                                      {
+                                          {"load network time (ms)", duration_ms}
+                                      });
 
         // ----------------- 8. Setting optimal runtime parameters -----------------------------------------------------
         next_step();
@@ -274,6 +346,28 @@ int main(int argc, char *argv[]) {
         }
         uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds);
 
+        if (statistics) {
+            statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
+                                      {
+                                            {"topology", cnnNetwork.getName()},
+                                            {"target device", device_name},
+                                            {"API", FLAGS_api},
+                                            {"precision", std::string(precision.name())},
+                                            {"batch size", std::to_string(batchSize)},
+                                            {"number of iterations", std::to_string(niter)},
+                                            {"number of parallel infer requests", std::to_string(nireq)},
+                                            {"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))},
+                                      });
+            for (auto& nstreams : device_nstreams) {
+                std::stringstream ss;
+                ss << "number of " << nstreams.first << " streams";
+                statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
+                                          {
+                                                {ss.str(), std::to_string(nstreams.second)},
+                                          });
+            }
+        }
+
         // ----------------- 9. Creating infer requests and filling input blobs ----------------------------------------
         next_step();
 
@@ -333,7 +427,7 @@ int main(int argc, char *argv[]) {
         inferRequestsQueue.waitAll();
         inferRequestsQueue.resetTimes();
 
-        const auto startTime = Time::now();
+        startTime = Time::now();
         auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
 
         /** Start inference & calculate performance **/
@@ -373,35 +467,34 @@ int main(int argc, char *argv[]) {
         // wait the latest inference executions
         inferRequestsQueue.waitAll();
 
-        StatisticsReport statistics({ FLAGS_d,
-                                      FLAGS_api,
-                                      batchSize,
-                                      nireq,
-                                      niter,
-                                      getDurationInMilliseconds(duration_seconds),
-                                      FLAGS_nthreads,
-                                      device_nstreams,
-                                      FLAGS_pin,
-                                      FLAGS_report_type,
-                                      FLAGS_report_folder
-                                    });
-        if (perf_counts) {
-            for (auto& request : inferRequestsQueue.requests) {
-                statistics.addPerfCounts(request->getPerformanceCounts());
+        double latency = getMedianValue<double>(inferRequestsQueue.getLatencies());
+        double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
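+        // Throughput: in sync mode, derived from the median latency of one batch;
+        // in async mode, from the total number of completed iterations over the total run duration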
+        double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency :
+                                             batchSize * 1000.0 * iteration / totalDuration;
+
+        if (statistics) {
+            statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+                                      {
+                                        {"total execution time (ms)", float_to_string(totalDuration)},
+                                        {"total number of iterations", std::to_string(iteration)},
+                                      });
+            if (device_name.find("MULTI") == std::string::npos) {
+                statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+                                          {
+                                            {"latency (ms)", float_to_string(latency)},
+                                          });
             }
+            statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+                                      {
+                                          {"throughput", float_to_string(fps)}
+                                      });
         }
-        statistics.addLatencies(inferRequestsQueue.getLatencies());
 
-        double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
-        double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / statistics.getMedianLatency() :
-                                             batchSize * 1000.0 * iteration / totalDuration;
         progressBar.finish();
 
         // ----------------- 11. Dumping statistics report -------------------------------------------------------------
         next_step();
 
-        statistics.dump(fps, iteration, totalDuration);
-
         if (!FLAGS_exec_graph_path.empty()) {
             try {
                 CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo();
@@ -412,19 +505,40 @@ int main(int argc, char *argv[]) {
             }
         }
 
-        if (FLAGS_pc) {
+        if (perf_counts) {
+            std::vector<std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>> perfCounts;
             for (size_t ireq = 0; ireq < nireq; ireq++) {
-                slog::info << "Pefrormance counts for " << ireq << "-th infer request:" << slog::endl;
-                printPerformanceCounts(inferRequestsQueue.requests[ireq]->getPerformanceCounts(), std::cout, getFullDeviceName(ie, FLAGS_d), false);
+                auto reqPerfCounts = inferRequestsQueue.requests[ireq]->getPerformanceCounts();
+                if (FLAGS_pc) {
+                    slog::info << "Performance counts for " << ireq << "-th infer request:" << slog::endl;
+                    printPerformanceCounts(reqPerfCounts, std::cout, getFullDeviceName(ie, FLAGS_d), false);
+                }
+                perfCounts.push_back(reqPerfCounts);
+            }
+            if (statistics) {
+                statistics->dumpPerformanceCounters(perfCounts);
             }
         }
 
+        if (statistics)
+            statistics->dump();
+
         std::cout << "Count:      " << iteration << " iterations" << std::endl;
-        std::cout << "Duration:   " << totalDuration << " ms" << std::endl;
-        std::cout << "Latency:    " << statistics.getMedianLatency() << " ms" << std::endl;
-        std::cout << "Throughput: " << fps << " FPS" << std::endl;
+        std::cout << "Duration:   " << float_to_string(totalDuration) << " ms" << std::endl;
+        if (device_name.find("MULTI") == std::string::npos)
+            std::cout << "Latency:    " << float_to_string(latency) << " ms" << std::endl;
+        std::cout << "Throughput: " << float_to_string(fps) << " FPS" << std::endl;
     } catch (const std::exception& ex) {
         slog::err << ex.what() << slog::endl;
+
+        if (statistics) {
+            statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+                                      {
+                                            {"error", ex.what()},
+                                      });
+            statistics->dump();
+        }
+
         return 3;
     }
 
index 821f4fe..2f8005d 100644 (file)
 
 #include "statistics_report.hpp"
 
-void StatisticsReport::addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat) {
-    if (_config.report_type == averageCntReport || _config.report_type == detailedCntReport) {
-        // collect per-iteration statistics only in case of enabled median/detailed statistic collecting
-        _performanceCounters.push_back(pmStat);
-    }
+void StatisticsReport::addParameters(const Category &category, const Parameters& parameters) {
+    if (_parameters.count(category) == 0)
+        _parameters[category] = parameters;
+    else
+        _parameters[category].insert(_parameters[category].end(), parameters.begin(), parameters.end());
 }
 
-void StatisticsReport::addLatencies(const std::vector<double> &latencies) {
-    _latencies.insert(_latencies.end(), latencies.begin(), latencies.end());
-}
+void StatisticsReport::dump() {
+    CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv");
 
-void StatisticsReport::dump(const double &fps, const size_t &iteration_number, const double &totalExecTime) {
-    if (_config.report_type.empty()) {
-        slog::info << "Statistics collecting was not requested. No reports are dumped." << slog::endl;
-        return;
-    }
+    auto dump_parameters = [ &dumper ] (const Parameters &parameters) {
+        for (auto& parameter : parameters) {
+            dumper << parameter.first << parameter.second;
+            dumper.endLine();
+        }
+    };
+    if (_parameters.count(Category::COMMAND_LINE_PARAMETERS)) {
+        dumper << "Command line parameters";
+        dumper.endLine();
 
-    std::string separator =
-#if defined _WIN32 || defined __CYGWIN__
-    #   if defined UNICODE
-        L"\\";
-    #   else
-        "\\";
-    #   endif
-#else
-        "/";
-#endif
-    if (_config.report_folder.empty())
-        separator = "";
-
-    CsvDumper dumper(true, _config.report_folder + separator + "benchmark_" + _config.report_type + "_report.csv");
-
-    // resulting number of columns in csv file depends on the report_type. If it's noCntReport, then
-    // no PM data is collected and there are only 3 columns in the file (in configuration section). If it's
-    // averageCntReport then median PM values are collected per each layer and the number of columns is 6.
-    // Example from GPU:
-    //
-    // layer name;exec status;layer type;exec type;real time;cpu time;
-    // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;615;3;
-    // Here, all the data are taken from InferenceEngine::InferenceEngineProfileInfo.
-    //
-    // In case of detailedCntReport the number of columns is 4 + _config.nireq * 2, because first 4 parameters
-    // are the same but realTime and cpuTime can be different on each iteration (example from 5 GPU requests):
-    // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;630,3;617,3;616,3;615,3;617,3;
-    size_t numOfColumns = 0;
-    if (_config.report_type == noCntReport) {
-        numOfColumns = 3;
-    } else if (_config.report_type == averageCntReport) {
-        numOfColumns = 6;
-    } else {
-        // for detailedCntReport
-        numOfColumns = 4 + _config.nireq * 2;
+        dump_parameters(_parameters.at(Category::COMMAND_LINE_PARAMETERS));
+        dumper.endLine();
     }
 
-    auto completeCsvRow = [](CsvDumper &dumper, size_t numOfColumns, size_t filled) {
-        for (size_t i = 0; i < numOfColumns - filled; i++)
-            dumper << "";
+    if (_parameters.count(Category::RUNTIME_CONFIG)) {
+        dumper << "Configuration setup";
         dumper.endLine();
-    };
-
-    // dump execution configuration
-    dumper << "Configuration setup";
-    completeCsvRow(dumper, numOfColumns, 1);
-    dumper << "config option" << "CLI parameter" << "value";
-    completeCsvRow(dumper, numOfColumns, 3);
-
-    dumper << "target device" << " -d" << _config.device;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "execution mode" << " -api" << _config.api;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "batch size" << " -b" << _config.batch;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "number of iterations" << " -niter" << _config.niter;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "number of parallel infer requests" << " -nireq" << _config.nireq;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "duration in ms" << " -t" << _config.duration;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "number of CPU threads" << " -nthreads" << _config.cpu_nthreads;
-    completeCsvRow(dumper, numOfColumns, 3);
-    for (auto& item : _config.nstreams)
-        dumper << "number of " << item.first << " streams" << " -nstreams" << item.second;
-    completeCsvRow(dumper, numOfColumns, 3);
-    dumper << "CPU pinning enabled" << " -pin" << _config.cpu_pin;
-    completeCsvRow(dumper, numOfColumns, 3);
 
-    dumper.endLine();
-
-    // write PM data from each iteration
-    if (!_performanceCounters.empty()) {
-        if (_config.report_type != averageCntReport && _config.report_type != detailedCntReport) {
-            throw std::logic_error("PM data can only be collected for average or detailed report types");
-        }
-
-        // this vector is sorted according to network layers execution order.
-        auto performanceMapSorted = preparePmStatistics();
-
-        dumper << "Performance counters";
-        completeCsvRow(dumper, numOfColumns, 1);
-        dumper << "layer name" << "exec status" << "layer type" << "exec type";
-
-        if (_config.report_type == averageCntReport) {
-            dumper << "average real time" << "average cpu time";
-            completeCsvRow(dumper, numOfColumns, 6);
-        } else {
-            // detailedCntReport case
-            for (size_t i = 0; i< _performanceCounters.size(); i++) {
-                dumper << "realTime_req" + std::to_string(i) << "cpuTime_req" + std::to_string(i);
-            }
-            completeCsvRow(dumper, numOfColumns, 4 + _performanceCounters.size() * 2);
-        }
-
-        for (const auto &layer : performanceMapSorted) {
-            dumper << layer.first;  // layer name
-
-            switch (layer.second.status) {
-                case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
-                    dumper << "EXECUTED";
-                    break;
-                case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
-                    dumper << "NOT_RUN";
-                    break;
-                case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
-                    dumper << "OPTIMIZED_OUT";
-                    break;
-            }
-            dumper << layer.second.layer_type << layer.second.exec_type;
-
-            if (_config.report_type == averageCntReport) {
-                // write average realTime and cpuTime from each processed request for current layer
-                dumper <<
-                std::to_string(std::accumulate(_perLayerRealTime[layer.first].begin(),
-                                               _perLayerRealTime[layer.first].end(), 0.0) / _perLayerRealTime[layer.first].size() / 1000.0) <<
-                std::to_string(std::accumulate(_perLayerCpuTime[layer.first].begin(),
-                                               _perLayerCpuTime[layer.first].end(), 0.0) / _perLayerCpuTime[layer.first].size()  / 1000.0);
-            } else {
-                // write all realTime and cpuTime from each processed request for current layer
-                for (size_t i = 0; i < _config.nireq; i++) {
-                    dumper << std::to_string(_perLayerRealTime[layer.first][i] / 1000.0) << std::to_string(_perLayerCpuTime[layer.first][i] / 1000.0);
-                }
-            }
-            dumper.endLine();
-        }
+        dump_parameters(_parameters.at(Category::RUNTIME_CONFIG));
         dumper.endLine();
     }
 
-    if (_config.report_type == detailedCntReport) {
-        dumper << "Statistics";
-        completeCsvRow(dumper, numOfColumns, 1);
+    if (_parameters.count(Category::EXECUTION_RESULTS)) {
+        dumper << "Execution results";
+        dumper.endLine();
 
-        dumper << "metric";
-        for (size_t i = 0; i < _totalLayersTime.size(); i++) {
-            // detailedCntReport case
-            dumper << "req" + std::to_string(i);
-        }
-        completeCsvRow(dumper, numOfColumns, 4 + _totalLayersTime.size());
-        dumper << "latencies";
-        for (const auto &lat : _totalLayersTime) {
-            dumper << lat / 1000.0;
-        }
-        completeCsvRow(dumper, numOfColumns, _totalLayersTime.size());
+        dump_parameters(_parameters.at(Category::EXECUTION_RESULTS));
         dumper.endLine();
     }
 
-    dumper << "Execution results";
-    completeCsvRow(dumper, numOfColumns, 1);
-    dumper << "number of iterations" << iteration_number;
-    completeCsvRow(dumper, numOfColumns, 2);
-    dumper << "latency" << getMedianValue<double>(_latencies);
-    completeCsvRow(dumper, numOfColumns, 2);
-    dumper << "throughput" << fps;
-    completeCsvRow(dumper, numOfColumns, 2);
-    dumper << "total execution time" << totalExecTime;
-    completeCsvRow(dumper, numOfColumns, 2);
-
-    slog::info << "statistics report is stored to " << dumper.getFilename() << slog::endl;
+    slog::info << "Statistics report is stored to " << dumper.getFilename() << slog::endl;
 }
 
-double StatisticsReport::getMedianLatency() {
-    return getMedianValue<double>(_latencies);
-}
+void StatisticsReport::dumpPerformanceCountersRequest(CsvDumper& dumper,
+                                                      const PerformaceCounters& perfCounts) {
+    auto performanceMapSorted = perfCountersSorted(perfCounts);
 
-std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> StatisticsReport::preparePmStatistics() {
-    if (_performanceCounters.empty()) {
-        throw std::logic_error("preparePmStatistics() was called when no PM data was collected");
-    }
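+    // Running totals of per-layer real time and CPU time, in microseconds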
+    long long total = 0L;
+    long long total_cpu = 0L;
+
+    dumper << "layerName" << "execStatus" << "layerType" << "execType";
+    dumper << "realTime (ms)" << "cpuTime (ms)";
+    dumper.endLine();
 
-    // sort PM data of first processed request according to layers execution order
-    auto performanceMapSorted = perfCountersSorted(_performanceCounters[0]);
-
-    // iterate over each processed infer request and handle its PM data
-    for (auto &pm : _performanceCounters) {
-        long long total = 0L;
-        // iterate over each layer from sorted vector and add required PM data to the per-layer maps
-        for (const auto & it : performanceMapSorted) {
-            _perLayerRealTime[it.first].push_back(pm[it.first].realTime_uSec);
-            _perLayerCpuTime[it.first].push_back(pm[it.first].cpu_uSec);
-            total += pm[it.first].realTime_uSec;
+    for (const auto &layer : performanceMapSorted) {
+        dumper << layer.first;  // layer name
+
+        switch (layer.second.status) {
+            case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
+                dumper << "EXECUTED";
+                break;
+            case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
+                dumper << "NOT_RUN";
+                break;
+            case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
+                dumper << "OPTIMIZED_OUT";
+                break;
         }
-        _totalLayersTime.push_back(total);
+        dumper << layer.second.layer_type << layer.second.exec_type;
+        dumper << std::to_string(layer.second.realTime_uSec / 1000.0) << std::to_string(layer.second.cpu_uSec / 1000.0);
+        total += layer.second.realTime_uSec;
+        total_cpu += layer.second.cpu_uSec;
+        dumper.endLine();
     }
-    return performanceMapSorted;
+    dumper << "Total" << "" << "" << "";
+    dumper <<  total / 1000.0 << total_cpu / 1000.0;
+    dumper.endLine();
+    dumper.endLine();
 }
 
-template <typename T>
-T StatisticsReport::getMedianValue(const std::vector<T> &vec) {
-    std::vector<T> sortedVec(vec);
-    std::sort(sortedVec.begin(), sortedVec.end());
-    return (sortedVec.size() % 2 != 0) ?
-           sortedVec[sortedVec.size() / 2ULL] :
-           (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
+void StatisticsReport::dumpPerformanceCounters(const std::vector<PerformaceCounters> &perfCounts) {
+    if ((_config.report_type.empty()) || (_config.report_type == noCntReport)) {
+        slog::info << "Statistics collecting for performance counters was not requested. No reports are dumped." << slog::endl;
+        return;
+    }
+    if (perfCounts.empty()) {
+        slog::info << "Performance counters are empty. No reports are dumped." << slog::endl;
+        return;
+    }
+    CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv");
+    if (_config.report_type == detailedCntReport) {
+        for (auto& pc : perfCounts) {
+            dumpPerformanceCountersRequest(dumper, pc);
+        }
+    } else if (_config.report_type == averageCntReport) {
+        auto getAveragePerformanceCounters = [ &perfCounts ] () {
+            std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> performanceCountersAvg;
+            // sort PM data of first processed request according to layers execution order
+            auto performanceMapSorted = perfCountersSorted(perfCounts[0]);
+
+            // iterate over each processed infer request and handle its PM data
+            for (size_t i = 0; i < perfCounts.size(); i++) {
+                // iterate over each layer from sorted vector and add required PM data to the per-layer maps
+                for (const auto& pm : performanceMapSorted) {
+                    if (performanceCountersAvg.count(pm.first) == 0) {
+                        performanceCountersAvg[pm.first] = perfCounts.at(i).at(pm.first);
+                    } else {
+                        performanceCountersAvg[pm.first].realTime_uSec += perfCounts.at(i).at(pm.first).realTime_uSec;
+                        performanceCountersAvg[pm.first].cpu_uSec += perfCounts.at(i).at(pm.first).cpu_uSec;
+                    }
+                }
+            }
+            for (auto& pm : performanceCountersAvg) {
+                pm.second.realTime_uSec /= perfCounts.size();
+                pm.second.cpu_uSec /= perfCounts.size();
+            }
+            return performanceCountersAvg;
+        };
+        dumpPerformanceCountersRequest(dumper, getAveragePerformanceCounters());
+    } else {
+        throw std::logic_error("PM data can only be collected for average or detailed report types");
+    }
+    slog::info << "Performance counters report is stored to " << dumper.getFilename() << slog::endl;
 }
index f7e0bb2..58eae04 100644 (file)
@@ -22,51 +22,51 @@ static constexpr char detailedCntReport[] = "detailed_counters";
 /// @brief Responsible for collecting of statistics and dumping to .csv file
 class StatisticsReport {
 public:
+    typedef std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> PerformaceCounters;
+    typedef std::vector<std::pair<std::string, std::string>> Parameters;
+
     struct Config {
-        std::string device;
-        std::string api;
-        size_t batch;
-        size_t nireq;
-        size_t niter;
-        uint64_t duration;
-        size_t cpu_nthreads;
-        std::map<std::string, uint32_t> nstreams;
-        std::string cpu_pin;
         std::string report_type;
         std::string report_folder;
     };
 
+    enum class Category {
+        COMMAND_LINE_PARAMETERS,
+        RUNTIME_CONFIG,
+        EXECUTION_RESULTS,
+    };
+
     explicit StatisticsReport(Config config) : _config(std::move(config)) {
-        if (_config.nireq > 0) {
-            _performanceCounters.reserve(_config.nireq);
-        }
+        _separator =
+#if defined _WIN32 || defined __CYGWIN__
+    #   if defined UNICODE
+        L"\\";
+    #   else
+        "\\";
+    #   endif
+#else
+        "/";
+#endif
+        if (_config.report_folder.empty())
+            _separator = "";
     }
 
-    void addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat);
-
-    void addLatencies(const std::vector<double> &latency);
+    void addParameters(const Category &category, const Parameters& parameters);
 
-    void dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime);
+    void dump();
 
-    double getMedianLatency();
+    void dumpPerformanceCounters(const std::vector<PerformaceCounters> &perfCounts);
 
 private:
-    std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> preparePmStatistics();
-
-    template <typename T>
-    T getMedianValue(const std::vector<T> &vec);
-
-    // Contains PM data for each processed infer request
-    std::vector<std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>> _performanceCounters;
-    // Contains latency of each processed infer request
-    std::vector<double> _latencies;
+    void dumpPerformanceCountersRequest(CsvDumper& dumper,
+                                        const PerformaceCounters& perfCounts);
 
     // configuration of current benchmark execution
     const Config _config;
 
-    // mapping from network layer to a vector of calculated RealTime values from each processed infer request.
-    std::map<std::string, std::vector<long long>> _perLayerRealTime;
-    // mapping from network layer to a vector of calculated CPU Time values from each processed infer request.
-    std::map<std::string, std::vector<long long>> _perLayerCpuTime;
-    std::vector<long long> _totalLayersTime;
+    // parameters
+    std::map<Category, Parameters> _parameters;
+
+    // csv separator
+    std::string _separator;
 };
index 4c2634d..0dbd8c3 100644 (file)
@@ -12,4 +12,3 @@ std::vector<std::string> parseDevices(const std::string& device_string);
 uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
 std::map<std::string, uint32_t> parseValuePerDevice(const std::vector<std::string>& devices,
                                                     const std::string& values_string);
-uint32_t deviceDefaultRequestsNumber(const std::string& device);
index c4011c4..a8c9caf 100644 (file)
@@ -12,24 +12,21 @@ file (GLOB LIBRARY_HEADERS
         ${CMAKE_CURRENT_SOURCE_DIR}/*.h
         )
 
-# Find OpenCV components if exist
-find_package(OpenCV COMPONENTS imgcodecs videoio imgproc QUIET)
-if(NOT(OpenCV_FOUND))
-    message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " is built without OPENCV support")
-endif()
-
 # Create named folders for the sources within the .vcproj
 # Empty name lists them directly under the .vcproj
 source_group("src" FILES ${LIBRARY_SRC})
 source_group("include" FILES ${LIBRARY_HEADERS})
 
-
 # Create library file from sources.
 add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
 
-if(OpenCV_FOUND)
-       target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES})
-       target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV)
+# Find OpenCV components if exist
+find_package(OpenCV COMPONENTS imgcodecs videoio imgproc QUIET)
+if(NOT OpenCV_FOUND)
+    message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " will be built without OPENCV support")
+else()
+    target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES})
+    target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV)
 endif()
 
 target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_FORMAT_READER)
index e9111d9..4fa5611 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2019 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,31 +6,33 @@
 
 #if defined(_WIN32)
 
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
 #ifndef NOMINMAX
 # define NOMINMAX
+# define NOMINMAX_UNDEF
 #endif
 
-#include <winsock2.h>
-#include <windows.h>
-#include <stdlib.h>
-
-#else
-
-#include <unistd.h>
-#include <cstdlib>
-#include <string.h>
+#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
+# define _X86_
+#endif
 
+#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
+# define _AMD64_
 #endif
 
 #include <string>
-
+#include <windef.h>
+#include <fileapi.h>
+#include <Winbase.h>
 #include <sys/stat.h>
 
-#if defined(WIN32)
-    // Copied from linux libc sys/stat.h:
-    #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
-    #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
-#endif
+// Copied from linux libc sys/stat.h:
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
 
 struct dirent {
     char *d_name;
@@ -38,10 +40,9 @@ struct dirent {
     explicit dirent(const wchar_t *wsFilePath) {
         size_t i;
         auto slen = wcslen(wsFilePath);
-        d_name = static_cast<char*>(malloc(slen + 1));
+        d_name = static_cast<char *>(malloc(slen + 1));
         wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
     }
-
     ~dirent() {
         free(d_name);
     }
@@ -60,6 +61,11 @@ class DIR {
     }
 
 public:
+    DIR(const DIR &other) = delete;
+    DIR(DIR &&other) = delete;
+    DIR& operator=(const DIR &other) = delete;
+    DIR& operator=(DIR &&other) = delete;
+
     explicit DIR(const char *dirPath) : next(nullptr) {
         std::string ws = dirPath;
         if (endsWith(ws, "\\"))
@@ -72,6 +78,7 @@ public:
 
     ~DIR() {
         if (!next) delete next;
+        next = nullptr;
         FindClose(hFind);
     }
 
@@ -96,7 +103,7 @@ public:
 };
 
 
-static DIR *opendir(const char* dirPath) {
+static DIR* opendir(const char *dirPath) {
     auto dp = new DIR(dirPath);
     if (!dp->isValid()) {
         delete dp;
@@ -105,10 +112,27 @@ static DIR *opendir(const char* dirPath) {
     return dp;
 }
 
-static struct dirent *readdir(DIR *dp) {
+static struct dirent* readdir(DIR *dp) {
     return dp->nextEnt();
 }
 
 static void closedir(DIR *dp) {
     delete dp;
 }
+
+#ifdef WIN32_LEAN_AND_MEAN_UNDEF
+# undef WIN32_LEAN_AND_MEAN
+# undef WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifdef NOMINMAX_UNDEF
+# undef NOMINMAX_UNDEF
+# undef NOMINMAX
+#endif
+
+#else
+
+#include <sys/types.h>
+#include <dirent.h>
+
+#endif
index fbcd249..58efdc6 100644 (file)
@@ -27,7 +27,7 @@
 #include <ie_blob.h>
 
 #ifndef UNUSED
-  #ifdef WIN32
+  #if defined (_MSC_VER) && !defined (__clang__)
     #define UNUSED
   #else
     #define UNUSED  __attribute__((unused))
@@ -1120,5 +1120,4 @@ inline void showAvailableDevices() {
     for (const auto& device : devices) {
         std::cout << "  " << device;
     }
-    std::cout << "  HDDL" << std::endl;
 }
index 5edfea8..c6adc5e 100644 (file)
@@ -4,7 +4,8 @@
 
 #pragma once
 
-#include <iostream>
+#include <cstdio>
+#include <sstream>
 #include <iomanip>
 
 /**
  * @brief A ConsoleProgress class provides functionality for printing progress dynamics
  */
 class ConsoleProgress {
-    static const int DEFAULT_DETALIZATION = 20;
+    static const size_t DEFAULT_DETALIZATION = 20;
+    static const size_t DEFAULT_PERCENT_TO_UPDATE_PROGRESS = 1;
 
     size_t total;
-    size_t current = 0;
+    size_t cur_progress = 0;
+    size_t prev_progress = 0;
     bool stream_output;
     size_t detalization;
+    size_t percent_to_update;
 
 public:
     /**
@@ -25,18 +29,19 @@ public:
     * @param _total - maximum value that is correspondent to 100%
     * @param _detalization - number of symbols(.) to use to represent progress
     */
-    explicit ConsoleProgress(size_t _total, bool _stream_output = false, size_t _detalization = DEFAULT_DETALIZATION) :
-            total(_total), detalization(_detalization) {
+    explicit ConsoleProgress(size_t _total,
+                             bool _stream_output = false,
+                             size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS,
+                             size_t _detalization = DEFAULT_DETALIZATION) :
+            total(_total), detalization(_detalization), percent_to_update(_percent_to_update) {
         stream_output = _stream_output;
         if (total == 0) {
             total = 1;
         }
-        std::cout << std::unitbuf;
     }
 
     /**
      * @brief Shows progress with current data. Progress is shown from the beginning of the current line.
-     * @return
      */
     void showProgress() const {
         std::stringstream strm;
@@ -45,28 +50,34 @@ public:
         }
         strm << "Progress: [";
         size_t i = 0;
-        for (; i < detalization * current / total; i++) {
+        for (; i < detalization * cur_progress / total; i++) {
             strm << ".";
         }
         for (; i < detalization; i++) {
             strm << " ";
         }
-        strm << "] " << std::fixed << std::setprecision(2) << 100 * static_cast<float>(current) / total << "% done";
+        strm << "] " << std::setw(3) << 100 * cur_progress / total << "% done";
         if (stream_output) {
-            std::cout << strm.str() << std::endl;
-        } else {
-            std::cout << strm.str() << std::flush;
+            strm << std::endl;
         }
+        std::fputs(strm.str().c_str(), stdout);
+        std::fflush(stdout);
     }
 
     /**
      * @brief Updates current value and progressbar
-     * @param newProgress - new value to represent
      */
-    void updateProgress(size_t newProgress) {
-        current = newProgress;
-        if (current > total) current = total;
-        showProgress();
+    void updateProgress() {
+        if (cur_progress > total) cur_progress = total;
+        size_t prev_percent = 100 * prev_progress / total;
+        size_t cur_percent = 100 * cur_progress / total;
+
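+        // Redraw only on the first update, on completion, or when progress has advanced
+        // by at least percent_to_update percent since the last redraw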
+        if (prev_progress == 0 ||
+            cur_progress == total ||
+            prev_percent + percent_to_update <= cur_percent) {
+            showProgress();
+            prev_progress = cur_progress;
+        }
     }
 
     /**
@@ -74,10 +85,11 @@ public:
      * @param add - value to add
      */
     void addProgress(int add) {
-        if (add < 0 && -add > static_cast<int>(current)) {
-            add = -static_cast<int>(current);
+        if (add < 0 && -add > static_cast<int>(cur_progress)) {
+            add = -static_cast<int>(cur_progress);
         }
-        updateProgress(current + add);
+        cur_progress += add;
+        updateProgress();
     }
 
     /**
@@ -85,6 +97,9 @@ public:
      * @return
      */
     void finish() {
-        std::cerr << std::nounitbuf << "\n";
+        std::stringstream strm;
+        strm << std::endl;
+        std::fputs(strm.str().c_str(), stdout);
+        std::fflush(stdout);
     }
 };
index 0d7db2e..90833a5 100644 (file)
@@ -10,8 +10,8 @@ It demonstrates how to use the following Inference Engine API in applications:
 
 There is also an API introduced to crop a ROI object and set it as input without additional memory re-allocation.
 To properly demonstrate this API, it is required to run several networks in pipeline which is out of scope of this sample.
-Please refer to [Security Barrier Camera Demo](./inference-engine/samples/security_barrier_camera_demo/README.md), or
-[Crossroad Camera Demo](./inference-engine/samples/crossroad_camera_demo/README.md) with an example of using of new crop ROI API.
+Please refer to [Security Barrier Camera Demo](./demos/security_barrier_camera_demo/README.md), or
+[Crossroad Camera Demo](./demos/crossroad_camera_demo/README.md) with an example of using of new crop ROI API.
 
 Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
 
index 9d32bd1..0d796de 100644 (file)
@@ -1,8 +1,8 @@
 # Hello Query Device C++ Sample
 
-This topic demonstrates how to run the Hello Query Device sample application, which queries Inference Engine devices and prints their metrics and default configuration values. The sample shows how to use [Query Device API feature](./docs/IE_DG/QueryDeviceAPI.md).
+This topic demonstrates how to run the Hello Query Device sample application, which queries Inference Engine devices and prints their metrics and default configuration values. The sample shows how to use [Query Device API feature](./docs/IE_DG/InferenceEngine_QueryAPI.md).
 > **NOTE:** This topic describes usage of C++ implementation of the Query Device Sample. 
-> For the Python* implementation, refer to [Hello Query Device Python* Sample](./inference-engine/ie_brudges/python/sample/hello_query_device/README.md)
+> For the Python* implementation, refer to [Hello Query Device Python* Sample](./inference-engine/ie_bridges/python/sample/hello_query_device/README.md)
 ## Running
 
 To see quired information, run the following:
index 2370670..4ef6a14 100644 (file)
@@ -3,6 +3,8 @@
 This topic demonstrates how to run the Object Detection sample application, which does inference using object detection
 networks like SSD-VGG on Intel® Processors and Intel® HD Graphics.
 
+> **NOTE:** This topic describes usage of C++ implementation of the Object Detection Sample SSD. For the Python* implementation, refer to [Object Detection Python* Sample SSD](./inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md).
+
 ## How It Works
 
 Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference
index 0046c97..0785ee7 100644 (file)
@@ -48,17 +48,15 @@ will be removed in GNA hardware version 3 and higher.
 #### Execution Modes
 
 Several execution modes are supported via the `-d` flag.  If the device
-is set to `CPU` and the GNA plugin is selected, the GNA device is
-emulated in fast-but-not-bit-exact mode.  If the device is set to
-`GNA_AUTO`, then the GNA hardware is used if available and the driver is
-installed.  Otherwise, the GNA device is emulated in
-fast-but-not-bit-exact mode.  If the device is set to `GNA_HW`, then the
-GNA hardware is used if available and the driver is installed.
+is set to `CPU`, then all calculations will be performed on the CPU device
+using the CPU Plugin.  If the device is set to `GNA_AUTO`, then the GNA hardware is
+used if available and the driver is installed.  Otherwise, the GNA device is 
+emulated in fast-but-not-bit-exact mode.  If the device is set to `GNA_HW`,
+then the GNA hardware is used if available and the driver is installed.
 Otherwise, an error will occur.  If the device is set to `GNA_SW`, the
 GNA device is emulated in fast-but-not-bit-exact mode.  Finally, if
 the device is set to `GNA_SW_EXACT`, the GNA device is emulated in
 bit-exact mode.
-`GNA_SW_FP32` mode is used for calculation on CPU device using GNA Plugin.
 
 #### Loading and Saving Models
 
@@ -94,7 +92,7 @@ Options:
     -m "<path>"             Required. Path to an .xml file with a trained model (required if -rg is missing).
     -o "<path>"             Optional. Output file name (default name is "scores.ark").
     -l "<absolute_path>"    Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
-    -d "<device>"           Optional. Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT, GNA_SW_FP32 and HETERO with combination of GNA
+    -d "<device>"           Optional. Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT and HETERO with combination of GNA
      as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. The sample will look for a suitable plugin for device specified.
     -p                      Optional. Plugin name. For example, GPU. If this parameter is set, the sample will look for this plugin only
     -pc                     Optional. Enables performance report
index efc38ca..be52db4 100644 (file)
@@ -706,7 +706,7 @@ int main(int argc, char *argv[]) {
             outputInfo = netBuilder.getNetwork().getOutputsInfo();
         }
 
-        Blob::Ptr ptrOutputBlob = inferRequests[0].inferRequest.GetBlob(cOutputInfo.rbegin()->first);
+        Blob::Ptr ptrOutputBlob = inferRequests.begin()->inferRequest.GetBlob(cOutputInfo.rbegin()->first);
 
         for (auto &item : outputInfo) {
             DataPtr outData = item.second;
@@ -839,7 +839,7 @@ int main(int argc, char *argv[]) {
                             if (!FLAGS_o.empty()) {
                                 outputFrame =
                                         &ptrScores.front() + numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex);
-                                Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
+                                Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.rbegin()->first);
                                 auto byteSize = inferRequest.numFramesThisBatch * numScoresPerFrame * sizeof(float);
                                 std::memcpy(outputFrame,
                                             outputBlob->buffer(),
@@ -848,7 +848,7 @@ int main(int argc, char *argv[]) {
 
                             if (!FLAGS_r.empty()) {
                                 Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
-                                CompareScores(outputBlob->buffer().as<float *>(),
+                                CompareScores(outputBlob->buffer().as<float*>(),
                                               &ptrReferenceScores[inferRequest.frameIndex *
                                                                   numFrameElementsReference *
                                                                   numBytesPerElementReference],
@@ -876,7 +876,7 @@ int main(int argc, char *argv[]) {
                         ptrInputBlobs.push_back(inferRequest.inferRequest.GetBlob(input.first));
                     }
 
-                    for (size_t i = 0; i < numInputArkFiles; i++) {
+                    for (size_t i = 0; i < numInputArkFiles; ++i) {
                         std::memcpy(ptrInputBlobs[i]->buffer(),
                                     inputFrame[i],
                                     ptrInputBlobs[i]->byteSize());
@@ -890,14 +890,14 @@ int main(int argc, char *argv[]) {
                     frameIndex += numFramesThisBatch;
                     for (size_t j = 0; j < inputArkFiles.size(); j++) {
                         if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
-                            int i = frameIndex - FLAGS_cw_l;
-                            if (i > 0 && i < static_cast<int>(numFramesArkFile)) {
+                            int idx = frameIndex - FLAGS_cw_l;
+                            if (idx > 0 && idx < static_cast<int>(numFramesArkFile)) {
                                 inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
-                            } else if (i >= static_cast<int>(numFramesArkFile)) {
-                                inputFrame[j] = &ptrUtterances[0].front() +
+                            } else if (idx >= static_cast<int>(numFramesArkFile)) {
+                                inputFrame[j] = &ptrUtterances[j].front() +
                                         (numFramesArkFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
-                            } else if (i < 0) {
-                                inputFrame[j] = &ptrUtterances[0].front();
+                            } else if (idx <= 0) {
+                                inputFrame[j] = &ptrUtterances[j].front();
                             }
                         } else {
                             inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
@@ -905,7 +905,6 @@ int main(int argc, char *argv[]) {
                     }
                     inferRequestFetched |= true;
                 }
-
                 if (!inferRequestFetched) {
                     std::this_thread::sleep_for(std::chrono::milliseconds(1));
                     continue;
index 63a18b2..02f8669 100644 (file)
@@ -23,7 +23,7 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If
                                      "the sample will look for this plugin only";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_FP32 "
+static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, "
                                             "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
                                             " as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. "
                                             "The sample will look for a suitable plugin for device specified.";
diff --git a/inference-engine/samples/thirdparty/gflags/.gitmodules b/inference-engine/samples/thirdparty/gflags/.gitmodules
deleted file mode 100644 (file)
index aa2072c..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-[submodule "doc"]
-       path = doc
-       url = https://github.com/gflags/gflags.git
-       branch = gh-pages
index bd1793f..63fda2a 100644 (file)
@@ -24,10 +24,10 @@ if (ENABLE_GNA)
     add_subdirectory(gna_plugin)
 endif()
 
-add_subdirectory(inference_engine)
-
 add_subdirectory(hetero_plugin)
 
+add_subdirectory(inference_engine)
+
 set(InferenceEngine_LIBRARIES inference_engine)
 set(InferenceEngine_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/include)
 set(InferenceEngine_SRC_DIRS ${CMAKE_SOURCE_DIR}/src)
index 211f660..b618084 100644 (file)
@@ -14,12 +14,11 @@ ie_add_plugin(NAME ${TARGET_NAME}
               SOURCES ${MAIN_SRC} ${LIBRARY_HEADERS}
               VERSION_DEFINES_FOR cldnn_engine.cpp)
 
-target_link_libraries(${TARGET_NAME} PRIVATE ${INTEL_ITT_LIBS} inference_engine clDNN_shlib pugixml)
+target_link_libraries(${TARGET_NAME} PRIVATE ${INTEL_ITT_LIBS} inference_engine clDNN_lib pugixml)
 
 set (CLDNN_TOP_FOLDER ${IE_MAIN_SOURCE_DIR}/thirdparty/clDNN)
 target_include_directories(${TARGET_NAME} PRIVATE
-        ${CLDNN_TOP_FOLDER}/api
-        ${CLDNN_TOP_FOLDER}/include
+        ${CLDNN_TOP_FOLDER}
         ${IE_MAIN_SOURCE_DIR}/src/inference_engine
         ${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src)
 
index cf863fb..29b3491 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "cldnn_custom_layer.h"
 
-#include <CPP/network.hpp>
+#include <api/network.hpp>
 
 namespace CLDNNPlugin {
 
index e948f29..ee4cd32 100644 (file)
@@ -10,7 +10,7 @@
 #include <map>
 #include <ie_common.h>
 #include "pugixml.hpp"
-#include "CPP/tensor.hpp"
+#include "api/tensor.hpp"
 
 namespace CLDNNPlugin {
 
@@ -54,7 +54,7 @@ public:
     const std::vector<std::string>& GlobalSizeRules()const { return m_globalSizeRules; }
     const std::vector<std::string>& LocalSizeRules()const { return m_localSizeRules; }
     const std::vector<KerenlParam>& KernelParams()const { return m_kernelParams; }
-    const int InputDimSourceIndex() { return m_wgDimInputIdx; }
+    int InputDimSourceIndex() { return m_wgDimInputIdx; }
 
 protected:
     CLDNNCustomLayer() : m_wgDimInputIdx(0) {}
index 8aba309..a43fc91 100644 (file)
@@ -11,6 +11,7 @@
 #include <iostream>
 #include <cmath>
 #include <tuple>
+#include <cctype>
 
 #include "ie_metric_helpers.hpp"
 #include <debug.h>
@@ -132,7 +133,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
     try {
         plugin = make_ie_compatible_plugin(
-                {2, 0,
+                {2, 1,
                  CI_BUILD_NUMBER,
                  "clDNNPlugin"}, std::make_shared<clDNNEngine>());
         return OK;
@@ -233,6 +234,23 @@ Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::st
     return result;
 }
 
+auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) {
+    auto ret_str = string;
+    if (!case_sensitive) {
+        std::transform(string.begin(), string.end(), string.begin(), ::tolower);
+        std::transform(substring.begin(), substring.end(), substring.begin(), ::tolower);
+    }
+    auto erase_position = string.rfind(substring);
+    if (erase_position != std::string::npos) {
+        // if a space precedes the substring, remove it as well
+        if (erase_position > 0 && std::isspace(string.at(erase_position - 1))) {
+            erase_position--;
+        }
+        return ret_str.substr(0, erase_position);
+    }
+    return ret_str;
+};
+
 Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
     if (name == METRIC_KEY(SUPPORTED_METRICS)) {
         std::vector<std::string> metrics;
@@ -250,7 +268,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
         std::vector<std::string> availableDevices = { "" };
         IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
     } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
-        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, std::string(engine_info.ocl_device_name));
+        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, StringRightTrim(engine_info.dev_name, "NEO", false));
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
         for (auto opt : _impl->m_config.key_config_map)
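
Editor's note: a brief illustration of the StringRightTrim helper introduced above for the FULL_DEVICE_NAME metric (the device-name string is an assumption, not taken from this patch):

    // Case-insensitive right trim also drops the space before the matched suffix:
    //   StringRightTrim("Intel(R) HD Graphics NEO", "NEO", false)  ->  "Intel(R) HD Graphics"
    // With case_sensitive = true the suffix must match exactly,
    // otherwise the input string is returned unchanged.
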
index 1fb3190..aa2baa2 100644 (file)
@@ -8,7 +8,7 @@
 #include <map>
 #include <string>
 #include <memory>
-#include <CPP/engine.hpp>
+#include <api/engine.hpp>
 #include <cpp_interfaces/impl/ie_plugin_internal.hpp>
 
 namespace CLDNNPlugin {
index 77fcfe4..7a3ce7a 100644 (file)
@@ -7,8 +7,8 @@
 #include <unordered_set>
 
 #include "ie_metric_helpers.hpp"
-#include <CPP/cldnn_defs.h>
-#include <CPP/data.hpp>
+#include <api/cldnn.hpp>
+#include <api/data.hpp>
 #include <chrono>
 #include <cmath>
 #include <algorithm>
index b1f98c9..928491b 100644 (file)
@@ -6,10 +6,10 @@
 #include <set>
 #include <unordered_set>
 #include <sstream>
-#include <CPP/cldnn_defs.h>
-#include <CPP/network.hpp>
-#include <CPP/profiling.hpp>
-#include <CPP/custom_gpu_primitive.hpp>
+#include <api/cldnn.hpp>
+#include <api/network.hpp>
+#include <api/profiling.hpp>
+#include <api/custom_gpu_primitive.hpp>
 #include <chrono>
 #include <cmath>
 #include <algorithm>
@@ -238,7 +238,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
         layer->type = to_IE_type_name(prim_info.type_id);
         layer->precision = data_type_to_precision(prim_info.output_layout.data_type);
         std::vector<std::string> originalNames{find_origin_layers(prim_info.original_id)};
-        for (auto& fused_id : prim_info.c_fused_ids.cpp_ids)
+        for (auto& fused_id : prim_info.c_fused_ids)
             for (auto& origin_id : find_origin_layers(fused_id))
                 originalNames.push_back(origin_id);
 
@@ -266,7 +266,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
 
         if (filter_const_primitives) {
             // Decrease expected dependencies count if there is a const input without original id in the IR
-            for (auto& dep : prim_info.c_dependencies.cpp_ids) {
+            for (auto& dep : prim_info.c_dependencies) {
                 auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
                     return entry.original_id == dep;
                 });
@@ -290,16 +290,16 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
         for (auto& pi : primitives_info) {
             // extract mutable_data primitives and connect it's dependencies and users directly
             if (pi.type_id == "mutable_data") {
-                if (pi.c_dependencies.cpp_ids.size() == 1 && !pi.c_users.cpp_ids.empty()) {
-                    auto dep = pi.c_dependencies.cpp_ids[0];
-                    auto users = pi.c_users.cpp_ids;
+                if (pi.c_dependencies.size() == 1 && !pi.c_users.empty()) {
+                    auto dep = pi.c_dependencies[0];
+                    auto users = pi.c_users;
                     auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
                         return entry.original_id == dep;
                     });
                     if (it == primitives_info.end())
                         continue;
 
-                    auto& dep_users = it->c_users.cpp_ids;
+                    auto& dep_users = it->c_users;
                     // Remove mutable data from users list
                     dep_users.erase(std::find_if(dep_users.begin(), dep_users.end(), [&](std::string user_id) {
                         return user_id == pi.original_id;
@@ -315,7 +315,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
                         if (it == primitives_info.end())
                             continue;
 
-                        for (auto& d : it->c_dependencies.cpp_ids) {
+                        for (auto& d : it->c_dependencies) {
                             if (d == pi.original_id)
                                 d = dep;
                         }
@@ -334,8 +334,8 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
 
             // Skip mutable_data
             if (pi.type_id == "mutable_data" &&
-                pi.c_dependencies.cpp_ids.size() == 1 &&
-                !pi.c_users.cpp_ids.empty()) {
+                pi.c_dependencies.size() == 1 &&
+                !pi.c_users.empty()) {
                 continue;
             }
         }
@@ -377,7 +377,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
     for (auto& pair : node2layer) {
         auto pi = pair.first;
         auto layer = pair.second;
-        auto user_ids = pi.c_users.cpp_ids;
+        auto user_ids = pi.c_users;
         for (int i = 0; i < user_ids.size(); i++) {
             auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
                 return entry.first.original_id == user_ids[i];
@@ -399,7 +399,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
             }
 
             int in_port_id = 0;
-            for (auto& dep : it->first.c_dependencies.cpp_ids) {
+            for (auto& dep : it->first.c_dependencies) {
                 if (filter_const_primitives) {
                     auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
                         return entry.first.original_id == dep;
@@ -461,16 +461,8 @@ void CLDNNGraph::UpdatePerfStatistics() {
     for (auto &profiledID : profilingIDs) {
         auto& perfCount = perfMap[profiledID].second;
         // Change status if layer wasn't executed by cldnn engine
-        if (perfCount.num == 0 &&
-            executedPrimitives.find(profiledID) == executedPrimitives.end()) {
-            if (allPrimitives.find(profiledID) != allPrimitives.end() &&
-                allPrimitives.at(profiledID) == "_optimized_") {
-                // Layer was marked as optimized by cldnn
-                perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
-            } else {
-                // Layer wasn't run for some reason
-                perfCount.status = InferenceEngineProfileInfo::NOT_RUN;
-            }
+        if (perfCount.num == 0 && executedPrimitives.find(profiledID) == executedPrimitives.end()) {
+            perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
             continue;
         }
 
@@ -546,22 +538,30 @@ void CLDNNGraph::UpdateImplementationsMap() {
 }
 
 void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &result) const {
+    bool combinePrimByIRLayers = false;
     unsigned i = 0;
-    for (auto& profiledID : profilingIDs) {
-        const auto& layerName = perfMap.at(profiledID).first;
-        if (layerName.length() == 0)    // no layer directly associated
-            continue;
+    auto allIds = GetNetwork()->get_all_primitive_org_ids();
+    auto executedPrimitives = GetNetwork()->get_executed_primitives();
+    auto primitivesInfo = GetNetwork()->get_primitives_info();
+
+    auto getFromProfiling = [&](std::string primId) -> bool {
+        const auto& layerName = perfMap.at(primId).first;
+        if (layerName.length() == 0)  // no layer directly associated
+            return false;
+
+        const auto& perfCounter = perfMap.at(primId).second;
+
+        if (!perfCounter.parentPrimitive.empty() && combinePrimByIRLayers)
+            return false;
 
-        const auto& perfCounter = perfMap.at(profiledID).second;
         auto& extPerfEntry = result[layerName];
 
-        // copy layer implementation
+        memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
         if (perfCounter.isCPU) {
             static const std::string cpuExecType("CPU");
-            memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
             cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length());  // Override execType as CPU
         } else {
-            std::string impl = implementationsMap.at(profiledID);
+            std::string impl = implementationsMap.at(primId);
             impl.copy(extPerfEntry.exec_type, impl.length());
         }
 
@@ -570,14 +570,97 @@ void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::Inf
         extPerfEntry.cpu_uSec = perfCounter.cpu_avg();
         extPerfEntry.realTime_uSec = perfCounter.realTime_avg();
 
+        if (combinePrimByIRLayers) {
+            std::string kernelId = "";
+            long long kernelTime = 0;  // used for finding the most complex computation kernel in sub_graph for perf stat
+            for (auto &id : profilingIDs) {
+                const auto &pc = perfMap.at(id).second;
+                if (id != primId && pc.parentPrimitive == primId) {
+                    extPerfEntry.cpu_uSec += pc.cpu_avg();
+                    extPerfEntry.realTime_uSec += pc.realTime_avg();
+                    if (pc.realTime_avg() > kernelTime) {
+                        kernelTime = pc.realTime_avg();
+                        kernelId = id;
+                    }
+                    allIds.erase(std::find(allIds.begin(), allIds.end(), id));
+                }
+            }
+            if (!kernelId.empty())
+                implementationsMap.at(kernelId).copy(extPerfEntry.exec_type, implementationsMap.at(kernelId).length());
+        }
+
         perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length());
-    }
+        return true;
+    };
+
+    for (auto& primId : allIds) {
+        if (std::find(profilingIDs.begin(), profilingIDs.end(), primId) != profilingIDs.end()) {
+            getFromProfiling(primId);
+        } else if (executedPrimitives.find(primId) != executedPrimitives.end()) {
+            auto event = executedPrimitives.at(primId);
+
+            cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()};
+
+            // Collect timings
+            long long cpuTime = 0;
+            long long deviceTime = 0;
 
-    for (auto& prim : GetNetwork()->get_executed_primitive_ids()) {
-        if (std::find(profilingIDs.begin(), profilingIDs.end(), prim) == profilingIDs.end()) {
-            // TODO: add primitives that was added inside cldnn to perf stat
+            for (auto &interval : cldnnInfo.intervals) {
+                using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
+                auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
+
+                if (interval.name == "submission") {
+                    cpuTime += count;
+                } else if (interval.name == "executing") {
+                    deviceTime += count;
+                } else if (interval.name == "duration") {  // "duration" is used for CPU layers
+                    cpuTime += count;
+                }
+            }
+
+            std::string layerName = primId;
+            if (primId.find(":") != std::string::npos) {
+                layerName = primId.substr(primId.find(":") + 1, primId.length());
+            }
+
+            for (auto& pi : primitivesInfo) {
+                if (pi.original_id == primId) {
+                    if (pi.type_id == "mutable_data")
+                        continue;
+
+                    auto& extPerfEntry = result[layerName];
+
+                    if (pi.is_cpu) {
+                        static const std::string cpuExecType("CPU");
+                        memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
+                        cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length());  // Override execType as CPU
+                    } else {
+                        std::string impl = pi.kernel_id;
+                        impl.copy(extPerfEntry.exec_type, impl.length());
+                    }
+
+                    pi.type_id.copy(extPerfEntry.layer_type, 256);
+                    extPerfEntry.execution_index = i++;
+                    extPerfEntry.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED;
+                    extPerfEntry.cpu_uSec = cpuTime;
+                    extPerfEntry.realTime_uSec = deviceTime;
+
+                    if (pi.type_id == "input_layout") {
+                        const std::string input_string = "Input";
+                        const std::string undef_string = "undef";
+                        input_string.copy(extPerfEntry.layer_type, 256);
+                        undef_string.copy(extPerfEntry.exec_type, 256);
+                    }
+                }
+            }
         }
     }
+
+    // Check primitives that were dropped from the execution order but were added by the clDNN plugin
+    for (auto& primId : profilingIDs)
+        if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
+            getFromProfiling(primId);
+        }
 }
 
 std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
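
Editor's note: a hedged usage sketch (the helper function and variable names are assumptions, not part of this patch) showing how the counters assembled by CLDNNGraph::GetPerformanceCounts surface to an application through InferRequest::GetPerformanceCounts, provided performance counting was enabled when the network was loaded:

    #include <inference_engine.hpp>
    #include <iostream>

    void PrintPerfCounts(InferenceEngine::InferRequest &request) {
        // Assumes the network was loaded with PluginConfigParams::KEY_PERF_COUNT
        // set to PluginConfigParams::YES in the plugin config.
        auto perfCounts = request.GetPerformanceCounts();
        for (const auto &entry : perfCounts) {
            const auto &info = entry.second;  // InferenceEngineProfileInfo
            std::cout << entry.first << ": " << info.layer_type
                      << " (" << info.exec_type << "), "
                      << info.realTime_uSec << " us device, "
                      << info.cpu_uSec << " us host" << std::endl;
        }
    }
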
index 48c414d..aeda853 100644 (file)
 #include "cpp/ie_cnn_network.h"
 #include "debug_options.h"
 #include "inference_engine.hpp"
-#include <CPP/network.hpp>
-#include <CPP/memory.hpp>
-#include <CPP/primitive.hpp>
-#include <CPP/topology.hpp>
-#include <CPP/pooling.hpp>
-#include <CPP/eltwise.hpp>
-#include <CPP/concatenation.hpp>
-#include <CPP/detection_output.hpp>
-#include <CPP/softmax.hpp>
+#include <api/network.hpp>
+#include <api/memory.hpp>
+#include <api/primitive.hpp>
+#include <api/topology.hpp>
+#include <api/pooling.hpp>
+#include <api/eltwise.hpp>
+#include <api/concatenation.hpp>
+#include <api/detection_output.hpp>
+#include <api/softmax.hpp>
+#include <api/upsampling.hpp>
 #include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
-#include <CPP/upsampling.hpp>
 #include "cldnn_custom_layer.h"
 #include "cldnn_config.h"
 #include "cldnn_program.h"
index f2b7e10..8065e71 100644 (file)
@@ -6,7 +6,7 @@
 #include <string>
 #include <map>
 #include <functional>
-#include <CPP/detection_output.hpp>  // todo: find a way to remove this
+#include <api/detection_output.hpp>  // todo: find a way to remove this
 #include <description_buffer.hpp>
 #include "cldnn_infer_request.h"
 #include "cldnn_streams_task_executor.h"
@@ -356,7 +356,6 @@ void CLDNNInferRequest::SetBatch(int new_batch) {
 
         size_t offset = 0;
         size_t bsz = single_batch;
-        int b = 0;
 
         // calculate metadata for input buffers
         for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {
index 6ca467f..f80c76a 100644 (file)
@@ -6,19 +6,19 @@
 #include <vector>
 #include <sstream>
 #include <utility>
-#include <CPP/cldnn_defs.h>
-#include <CPP/data.hpp>
-#include <CPP/mutable_data.hpp>
-#include <CPP/reorder.hpp>
-#include <CPP/fully_connected.hpp>
-#include <CPP/concatenation.hpp>
-#include <CPP/reshape.hpp>
-#include <CPP/permute.hpp>
-#include <CPP/split.hpp>
-#include <CPP/crop.hpp>
-#include <CPP/reverse_sequence.hpp>
-#include <CPP/lstm.hpp>
-#include <CPP/lstm_dynamic.hpp>
+#include <api/cldnn.hpp>
+#include <api/data.hpp>
+#include <api/mutable_data.hpp>
+#include <api/reorder.hpp>
+#include <api/fully_connected.hpp>
+#include <api/concatenation.hpp>
+#include <api/reshape.hpp>
+#include <api/permute.hpp>
+#include <api/split.hpp>
+#include <api/crop.hpp>
+#include <api/reverse_sequence.hpp>
+#include <api/lstm.hpp>
+#include <api/lstm_dynamic.hpp>
 #include "cldnn_program.h"
 
 using namespace InferenceEngine;
@@ -102,8 +102,8 @@ void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine
     topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
     topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
 
-    primitivesToIRLayersMap[inReshapeID] = { layer->name };
-    primitivesToIRLayersMap[permuteID] = { layer->name };
+    addInnerPrimitiveToProfiler(inReshapeID, layer->name, layer);
+    addInnerPrimitiveToProfiler(permuteID, layer->name, layer);
 
     std::string hiddenInResh = inHiddenReshapeID + "_1";
     std::string hiddenInStr = inHiddenReorderID + "_1";
@@ -115,8 +115,11 @@ void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine
     topology.add(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
     topology.add(cldnn::concatenation(concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
 
-    primitivesToIRLayersMap[hiddenInStr] = { layer->name };
-    primitivesToIRLayersMap[cellInStr] = { layer->name };
+    addInnerPrimitiveToProfiler(hiddenInResh, layer->name, layer);
+    addInnerPrimitiveToProfiler(hiddenInStr, layer->name, layer);
+    addInnerPrimitiveToProfiler(cellInResh, layer->name, layer);
+    addInnerPrimitiveToProfiler(cellInStr, layer->name, layer);
+    addInnerPrimitiveToProfiler(concatID, layer->name, layer);
 
     cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
     cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, gemmSz);
@@ -131,25 +134,26 @@ void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine
     topology.add(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
     topology.add(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
     topology.add(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
-                                    0, 0, {}, {}, cldnn_lstm_offset_order_fizo));
+                                    0, 0, {}, {}, cldnn::lstm_weights_order::fizo));
 
-    primitivesToIRLayersMap[lstm_fc_id] = { layer->name };
-    primitivesToIRLayersMap[lstm_elt_id] = { layer->name };
+    addInnerPrimitiveToProfiler(lstm_fc_id, layer->name, layer);
+    addInnerPrimitiveToProfiler(gemmReshapeID, layer->name, layer);
+    addInnerPrimitiveToProfiler(gemmReorderID, layer->name, layer);
+    addInnerPrimitiveToProfiler(lstm_elt_id, layer->name, layer);
 
     cldnn::primitive_id outputHiddenID = layerName;
     topology.add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
+    addInnerPrimitiveToProfiler(outputHiddenID, layer->name, layer);
     cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
     topology.add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
-
-    primitivesToIRLayersMap[outputHiddenID] = { layer->name };
-    primitivesToIRLayersMap[outputCellID] = { layer->name };
+    addInnerPrimitiveToProfiler(outputCellID, layer->name, layer);
 
     // output primitive IDs
     primitiveIDs[outputHiddenID] = outputHiddenID;                                // LSTMCell:LSTMCell - "concat hidden"
     primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = outputHiddenID;   // LSTMCell:LSTMCell:0 - hidden state
     primitiveIDs[outputCellID] = outputCellID;                                    // LSTMCell:LSTMCell:1 - cell state
 
-    profilingIDs.push_back(layerName);
+    addPrimitiveToProfiler(layerName, layer, outputHiddenID);
 }
 
 void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -250,10 +254,10 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
     topology.add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));
     topology.add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
 
-    primitivesToIRLayersMap[inReshapeID] = { layer->name };
-    primitivesToIRLayersMap[permuteID] = { layer->name };
-    primitivesToIRLayersMap[inHiddenReshapeID+"_1"] = { layer->name };
-    primitivesToIRLayersMap[inHiddenReshapeID+"_2"] = { layer->name };
+    addInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
+    addInnerPrimitiveToProfiler(permuteID, layerName, layer);
+    addInnerPrimitiveToProfiler(inHiddenReshapeID+"_1", layerName, layer);
+    addInnerPrimitiveToProfiler(inHiddenReshapeID+"_2", layerName, layer);
 
     for (int i = 0; i < lstm_sequence_len; ++i)
         input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });
@@ -262,14 +266,12 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
 
     if (permute_input) {
         topology.add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));
+        addInnerPrimitiveToProfiler(layerName + "_inputSwap", layerName, layer);
         topology.add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
-
-        primitivesToIRLayersMap[layerName + "_inputSwap"] = { layer->name };
-        primitivesToIRLayersMap[inputSplitID] = { layer->name };
     } else {
         topology.add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
-        primitivesToIRLayersMap[inputSplitID] = { layer->name };
     }
+    addInnerPrimitiveToProfiler(inputSplitID, layerName, layer);
 
     cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
     cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, gemmSz);
@@ -290,29 +292,33 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
         if (hiddenStr != "") {
             topology.add(cldnn::concatenation(concatID, { inputSplitID + ":" + get_string_id(seqIdx), hiddenStr },
                             cldnn::concatenation::concatenation_axis::along_x));
+            addInnerPrimitiveToProfiler(concatID, layerName, layer);
             topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : ""));
+            addInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
+            addInnerPrimitiveToProfiler(inputSplitID + ":" + get_string_id(seqIdx), layerName, layer);
         } else {
             topology.add(cldnn::fully_connected(lstm_fc_id, inputSplitID + ":" + get_string_id(seqIdx), weightID, hasBias ? biasID : ""));
+            addInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
         }
 
         topology.add(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
         topology.add(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
         topology.add(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id,
                                             cellStr, 0, 0, {}, {},
-                                            cldnn_lstm_offset_order_fizo));
+                                            cldnn::lstm_weights_order::fizo));
+        addInnerPrimitiveToProfiler(lstm_fc_resh_id, layerName, layer);
+        addInnerPrimitiveToProfiler(lstm_fc_reor_id, layerName, layer);
+        addInnerPrimitiveToProfiler(lstm_elt_id, layerName, layer);
 
         hiddenStr = crop_id + ":hidden";
         cellStr = crop_id + ":cell";
         topology.add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
+        addInnerPrimitiveToProfiler(hiddenStr, layerName, layer);
         output_ids_offsets.push_back(hiddenStr);
 
-        primitivesToIRLayersMap[lstm_fc_id] = { layer->name };
-        primitivesToIRLayersMap[lstm_elt_id] = { layer->name };
-        primitivesToIRLayersMap[hiddenStr] = { layer->name };
-
         if (i < lstm_sequence_len - 1) {
             topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
-            primitivesToIRLayersMap[cellStr] = { layer->name };
+            addInnerPrimitiveToProfiler(cellStr, layerName, layer);
         } else {
             // last hidden state crop (output 2)
             if (layer->outData.size() > 1) {
@@ -325,8 +331,7 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
             if (layer->outData.size() > 2) {
                 topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
                 cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName();
-                primitivesToIRLayersMap[cellStr] = { layer->name };
-                primitiveIDs[cellStr] = cellStr;
+                addInnerPrimitiveToProfiler(cellStr, layerName, layer);
                 primitiveIDs[outputCellID] = cellStr;
             }
         }
@@ -336,16 +341,13 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
 
     if (permute_input) {
         topology.add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
+        addInnerPrimitiveToProfiler(layerName + "_outputConcat", layerName, layer);
         topology.add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));
-        primitivesToIRLayersMap[layerName + "_outputConcat"] = { layer->name };
     } else {
         topology.add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
     }
-
-    primitivesToIRLayersMap[layerName] = { layer->name };
-    primitiveIDs[layerName] = layerName;
     primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = layerName;
-    profilingIDs.push_back(layerName);
+    addPrimitiveToProfiler(layerName, layer);
 }
 
 void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -478,9 +480,15 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
     topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
     topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
 
+    addInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
+    addInnerPrimitiveToProfiler(permuteID, layerName, layer);
+
     topology.add(cldnn::reshape(inHiddenReshapeID + "_1", inputPrimitives[1], hiddenStateShape));
     topology.add(cldnn::reshape(inHiddenReshapeID + "_2", inputPrimitives[2], hiddenStateShape));
 
+    addInnerPrimitiveToProfiler(inHiddenReshapeID + "_1", layerName, layer);
+    addInnerPrimitiveToProfiler(inHiddenReshapeID + "_2", layerName, layer);
+
     cldnn::primitive_id dynID = layerName + "_dynLength";
     cldnn::primitive_id dynReshapeID = layerName + "_dynReshape";
     cldnn::tensor dynShape = { 1, 1, lstm_batch_size, 1 };
@@ -488,10 +496,8 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
     topology.add(cldnn::reshape(dynReshapeID, inputPrimitives[3], dynShape));
     topology.add(cldnn::reorder(dynID, dynReshapeID, dynLayout));
 
-    primitivesToIRLayersMap[inReshapeID] = { layer->name };
-    primitivesToIRLayersMap[permuteID] = { layer->name };
-    primitivesToIRLayersMap[inHiddenReshapeID + "_1"] = { layer->name };
-    primitivesToIRLayersMap[inHiddenReshapeID + "_2"] = { layer->name };
+    addInnerPrimitiveToProfiler(dynReshapeID, layerName, layer);
+    addInnerPrimitiveToProfiler(dynID, layerName, layer);
 
     cldnn::primitive_id inputID = permuteID;
     cldnn::primitive_id prevInputID = permuteID;
@@ -500,14 +506,15 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
         inputID = layerName + "_inputSwap";
         topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
         prevInputID = inputID;
+        addInnerPrimitiveToProfiler(inputID, layerName, layer);
     }
-    primitivesToIRLayersMap[inputID] = { layer->name };
 
     cldnn::primitive_id seq_len_id = layer->name + "seq_lengths";
     if (reverseSeq) {
         inputID = layerName + "_inputReverse";
         topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
         primitivesToIRLayersMap[inputID] = { layer->name };
+        addInnerPrimitiveToProfiler(inputID, layerName, layer);
         prevInputID = inputID;
     }
 
@@ -538,26 +545,25 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
         weightID, recurrentID, outputHiddenID, outputCellID, biasID,
         inHiddenReshapeID + "_1", inHiddenReshapeID + "_2"));
     prevInputID = inputID = dlstmID;
-    primitivesToIRLayersMap[dlstmID] = { layer->name };
+    addInnerPrimitiveToProfiler(dlstmID, layerName, layer);
 
     if (reverseSeq) {
         inputID = layerName + "_outputReverse";
         topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
-        primitivesToIRLayersMap[inputID] = { layer->name };
+        addInnerPrimitiveToProfiler(inputID, layerName, layer);
         prevInputID = inputID;
     }
 
     if (permute_input) {
         inputID = layerName + "_outputSwap";
         topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
-        primitivesToIRLayersMap[inputID] = { layer->name };
+        addInnerPrimitiveToProfiler(inputID, layerName, layer);
         prevInputID = inputID;
     }
 
-    primitiveIDs[layerName] = inputID;
     primitiveIDs[inputID] = inputID;
     primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = inputID;
-    profilingIDs.push_back(layerName);
+    addPrimitiveToProfiler(layerName, layer, inputID);
 }
 
 void Program::CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
index cd8cf98..7c19b50 100644 (file)
@@ -7,52 +7,56 @@
 #include <unordered_set>
 #include <sstream>
 #include <functional>
-#include <CPP/cldnn_defs.h>
-#include <CPP/data.hpp>
-#include <CPP/input_layout.hpp>
-#include <CPP/reorder.hpp>
-#include <CPP/convolution.hpp>
-#include <CPP/binary_convolution.hpp>
-#include <CPP/pooling.hpp>
-#include <CPP/lrn.hpp>
-#include <CPP/fully_connected.hpp>
-#include <CPP/softmax.hpp>
-#include <CPP/activation.hpp>
-#include <CPP/concatenation.hpp>
-#include <CPP/proposal.hpp>
-#include <CPP/roi_pooling.hpp>
-#include <CPP/scale.hpp>
-#include <CPP/crop.hpp>
-#include <CPP/deconvolution.hpp>
-#include <CPP/prior_box.hpp>
-#include <CPP/detection_output.hpp>
-#include <CPP/normalize.hpp>
-#include <CPP/reshape.hpp>
-#include <CPP/batch_norm.hpp>
-#include <CPP/permute.hpp>
-#include <CPP/split.hpp>
-#include <CPP/upsampling.hpp>
-#include <CPP/network.hpp>
-#include <CPP/profiling.hpp>
-#include <CPP/custom_gpu_primitive.hpp>
-#include <CPP/reorg_yolo.hpp>
-#include <CPP/region_yolo.hpp>
-#include <CPP/mutable_data.hpp>
-#include <CPP/max_unpooling.hpp>
-#include <CPP/arg_max_min.hpp>
-#include <CPP/mvn.hpp>
-#include <CPP/tile.hpp>
-#include <CPP/border.hpp>
-#include <CPP/gather.hpp>
-#include <CPP/depth_to_space.hpp>
-#include <CPP/shuffle_channels.hpp>
-#include <CPP/strided_slice.hpp>
-#include <CPP/reverse_sequence.hpp>
-#include <CPP/quantize.hpp>
-#include <CPP/broadcast.hpp>
-#include <CPP/gemm.hpp>
-#include <CPP/reduce.hpp>
-#include <CPP/one_hot.hpp>
+#include <type_traits>
+
+#include <api/cldnn.hpp>
+#include <api/data.hpp>
+#include <api/input_layout.hpp>
+#include <api/reorder.hpp>
+#include <api/convolution.hpp>
+#include <api/binary_convolution.hpp>
+#include <api/pooling.hpp>
+#include <api/lrn.hpp>
+#include <api/fully_connected.hpp>
+#include <api/softmax.hpp>
+#include <api/activation.hpp>
+#include <api/concatenation.hpp>
+#include <api/proposal.hpp>
+#include <api/roi_pooling.hpp>
+#include <api/scale.hpp>
+#include <api/crop.hpp>
+#include <api/deconvolution.hpp>
+#include <api/prior_box.hpp>
+#include <api/detection_output.hpp>
+#include <api/normalize.hpp>
+#include <api/reshape.hpp>
+#include <api/batch_norm.hpp>
+#include <api/permute.hpp>
+#include <api/split.hpp>
+#include <api/upsampling.hpp>
+#include <api/network.hpp>
+#include <api/profiling.hpp>
+#include <api/custom_gpu_primitive.hpp>
+#include <api/reorg_yolo.hpp>
+#include <api/region_yolo.hpp>
+#include <api/mutable_data.hpp>
+#include <api/max_unpooling.hpp>
+#include <api/arg_max_min.hpp>
+#include <api/mvn.hpp>
+#include <api/tile.hpp>
+#include <api/border.hpp>
+#include <api/gather.hpp>
+#include <api/depth_to_space.hpp>
+#include <api/shuffle_channels.hpp>
+#include <api/strided_slice.hpp>
+#include <api/reverse_sequence.hpp>
+#include <api/quantize.hpp>
+#include <api/broadcast.hpp>
+#include <api/gemm.hpp>
+#include <api/reduce.hpp>
+#include <api/one_hot.hpp>
+#include <api/gather_tree.hpp>
+
 #include <chrono>
 #include <cmath>
 #include <algorithm>
@@ -87,18 +91,38 @@ const cldnn::primitive_id Program::m_scalesTag("_cldnn_scales");
 const cldnn::primitive_id Program::m_preCustomLayerTag("_cldnn_custom_preprocess");
 const cldnn::primitive_id Program::m_postCustomLayerTag("_cldnn_custom_postprocess");
 
-static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) {  // todo: add more checks
+static bool isValid(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) {  // todo: add more checks
     if (inputs && layer->insData.size() != inputs) {
-        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name);
+        return false;
     }
+
     if (layer->_fusedWith) {
-        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
+        return false;
+    }
+
+    return true;
+}
+
+static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) {
+    if (!isValid(layer, inputs)) {
+        THROW_CLDNN_EXCEPTION("Layer " << layer->name << " is inconsistent");
     }
 }
 
-static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) {
-    if (layer->_fusedWith) {
-        THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
+static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, std::vector<uint32_t> inputs) {  // todo: add more checks
+    bool is_valid = false;
+    if (inputs.empty()) {
+        if (!layer->_fusedWith) {
+            is_valid = true;
+        }
+    } else {
+        for (auto& input : inputs) {
+            is_valid |= isValid(layer, input);
+        }
+    }
+
+    if (!is_valid) {
+        THROW_CLDNN_EXCEPTION("Layer " << layer->name << " is inconsistent");
     }
 }
 
@@ -198,8 +222,7 @@ Program::Program(InferenceEngine::ICNNNetwork& network, std::shared_ptr<const cl
         ICNNNetworkStats* pstats = nullptr;
         StatusCode s = network.getStats(&pstats, nullptr);
 
-        // Check for FP32 main precision as further quantization of FP16 seems pointless and is not supported by normalizer
-        if (s == StatusCode::OK && pstats && !pstats->isEmpty() && network.getPrecision() == Precision::FP32) {
+        if (s == StatusCode::OK && pstats && !pstats->isEmpty()) {
             CNNNetworkInt8Normalizer normalizer;
             normalizer.NormalizeNetwork(network, *pstats);
         }
@@ -478,6 +501,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
         { "StridedSlice" , StridedSlice },
         { "ReverseSequence" , ReverseSequence },
         { "BinaryConvolution" , BinaryConvolution },
+        { "FakeQuantize" , Quantize },
         { "Quantize" , Quantize },
         { "Broadcast" , Broadcast },
         { "Squeeze" , Squeeze },
@@ -514,7 +538,9 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
         { "SoftSign" , SoftSign },
         { "Tan" , Tan },
         { "GEMM", Gemm },
-        { "OneHot", OneHot}
+        { "OneHot", OneHot},
+        { "GatherTree", GatherTree},
+        { "Convert", Convert }
     };
     auto it = LayerNameToType.find(str);
     if (it != LayerNameToType.end())
@@ -594,6 +620,23 @@ auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def
     }
 };
 
+template<class Type>
+std::vector<Type> PermuteIEDimsToCldnnOrder(const std::vector<Type>& ie_order, Type value_to_align = 0) {
+    static_assert(std::is_integral<Type>::value, "Integral required.");
+    std::vector<Type> cldnn_order = ie_order;
+
+    // 1. Align to min. 4 sizes
+    if (cldnn_order.size() < 4)
+        cldnn_order.push_back(value_to_align);
+
+    // 2. Swap spatial positions
+    for (int i = 0; i < (cldnn_order.size() - 2) / 2; i++) {
+        std::swap(cldnn_order[2 + i], cldnn_order[1 + cldnn_order.size() - (2 + i)]);
+    }
+
+    return cldnn_order;
+}
+
 cldnn::primitive_id Program::CreatePrimitiveFromBlob(cldnn::topology& topology,
                                                      cldnn::primitive_id primID,
                                                      const InferenceEngine::Blob::Ptr pBlob,
@@ -708,7 +751,7 @@ void Program::CreateWeightAndBiasPrimitives(cldnn::topology& topology,
             weightDimsVec.push_back(TensorValue(convLayer->_kernel[i]));
         }
         outFeatures = convLayer->_out_depth;
-        pWeightsBlob = getBlob(layer, "weights");
+        pWeightsBlob = getBlobOrNull(layer, "weights");
         pBiasBlob = getBlobOrNull(layer, "biases");
         break;
     }
@@ -726,7 +769,7 @@ void Program::CreateWeightAndBiasPrimitives(cldnn::topology& topology,
             weightDimsVec.push_back(TensorValue(deconvLayer->_kernel[i]));
         }
         outFeatures = deconvLayer->_out_depth;
-        pWeightsBlob = getBlob(layer, "weights");
+        pWeightsBlob = getBlobOrNull(layer, "weights");
         pBiasBlob = getBlobOrNull(layer, "biases");
 
         if ((groupSize < outFeatures) || (groupSize < inFeatures))
@@ -752,26 +795,35 @@ void Program::CreateWeightAndBiasPrimitives(cldnn::topology& topology,
         break;
     }
 
+    if (pWeightsBlob == nullptr) {
+        if (layer->insData.size() == 1)
+            THROW_IE_EXCEPTION << "No weights found in weightable layer " + layer->name;
+    }
+
     // create weights primitive
     cldnn::format wFmt = m_defaultFormat;
     if (weightDimsVec.size() > 4)
         wFmt = cldnn::format::bfzyx;
 
-    cldnn::layout weightsLayout = cldnn::layout(
-        DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()),
-        wFmt,
-        cldnn::tensor(weightDimsVec));
-    size_t bytesPerGroup = weightsLayout.bytes_count();
-
-    for (unsigned g = 0; g < groupSize; g++) {
-        cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g);
-        weightID = CreatePrimitiveFromBlob(topology,
-                                           weightID,
-                                           pWeightsBlob,
-                                           weightsLayout,
-                                           g * bytesPerGroup,
-                                           rearrange);
-        weightsPrimID.push_back(weightID);
+    if (pWeightsBlob == nullptr) {
+        auto wei_name = layer_type_name_ID(layer->insData[1].lock()->getCreatorLayer().lock());
+        weightsPrimID.push_back(wei_name);
+    } else {
+        cldnn::layout weightsLayout = cldnn::layout(
+            DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()),
+            wFmt,
+            cldnn::tensor(weightDimsVec));
+        size_t bytesPerGroup = weightsLayout.bytes_count();
+        for (unsigned g = 0; g < groupSize; g++) {
+            cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g);
+            weightID = CreatePrimitiveFromBlob(topology,
+                                               weightID,
+                                               pWeightsBlob,
+                                               weightsLayout,
+                                               g * bytesPerGroup,
+                                               rearrange);
+            weightsPrimID.push_back(weightID);
+        }
     }
 
     // create bias primitive
@@ -779,7 +831,7 @@ void Program::CreateWeightAndBiasPrimitives(cldnn::topology& topology,
         cldnn::layout biasesLayout = cldnn::layout(
             DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()),
             FormatFromLayout(pBiasBlob->getTensorDesc().getLayout()),
-            (cldnn::tensor) cldnn::spatial(TensorValue(outFeatures / groupSize)));
+            (cldnn::tensor) cldnn::feature(TensorValue(outFeatures / groupSize)));
         size_t bytesPerGroup = biasesLayout.bytes_count();
         for (unsigned g = 0; g < groupSize; g++) {
             cldnn::primitive_id biasID = layer_type_name_ID(layer) + m_biasesTag + std::to_string(g);
@@ -790,6 +842,9 @@ void Program::CreateWeightAndBiasPrimitives(cldnn::topology& topology,
                                              g * bytesPerGroup);
             biasesPrimID.push_back(biasID);
         }
+    } else if (layer->insData.size() == 3) {
+        auto bias_name = layer_type_name_ID(layer->insData[2].lock()->getCreatorLayer().lock());
+        biasesPrimID.push_back(bias_name);
     }
 }
 
@@ -824,6 +879,11 @@ void Program::CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology,
         };
         pWeightsBlob = binaryConvLayer->_weights;
         pBiasBlob = binaryConvLayer->_biases;
+
+        if (pWeightsBlob == nullptr) {
+            if (binaryConvLayer->insData.size() == 1)
+                THROW_IE_EXCEPTION << "No weights found in binary convolution layer " + layer->name;
+        }
         break;
     }
     default:
@@ -831,19 +891,24 @@ void Program::CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology,
     }
 
     // create weights primitive
-    cldnn::layout weightsLayout = cldnn::layout(
-        cldnn::data_types::bin,
-        cldnn::format::bfyx,
-        cldnn::tensor(weightDimsVec));
-
-    cldnn::primitive_id weightID = layer->name + m_weightsTag;
-    weightID = CreatePrimitiveFromBlob(topology,
-                                       weightID,
-                                       pWeightsBlob,
-                                       weightsLayout,
-                                       0,
-                                       rearrange);
-    weightsPrimID.push_back(weightID);
+    if (pWeightsBlob == nullptr) {
+        auto wei_name = layer_type_name_ID(layer->insData[1].lock()->getCreatorLayer().lock());
+        weightsPrimID.push_back(wei_name);
+    } else {
+        cldnn::layout weightsLayout = cldnn::layout(
+            cldnn::data_types::bin,
+            cldnn::format::bfyx,
+            cldnn::tensor(weightDimsVec));
+
+        cldnn::primitive_id weightID = layer->name + m_weightsTag;
+        weightID = CreatePrimitiveFromBlob(topology,
+                                           weightID,
+                                           pWeightsBlob,
+                                           weightsLayout,
+                                           0,
+                                           rearrange);
+        weightsPrimID.push_back(weightID);
+    }
 
     // create bias primitive
     if (pBiasBlob != nullptr) {
@@ -891,18 +956,13 @@ void Program::CreateScaleWeightsAndBiasesFromBN(cldnn::topology& topology,
         auto varianceData = static_cast<const uint16_t *>(bnLayer->_weights->buffer());
         auto meanData = static_cast<const uint16_t *>(bnLayer->_biases->buffer());
 
-        cldnn_status status = CLDNN_SUCCESS;
         for (size_t i = 0; i < weightsBlob.size(); i++) {
-            auto variance = cldnn_half_to_float(varianceData[i], &status);
-            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
-            auto mean = cldnn_half_to_float(meanData[i], &status);
-            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
+            auto variance = cldnn::half_to_float(varianceData[i]);
+            auto mean = cldnn::half_to_float(meanData[i]);
 
             float scale = 1.0f / sqrt(variance + bnLayer->epsilon);
-            weightsData[i] = cldnn_float_to_half(scale, &status);
-            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
-            biasesData[i] = cldnn_float_to_half((-mean) * scale, &status);
-            if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name);
+            weightsData[i] = cldnn::float_to_half(scale);
+            biasesData[i] = cldnn::float_to_half((-mean) * scale);
         }
         weightsPrimID = CreatePrimitiveFromBlob(topology, weightsPrimID,
                                                 std::make_shared<InferenceEngine::TBlob<uint16_t>>(weightsBlob), blobLayout);
@@ -997,7 +1057,7 @@ void Program::CreateQuantizationPrimitives(cldnn::topology& topology,
 
 void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
     // Initialize a profiling entry
-    InitProfileInfo(layer->name, layer_type_lower(layer));
+    InitProfileInfo(layer->name, layer->type);
 
     // First check for custom layer
     auto customLayer = m_config.customLayers.find(layer->type);
@@ -1144,6 +1204,10 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
             break;
         case OneHot: CreateOneHotPrimitive(topology, layer);
             break;
+        case Convert: CreateConvertPrimitive(topology, layer);
+            break;
+        case GatherTree: CreateGatherTreePrimitive(topology, layer);
+            break;
         default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
     }
 }
@@ -1202,9 +1266,7 @@ void Program::CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngi
             DataTypeFromPrecision(layerPrecision));
 
         topology.add(inReorderPrim);
-        profilingIDs.push_back(inReorderName);
-        primitivesToIRLayersMap[inReorderName] = { layer->name };
-        primitiveIDs[inReorderName] = inReorderName;
+        addInnerPrimitiveToProfiler(inReorderName, scaleShiftLayerName, layer);
 
         prevLayerName = inReorderName;
     }
@@ -1218,8 +1280,6 @@ void Program::CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngi
     prevLayerName = scaleShiftLayerName;
 
     topology.add(scaleShiftPrim);
-    profilingIDs.push_back(scaleShiftLayerName);
-    primitivesToIRLayersMap[scaleShiftLayerName] = { layer->name };
 
     // Cast output data if it doesn't match operating precision
     if (outPrecision != layerPrecision) {
@@ -1232,14 +1292,12 @@ void Program::CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngi
             DataTypeFromPrecision(outPrecision));
 
         topology.add(outReorderPrim);
-        profilingIDs.push_back(outReorderName);
-        primitivesToIRLayersMap[outReorderName] = { layer->name };
-        primitiveIDs[outReorderName] = outReorderName;
+        addInnerPrimitiveToProfiler(outReorderName, scaleShiftLayerName, layer);
 
         prevLayerName = outReorderName;
     }
 
-    primitiveIDs[scaleShiftLayerName] = prevLayerName;
+    addPrimitiveToProfiler(scaleShiftLayerName, layer, prevLayerName);
 }
 
 void Program::CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer) {
@@ -1328,8 +1386,6 @@ void Program::CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine
                 shift_anchors,
                 normalize);
 
-        primitivesToIRLayersMap[proposalLayerName] = { layer->name };
-        primitiveIDs[proposalLayerName] = proposalLayerName;
         topology.add(proposalPrim);
 
         cldnn::primitive_id proposal_mutable_id_r = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
@@ -1338,7 +1394,7 @@ void Program::CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine
         primitiveIDs[proposal_mutable_id_r] = proposal_mutable_id_r;
         topology.add(argmax_mutable_prim_r);
 
-        profilingIDs.push_back(proposalLayerName);
+        addPrimitiveToProfiler(proposalLayerName, layer);
         return;
     }
 
@@ -1369,10 +1425,8 @@ void Program::CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine
         shift_anchors,
         normalize);
 
-    primitivesToIRLayersMap[proposalLayerName] = { layer->name };
-    primitiveIDs[proposalLayerName] = proposalLayerName;
     topology.add(proposalPrim);
-    profilingIDs.push_back(proposalLayerName);
+    addPrimitiveToProfiler(proposalLayerName, layer);
 }
 
 void Program::CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1405,25 +1459,19 @@ void Program::CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::C
             break;
         case InferenceEngine::Precision::FP16:
         {
-            cldnn_status status = CLDNN_SUCCESS;
-            slope = cldnn_half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()), &status);
-            if (status != CLDNN_SUCCESS) {
-                THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name);
-            }
+            slope = cldnn::half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()));
         }
             break;
         default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
         }
-        topology.add(cldnn::activation(preluLayerName, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
+        topology.add(cldnn::activation(preluLayerName, inputPrimitives[0], cldnn::activation_func::relu_negative_slope, { slope, 0.f }));
     } else {
         cldnn::primitive_id slopePrimID(preluLayerName + "_" + blobName + m_weightsTag);
         auto map = CreateGenericLayerBlobPrimitives(topology, preluLayer);
-        topology.add(cldnn::activation(preluLayerName, inputPrimitives[0], map.at(slopePrimID), activation_relu_negative_slope));
+        topology.add(cldnn::activation(preluLayerName, inputPrimitives[0], map.at(slopePrimID), cldnn::activation_func::relu_negative_slope));
     }
 
-    primitivesToIRLayersMap[preluLayerName] = { layer->name };
-    primitiveIDs[preluLayerName] = preluLayerName;
-    profilingIDs.push_back(preluLayerName);
+    addPrimitiveToProfiler(preluLayerName, layer);
 }
 
 void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer) {
@@ -1442,11 +1490,7 @@ void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, Infer
     CreateScaleWeightsAndBiasesFromBN(topology, bnLayer, weightID, biasID);
     auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
 
-    primitivesToIRLayersMap[bnLayerName] = { layer->name };
-    primitiveIDs[bnLayerName] = bnLayerName;
     topology.add(scalePrim);
-    profilingIDs.push_back(bnLayerName);
-    return;
 #else
     cldnn::tensor blobTensor(0);
     const auto bnDims = bnLayer->outData[0]->getTensorDesc().getDims();
@@ -1480,11 +1524,9 @@ void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, Infer
         varianceID,
         bnLayer->epsilon);
 
-    primitivesToIRLayersMap[bnLayerName] = { layer->name };
-    primitiveIDs[bnLayerName] = bnLayerName;
     topology.add(bnPrim);
-    profilingIDs.push_back(bnLayerName);
 #endif  // _SCALE_BN_OPT
+    addPrimitiveToProfiler(bnLayerName, layer);
 }
 
 void Program::CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1498,10 +1540,8 @@ void Program::CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine:
         inputPrimitives[0],
         CldnnTensorFromIEDims(flattenLayer->outData[0]->getTensorDesc().getDims()));
 
-    primitivesToIRLayersMap[flattenLayerName] = { layer->name };
-    primitiveIDs[flattenLayerName] = flattenLayerName;
     topology.add(flattenPrim);
-    profilingIDs.push_back(flattenLayerName);
+    addPrimitiveToProfiler(flattenLayerName, layer);
 }
 
 void Program::CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1528,10 +1568,7 @@ void Program::CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine:
         else
             cldnn_permute_order.push_back(o);
     }
-    // 2. Swap spatial positions
-    for (int i = 0; i < (cldnn_permute_order.size() - 2) / 2; i++) {
-        std::swap(cldnn_permute_order[2 + i], cldnn_permute_order[1 + cldnn_permute_order.size() - (2 + i)]);
-    }
+    cldnn_permute_order = PermuteIEDimsToCldnnOrder(cldnn_permute_order);
 
     std::string permuteLayerName = layer_type_name_ID(layer);
 
@@ -1540,10 +1577,8 @@ void Program::CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine:
         inputPrimitives[0],
         cldnn_permute_order);
 
-    primitivesToIRLayersMap[permuteLayerName] = { layer->name };
-    primitiveIDs[permuteLayerName] = permuteLayerName;
     topology.add(permutePrim);
-    profilingIDs.push_back(permuteLayerName);
+    addPrimitiveToProfiler(permuteLayerName, layer);
 }
 
 void Program::CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1574,8 +1609,8 @@ void Program::CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine:
 
         cldnn::layout outputLayout(DataTypeFromPrecision(outDesc.getPrecision()), outputFormat, outTensor);
         topology.add(cldnn::reorder(reorderId, reshapeInputId, outputLayout));
+        addInnerPrimitiveToProfiler(reorderId, reshapeLayerName, layer);
         reshapeInputId = reorderId;
-        primitivesToIRLayersMap[reorderId] = { layer->name };
     }
 
     auto reshapePrim = cldnn::reshape(
@@ -1583,10 +1618,8 @@ void Program::CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine:
         reshapeInputId,
         outTensor);
 
-    primitivesToIRLayersMap[reshapeLayerName] = { layer->name };
-    primitiveIDs[reshapeLayerName] = reshapeLayerName;
     topology.add(reshapePrim);
-    profilingIDs.push_back(reshapeLayerName);
+    addPrimitiveToProfiler(reshapeLayerName, layer);
 }
 
 void Program::CreateNormalizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1613,10 +1646,8 @@ void Program::CreateNormalizePrimitive(cldnn::topology& topology, InferenceEngin
         across_spatial,
         eps);
 
-    primitivesToIRLayersMap[normLayerName] = { layer->name };
-    primitiveIDs[normLayerName] = normLayerName;
     topology.add(normPrim);
-    profilingIDs.push_back(normLayerName);
+    addPrimitiveToProfiler(normLayerName, layer);
 }
 
 void Program::CreateDetectionOutputPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1670,10 +1701,8 @@ void Program::CreateDetectionOutputPrimitive(cldnn::topology& topology, Inferenc
                                                  clip_before_nms,
                                                  clip_after_nms);
 
-    primitivesToIRLayersMap[detectionLayerName] = { layer->name };
-    primitiveIDs[detectionLayerName] = detectionLayerName;
     topology.add(detectionPrim);
-    profilingIDs.push_back(detectionLayerName);
+    addPrimitiveToProfiler(detectionLayerName, layer);
 }
 
 void Program::CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1744,14 +1773,12 @@ void Program::CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine
         offset,
         scale_all_sizes);
 
-    primitivesToIRLayersMap[priorBoxLayerName] = { layer->name };
-    primitiveIDs[priorBoxLayerName] = priorBoxLayerName;
     topology.add(priorBoxPrim);
-    profilingIDs.push_back(priorBoxLayerName);
+    addPrimitiveToProfiler(priorBoxLayerName, layer);
 }
 
 void Program::CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
+    ValidateLayer(layer, {1, 2, 3});
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto deconvLayer = as<InferenceEngine::DeconvolutionLayer *> (layer);
 
@@ -1789,7 +1816,7 @@ void Program::CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceE
 
     std::string deconvLayerName = layer_type_name_ID(layer);
 
-    if (deconvLayer->_group >= 16) {
+    if (deconvLayer->_group >= 16 || layer->insData.size() > 1) {
         auto deconvPrim = cldnn::deconvolution(deconvLayerName,
             inputPrimitives[0],
             weightPrimID,
@@ -1797,8 +1824,6 @@ void Program::CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceE
             deconvLayer->_group,
             stride,
             padding,
-            false,
-            0.0f,
             CldnnTensorFromIEDims(deconvLayer->outData[0]->getTensorDesc().getDims()));
         topology.add(deconvPrim);
     } else {
@@ -1808,14 +1833,10 @@ void Program::CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceE
             biasPrimID,
             stride,
             padding,
-            false,
-            0.0f,
             CldnnTensorFromIEDims(deconvLayer->outData[0]->getTensorDesc().getDims()));
         topology.add(deconvPrim);
     }
-    primitivesToIRLayersMap[deconvLayerName] = { layer->name };
-    primitiveIDs[deconvLayerName] = deconvLayerName;
-    profilingIDs.push_back(deconvLayerName);
+    addPrimitiveToProfiler(deconvLayerName, layer);
 }
 
 void Program::CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1858,10 +1879,8 @@ void Program::CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CN
         refSize,
         offSize);
 
-    primitivesToIRLayersMap[cropLayerName] = { layer->name };
-    primitiveIDs[cropLayerName] = cropLayerName;
     topology.add(cropPrim);
-    profilingIDs.push_back(cropLayerName);
+    addPrimitiveToProfiler(cropLayerName, layer);
 }
 
 void Program::CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1891,10 +1910,8 @@ void Program::CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngi
                                              pooled_height,
                                              spatial_scale);
 
-    primitivesToIRLayersMap[roiPoolingLayerName] = { layer->name };
-    primitiveIDs[roiPoolingLayerName] = roiPoolingLayerName;
     topology.add(roiPoolingPrim);
-    profilingIDs.push_back(roiPoolingLayerName);
+    addPrimitiveToProfiler(roiPoolingLayerName, layer);
 }
 
 void Program::CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -1954,12 +1971,10 @@ void Program::CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEn
                                                    spatial_bins_y);
         topology.add(psROIPoolingPrim);
     }
-    primitivesToIRLayersMap[psROIPoolingLayerName] = {layer->name};
-    primitiveIDs[psROIPoolingLayerName] = psROIPoolingLayerName;
-    profilingIDs.push_back(psROIPoolingLayerName);
+    addPrimitiveToProfiler(psROIPoolingLayerName, layer);
 }
 
-void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) {
+void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer, CLDNNCustomLayerPtr customLayer) {
     ValidateLayer(layer, 0);
     // todo: handling fusing
     auto genericLayer = as<InferenceEngine::GenericLayer*> (layer);
@@ -2002,14 +2017,15 @@ void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEng
     }
 
     // Handle kernel parameters
-    std::vector<cldnn_arg> kernelParameters;
+    std::vector<cldnn::custom_gpu_primitive::arg_desc> kernelParameters;
     cldnn::format outputFormat(cldnn::format::any);
     for (const auto& param : customLayer->KernelParams()) {
         switch (param.type) {
         case CLDNNCustomLayer::ParamType::Input: {
             kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
-            kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
-            kernelParameters[param.paramIndex].index = static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
+            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input;
+            kernelParameters[param.paramIndex].index =
+                static_cast<cldnn::custom_gpu_primitive::arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
 
             // Handle input reorder
             if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
@@ -2022,10 +2038,8 @@ void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEng
                         param.format,
                         DataTypeFromPrecision(layer->precision));
 
-                    primitivesToIRLayersMap[reorderPrimName] = { layer->name };
                     topology.add(preprocessPrim);
-                    profilingIDs.push_back(reorderPrimName);
-                    InitProfileInfo(reorderPrimName, "Reorder");
+                    addInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(layer), layer);
                     reorderedInputs[param.portIndex] = (reorderPrimName);
                 } else {
                     reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
@@ -2035,17 +2049,17 @@ void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEng
             break;
         case CLDNNCustomLayer::ParamType::Output: {
             kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
-            kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output;
+            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_output;
             kernelParameters[param.paramIndex].index =
-                static_cast<cldnn_arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
+                static_cast<cldnn::custom_gpu_primitive::arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
             outputFormat = param.format;
         }
             break;
         case CLDNNCustomLayer::ParamType::Data: {
             kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
-            kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input;
+            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input;
             kernelParameters[param.paramIndex].index =
-                static_cast<cldnn_arg_index>((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName));
+                static_cast<cldnn::custom_gpu_primitive::arg_index>((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName));
         }
             break;
         default:
@@ -2121,6 +2135,7 @@ void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEng
         gws,
         lws);
 
+    auto prevLayerName = genericLayerName;
     if (outputLayout.format != cldnn::format::any &&
         p_currentOutputs.find(genericLayerName) == p_currentOutputs.end()) {
         // Handle output reorder
@@ -2131,17 +2146,12 @@ void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEng
                 genericLayerName,
                 m_defaultFormat,
                 customPrim.output_layout.data_type));
-        primitivesToIRLayersMap[reorderPrimName] = { layer->name };
-        primitiveIDs[genericLayerName] = reorderPrimName;
-        primitiveIDs[reorderPrimName] = reorderPrimName;
-        profilingIDs.push_back(reorderPrimName);
-        InitProfileInfo(reorderPrimName, "Reorder");
-    } else {
-        primitiveIDs[genericLayerName] = genericLayerName;
+        prevLayerName = reorderPrimName;
+        addInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(layer), layer);
     }
-    primitivesToIRLayersMap[genericLayerName] = { layer->name };
     topology.add(customPrim);
-    profilingIDs.push_back(genericLayerName);
+    addPrimitiveToProfiler(genericLayerName, layer);
+    primitiveIDs[genericLayerName] = prevLayerName;
 }
 
 void Program::CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -2174,14 +2184,12 @@ void Program::CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngi
         { 0.5f, 1.0f, 2.0f },  // ratios for the SimplerNMS variant
         scale);
 
-    primitivesToIRLayersMap[simpleNMSLayerName] = { layer->name };
-    primitiveIDs[simpleNMSLayerName] = simpleNMSLayerName;
     topology.add(simpleNMSPrim);
-    profilingIDs.push_back(simpleNMSLayerName);
+    addPrimitiveToProfiler(simpleNMSLayerName, layer);
 }
 
 void Program::CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
-    ValidateEltwiseLayer(layer);
+    ValidateLayer(layer, {});
 
     auto eltwiseLayer = as<InferenceEngine::EltwiseLayer *> (layer);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
@@ -2228,8 +2236,6 @@ void Program::CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine:
     }
 
     topology.add(eltwisePrim);
-    primitivesToIRLayersMap[eltwiseLayerName] = { layer->name };
-    profilingIDs.push_back(eltwiseLayerName);
 
     // Cast output data type if it differs from operation precision
     auto operationPrecision = layer->precision;
@@ -2246,14 +2252,12 @@ void Program::CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine:
             DataTypeFromPrecision(outputPrecision));
 
         topology.add(reorderPrim);
-        primitivesToIRLayersMap[reorderLayerName] = { layer->name };
-        profilingIDs.push_back(reorderLayerName);
-        primitiveIDs[reorderLayerName] = reorderLayerName;
+        addInnerPrimitiveToProfiler(reorderLayerName, eltwiseLayerName, layer);
 
         lastLayerName = reorderLayerName;
     }
 
-    primitiveIDs[eltwiseLayerName] = lastLayerName;
+    addPrimitiveToProfiler(eltwiseLayerName, layer, lastLayerName);
 }
 
 inline cldnn::concatenation::concatenation_axis ConcatAxisFromIEAxis(unsigned axis, unsigned sz) {
@@ -2301,10 +2305,8 @@ void Program::CreateConcatenatePrimitive(cldnn::topology& topology, InferenceEng
         ConcatAxisFromIEAxis(concatLayer->_axis,
                              concatLayer->input().get()->getTensorDesc().getDims().size()));
 
-    primitivesToIRLayersMap[concatLayerName] = { layer->name };
-    primitiveIDs[concatLayerName] = concatLayerName;
     topology.add(concatPrim);
-    profilingIDs.push_back(concatLayerName);
+    addPrimitiveToProfiler(concatLayerName, layer);
 }
 
 void Program::CreateSplitPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -2445,8 +2447,6 @@ void Program::CreateFusedSplitConvMergePrimitive(cldnn::topology& topology, Infe
                                         stride,
                                         padding,
                                         dilation,
-                                        false,
-                                        0.0f,
                                         CldnnTensorFromIEDims(concatLayer->outData[0]->getTensorDesc().getDims()));
 
     layer = concatLayerPtr;
@@ -2473,20 +2473,18 @@ void Program::CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::C
 
         std::string powerLayerName = layer_type_name_ID(layer);
         std::string linearLayerName = powerLayerName + "_linear_activation";
-        auto linearActivationPrim = cldnn::activation(linearLayerName, inputPrimitives[0], activation_linear, { scale, shift });
+        auto linearActivationPrim = cldnn::activation(linearLayerName, inputPrimitives[0], cldnn::activation_func::linear, { scale, shift });
         topology.add(linearActivationPrim);
-        profilingIDs.push_back(linearLayerName);
-        primitiveIDs[linearLayerName] = linearLayerName;
+        addInnerPrimitiveToProfiler(linearLayerName, powerLayerName, layer);
 
-        auto powActivationPrim = cldnn::activation(powerLayerName, linearLayerName, activation_pow, { power, 0.f });
+        auto powActivationPrim = cldnn::activation(powerLayerName, linearLayerName, cldnn::activation_func::pow, { power, 0.f });
         topology.add(powActivationPrim);
-        profilingIDs.push_back(powerLayerName);
-        primitiveIDs[powerLayerName] = powerLayerName;
+        addPrimitiveToProfiler(powerLayerName, layer);
     } else {
         std::string powerLayerName = layer_type_name_ID(layer);
         if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
             if (powerLayer->power == 0.5f) {
-                auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], activation_sqrt);
+                auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], cldnn::activation_func::sqrt);
                 topology.add(activationPrim);
                 profilingIDs.push_back(powerLayerName);
                 primitiveIDs[powerLayerName] = powerLayerName;
@@ -2520,7 +2518,7 @@ void Program::CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::C
             profilingIDs.push_back(powerLayerName);
 
             if (powerLayer->power == 0.5f) {
-                auto activationPrim = cldnn::activation(powerLayerName + "_sqrt", powerLayerName, activation_sqrt);
+                auto activationPrim = cldnn::activation(powerLayerName + "_sqrt", powerLayerName, cldnn::activation_func::sqrt);
                 topology.add(activationPrim);
                 profilingIDs.push_back(powerLayerName + "_sqrt");
             }
@@ -2537,14 +2535,12 @@ void Program::CreateSoftMaxPrimitive(cldnn::topology& topology, InferenceEngine:
     auto softmaxPrim = cldnn::softmax(softmaxLayerName,
                                       inputPrimitives[0],
                                       SoftmaxDimensionFromIEAxis(softmaxLayer));
-    primitivesToIRLayersMap[softmaxLayerName] = { layer->name };
-    primitiveIDs[softmaxLayerName] = softmaxLayerName;
     topology.add(softmaxPrim);
-    profilingIDs.push_back(softmaxLayerName);
+    addPrimitiveToProfiler(softmaxLayerName, layer);
 }
 
 void Program::CreateFullyConnectedPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
+    ValidateLayer(layer, {1, 2, 3});
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto fcLayer = as<InferenceEngine::FullyConnectedLayer *> (layer);
 
@@ -2586,12 +2582,17 @@ void Program::CreateFullyConnectedPrimitive(cldnn::topology& topology, Inference
         break;
     default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
     }
-    auto weightsBlob = getBlob(layer, "weights");
-    cldnn::layout fcwLayout(
-        DataTypeFromPrecision(weightsBlob->getTensorDesc().getPrecision()),
-        m_defaultFormat,
-        weightsDims);
-    weightsPrimID = CreatePrimitiveFromBlob(topology, weightsPrimID, weightsBlob, fcwLayout);
+    auto weightsBlob = getBlobOrNull(layer, "weights");
+    if (weightsBlob != nullptr) {
+        cldnn::layout fcwLayout(
+            DataTypeFromPrecision(weightsBlob->getTensorDesc().getPrecision()),
+            m_defaultFormat,
+            weightsDims);
+        weightsPrimID = CreatePrimitiveFromBlob(topology, weightsPrimID, weightsBlob, fcwLayout);
+    } else {
+        auto wei_name = layer_type_name_ID(layer->insData[1].lock()->getCreatorLayer().lock());
+        weightsPrimID = wei_name;
+    }
 
     auto inputPrecision = layer->insData[0].lock()->getPrecision();
     auto inputQuantized =
@@ -2612,9 +2613,7 @@ void Program::CreateFullyConnectedPrimitive(cldnn::topology& topology, Inference
         auto fcPrim = cldnn::fully_connected(fcLayerName,
                                              inputPrimitives[0],
                                              weightsPrimID,
-                                             biasesPrimID,
-                                             false,
-                                             0.0f);
+                                             biasesPrimID);
 
         // Add quantization
         if (!wQuantizationPrimID.empty()) {
@@ -2639,6 +2638,7 @@ void Program::CreateFullyConnectedPrimitive(cldnn::topology& topology, Inference
             auto reshapePrim = cldnn::reshape(newWeightsPrimID, weightsPrimID, newShape);
 
             topology.add(reshapePrim);
+            addInnerPrimitiveToProfiler(newWeightsPrimID, fcLayerName, layer);
             weightsPrimID = newWeightsPrimID;
         }
 
@@ -2648,22 +2648,19 @@ void Program::CreateFullyConnectedPrimitive(cldnn::topology& topology, Inference
                                            { biasesPrimID },
                                            cldnn::tensor(1),
                                            cldnn::tensor(0),
-                                           cldnn::tensor(1),
-                                           false,
-                                           0.f);
+                                           cldnn::tensor(1));
 
         convPrim.output_data_type = DataTypeFromPrecision(outputPrecision);
 
         // TODO Fix in clDNN - there is no reason this should be immutable, most other fields are mutable
-        auto& wq = const_cast<std::vector<cldnn::primitive_id>&>(convPrim.weights_quantization_factors.ref());
+        auto& wq = const_cast<std::vector<cldnn::primitive_id>&>(convPrim.weights_quantization_factors);
         wq.insert(wq.end(), wQuantizationPrimID.begin(), wQuantizationPrimID.end());
 
         topology.add(convPrim);
+        addInnerPrimitiveToProfiler(convPrim, fcLayerName, layer);
     }
 
-    primitivesToIRLayersMap[fcLayerName] = { layer->name };
-    primitiveIDs[fcLayerName] = fcLayerName;
-    profilingIDs.push_back(fcLayerName);
+    addPrimitiveToProfiler(fcLayerName, layer);
 }
 
 void Program::CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -2785,19 +2782,17 @@ void Program::CreateLRNPrimitive(cldnn::topology& topology, InferenceEngine::CNN
         static_cast<float>(lrnLayer->_k),
         lrnLayer->_alpha,
         lrnLayer->_beta,
-        lrnLayer->_isAcrossMaps ? cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel);
+        lrnLayer->_isAcrossMaps ? cldnn::lrn_norm_region_across_channel : cldnn::lrn_norm_region_within_channel);
 
-    primitivesToIRLayersMap[lrnLayerName] = { layer->name };
-    primitiveIDs[lrnLayerName] = lrnLayerName;
     topology.add(lrnPrim);
-    profilingIDs.push_back(lrnLayerName);
+    addPrimitiveToProfiler(lrnLayerName, layer);
 }
 
 void Program::CreateActivationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    cldnn_activation_additional_params params{ 0.0f, 0.0f };
-    cldnn_activation_func func = cldnn_activation_func_t::activation_none;
+    cldnn::activation_additional_params params{ 0.0f, 0.0f };
+    cldnn::activation_func func = cldnn::activation_func::none;
 
     LayerType activationType;
     if (type == Activation) {
@@ -2829,142 +2824,142 @@ void Program::CreateActivationPrimitive(cldnn::topology& topology, InferenceEngi
     switch (activationType) {
     case TanH:
     {
-        func = cldnn_activation_func_t::activation_hyperbolic_tan;
+        func = cldnn::activation_func::hyperbolic_tan;
         break;
     }
     case ELU:
     {
-        func = cldnn_activation_func_t::activation_elu;
+        func = cldnn::activation_func::elu;
         params.a = layer->GetParamAsFloat("alpha", 1.0f);
         break;
     }
     case Sigmoid:
     {
-        func = cldnn_activation_func_t::activation_logistic;
+        func = cldnn::activation_func::logistic;
         break;
     }
     case ReLU:
     {
         auto negative_slope = layer->GetParamAsFloat("negative_slope", 0.0f);
         if (negative_slope == 0.f) {
-            func = cldnn_activation_func_t::activation_relu;
+            func = cldnn::activation_func::relu;
         } else {
-            func = cldnn_activation_func_t::activation_relu_negative_slope;
+            func = cldnn::activation_func::relu_negative_slope;
             params.a = negative_slope;
         }
         break;
     }
     case ReLU6:
     {
-        func = cldnn_activation_func_t::activation_clamp;
+        func = cldnn::activation_func::clamp;
         params.b = layer->GetParamAsFloat("n", 6.0f);
         break;
     }
     case Clamp:
     {
-        func = cldnn_activation_func_t::activation_clamp;
+        func = cldnn::activation_func::clamp;
         params.a = layer->GetParamAsFloat("min");
         params.b = layer->GetParamAsFloat("max");
         break;
     }
     case Exp:
     {
-        func = cldnn_activation_func_t::activation_exp;
+        func = cldnn::activation_func::exp;
         break;
     }
     case Not:
     {
-        func = cldnn_activation_func_t::activation_not;
+        func = cldnn::activation_func::negation;
         break;
     }
     case Asin:
     {
-        func = cldnn_activation_func_t::activation_asin;
+        func = cldnn::activation_func::asin;
         break;
     }
     case Asinh:
     {
-        func = cldnn_activation_func_t::activation_asinh;
+        func = cldnn::activation_func::asinh;
         break;
     }
     case Acos:
     {
-        func = cldnn_activation_func_t::activation_acos;
+        func = cldnn::activation_func::acos;
         break;
     }
     case Acosh:
     {
-        func = cldnn_activation_func_t::activation_acosh;
+        func = cldnn::activation_func::acosh;
         break;
     }
     case Atan:
     {
-        func = cldnn_activation_func_t::activation_atan;
+        func = cldnn::activation_func::atan;
         break;
     }
     case Atanh:
     {
-        func = cldnn_activation_func_t::activation_atanh;
+        func = cldnn::activation_func::atanh;
         break;
     }
     case Abs:
     {
-        func = cldnn_activation_func_t::activation_abs;
+        func = cldnn::activation_func::abs;
         break;
     }
     case Floor:
     {
-        func = cldnn_activation_func_t::activation_floor;
+        func = cldnn::activation_func::floor;
         break;
     }
     case Ceil:
     {
-        func = cldnn_activation_func_t::activation_ceil;
+        func = cldnn::activation_func::ceil;
         break;
     }
     case Erf:
     {
-        func = cldnn_activation_func_t::activation_erf;
+        func = cldnn::activation_func::erf;
         break;
     }
     case HardSigmoid:
     {
-        func = cldnn_activation_func_t::activation_hard_sigmoid;
+        func = cldnn::activation_func::hard_sigmoid;
         break;
     }
     case Log:
     {
-        func = cldnn_activation_func_t::activation_log;
+        func = cldnn::activation_func::log;
         break;
     }
     case Neg:
     {
-        func = cldnn_activation_func_t::activation_negative;
+        func = cldnn::activation_func::negative;
         break;
     }
     case Reciprocal:
     {
-        func = cldnn_activation_func_t::activation_reciprocal;
+        func = cldnn::activation_func::reciprocal;
         break;
     }
     case Selu:
     {
-        func = cldnn_activation_func_t::activation_selu;
+        func = cldnn::activation_func::selu;
         break;
     }
     case SoftPlus:
     {
-        func = cldnn_activation_func_t::activation_softplus;
+        func = cldnn::activation_func::softplus;
         break;
     }
     case SoftSign:
     {
-        func = cldnn_activation_func_t::activation_softsign;
+        func = cldnn::activation_func::softsign;
         break;
     }
     case Tan:
     {
-        func = cldnn_activation_func_t::activation_tan;
+        func = cldnn::activation_func::tan;
         break;
     }
     default:
@@ -2974,10 +2969,8 @@ void Program::CreateActivationPrimitive(cldnn::topology& topology, InferenceEngi
 
     std::string layerName = layer_type_name_ID(layer);
     auto activationPrimitive = cldnn::activation(layerName, inputPrimitives[0], func, params);
-    primitivesToIRLayersMap[layerName] = { layer->name };
-    primitiveIDs[layerName] = layerName;
     topology.add(activationPrimitive);
-    profilingIDs.push_back(layerName);
+    addPrimitiveToProfiler(layerName, layer);
 }
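
The switch above is a mechanical port from the flat C-API constants (cldnn_activation_func_t::activation_*) to the scoped C++ enum cldnn::activation_func, with cldnn_activation_additional_params becoming cldnn::activation_additional_params and activation_not renamed to negation. A minimal sketch of the resulting pattern for a clamp-style activation such as ReLU6, assuming the same clDNN C++ API and the Program helpers used in this file (illustrative fragment, not part of the patch):

    // Sketch only: mirrors the ReLU6 branch above.
    cldnn::activation_additional_params params{ 0.0f, 0.0f };
    params.a = 0.0f;                                     // clamp "min"
    params.b = 6.0f;                                     // clamp "max" (the "n" attribute for ReLU6)
    auto func = cldnn::activation_func::clamp;           // was cldnn_activation_func_t::activation_clamp
    auto prim = cldnn::activation(layer_type_name_ID(layer), inputPrimitives[0], func, params);
    topology.add(prim);
    addPrimitiveToProfiler(layer_type_name_ID(layer), layer);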
 
 void Program::CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -2996,7 +2989,8 @@ void Program::CreateUpsamplingPrimitive(cldnn::topology& topology, InferenceEngi
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto upsamplingLayer = as<InferenceEngine::GenericLayer*> (layer);
-    uint32_t scale = upsamplingLayer->GetParamAsUInt("scale");
+
+    auto output_tensor = CldnnTensorFromIEDims(upsamplingLayer->outData[0]->getTensorDesc().getDims());
     uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter");
     std::string sampleType = upsamplingLayer->GetParamAsString("sample_type");
 
@@ -3004,14 +2998,12 @@ void Program::CreateUpsamplingPrimitive(cldnn::topology& topology, InferenceEngi
     auto upsamplingPrim = cldnn::upsampling(
         upsamplingLayerName,
         inputPrimitives[0],
-        scale,
+        output_tensor,
         numFilter,
         UpsamplingTypeFromString(sampleType));
 
-    primitivesToIRLayersMap[upsamplingLayerName] = { layer->name };
-    primitiveIDs[upsamplingLayerName] = upsamplingLayerName;
     topology.add(upsamplingPrim);
-    profilingIDs.push_back(upsamplingLayerName);
+    addPrimitiveToProfiler(upsamplingLayerName, layer);
 }
 
 void Program::CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3020,15 +3012,15 @@ void Program::CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine
     auto resampleLayer = as<InferenceEngine::GenericLayer*> (layer);
 
     size_t inFeatures = 1;
-    float scale = 1.0f;
     std::shared_ptr<Data> insData0 = layer->insData[0].lock();
     IE_ASSERT(insData0 != nullptr);
     auto insData0dims = insData0->getTensorDesc().getDims();
     auto outDims = layer->outData[0]->getTensorDesc().getDims();
+    auto outTensor = CldnnTensorFromIEDims(outDims);
 
     if (insData0dims.size() > 1) {
         inFeatures = insData0dims[1];
-        scale = static_cast<float>(outDims.back()) / static_cast<float>(insData0dims.back());
+        auto scale = static_cast<float>(outDims.back()) / static_cast<float>(insData0dims.back());
         if (scale < 1.0f) {
             THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name);
         }
@@ -3047,14 +3039,12 @@ void Program::CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine
     auto upsamplingPrim = cldnn::upsampling(
         resampleLayerName,
         inputPrimitives[0],
-        scale,
+        outTensor,
         inFeatures,
         cldnnSampleType);
 
-    primitivesToIRLayersMap[resampleLayerName] = { layer->name };
-    primitiveIDs[resampleLayerName] = resampleLayerName;
     topology.add(upsamplingPrim);
-    profilingIDs.push_back(resampleLayerName);
+    addPrimitiveToProfiler(resampleLayerName, layer);
 }
 
 void Program::CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3083,10 +3073,8 @@ void Program::CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEng
         mask_size,
         do_softmax);
 
-    primitivesToIRLayersMap[YOLOregionLayerName] = { layer->name };
-    primitiveIDs[YOLOregionLayerName] = YOLOregionLayerName;
     topology.add(regionPrim);
-    profilingIDs.push_back(YOLOregionLayerName);
+    addPrimitiveToProfiler(YOLOregionLayerName, layer);
 }
 
 void Program::CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3101,10 +3089,8 @@ void Program::CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngi
         inputPrimitives[0],
         stride);
 
-    primitivesToIRLayersMap[YOLOreorgLayerName] = { layer->name };
-    primitiveIDs[YOLOreorgLayerName] = YOLOreorgLayerName;
     topology.add(reorgPrim);
-    profilingIDs.push_back(YOLOreorgLayerName);
+    addPrimitiveToProfiler(YOLOreorgLayerName, layer);
 }
 
 void Program::CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
@@ -3160,10 +3146,8 @@ void Program::CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngin
         top_k,
         chosen_axis);
 
-    primitivesToIRLayersMap[ArgMaxLayerName] = { layer->name };
-    primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName;
     topology.add(argmaxPrim);
-    profilingIDs.push_back(ArgMaxLayerName);
+    addPrimitiveToProfiler(ArgMaxLayerName, layer);
 }
 
 void Program::CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3252,7 +3236,7 @@ void Program::CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CN
         topology.add(argmax_mutable_prim);
         inputPrimitives.push_back(argmax_mutable_id_w);
 
-        std::string ArgMaxLayerName = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
+        std::string ArgMaxLayerName = layer_type_lower(layer) + ":" + layer->outData[0]->getName();
         auto argmaxPrim = cldnn::arg_max_min(
                 ArgMaxLayerName,
                 inputPrimitives,
@@ -3260,19 +3244,18 @@ void Program::CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CN
                 top_k,
                 chosen_axis,
                 stype,
-                true);
+                true,
+                cldnn::padding({0, 0, 0, 0}, 0),
+                DataTypeFromPrecision(layer->precision));
 
-        primitivesToIRLayersMap[ArgMaxLayerName] = {layer->name};
-        primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName;
         topology.add(argmaxPrim);
 
-        cldnn::primitive_id argmax_mutable_id_r = layer_type_lower(layer) + ":" + layer->outData[0]->getName();
+        cldnn::primitive_id argmax_mutable_id_r = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
         auto argmax_mutable_prim_r = cldnn::mutable_data(argmax_mutable_id_r, {ArgMaxLayerName}, shared_memory);
         primitivesToIRLayersMap[argmax_mutable_id_r] = {layer->name};
         primitiveIDs[argmax_mutable_id_r] = argmax_mutable_id_r;
         topology.add(argmax_mutable_prim_r);
-
-        profilingIDs.push_back(ArgMaxLayerName);
+        addPrimitiveToProfiler(ArgMaxLayerName, layer);
     } else if (layer->outData.size() == 1) {
         std::string ArgMaxLayerName = layer_type_lower(layer) + ":" + layer->outData[0]->getName();
         auto argmaxPrim = cldnn::arg_max_min(
@@ -3282,12 +3265,12 @@ void Program::CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CN
                 top_k,
                 chosen_axis,
                 stype,
-                true);
+                true,
+                cldnn::padding({0, 0, 0, 0}, 0),
+                DataTypeFromPrecision(layer->precision));
 
-        primitivesToIRLayersMap[ArgMaxLayerName] = {layer->name};
-        primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName;
         topology.add(argmaxPrim);
-        profilingIDs.push_back(ArgMaxLayerName);
+        addPrimitiveToProfiler(ArgMaxLayerName, layer);
     } else {
         THROW_IE_EXCEPTION << layer->name << " Incorrect TopK outputs number";
     }
@@ -3333,10 +3316,8 @@ void Program::CreateMaxUnpoolingPrimitive(cldnn::topology& topology, InferenceEn
         (cldnn::tensor) cldnn::spatial(kernel_size, kernel_size),  // size
         (cldnn::tensor) cldnn::spatial(stride, stride) );          // stride
 
-    primitivesToIRLayersMap[UnpoolingLayerName] = { layer->name };
-    primitiveIDs[UnpoolingLayerName] = UnpoolingLayerName;
     topology.add(unpoolingPrim);
-    profilingIDs.push_back(UnpoolingLayerName);
+    addPrimitiveToProfiler(UnpoolingLayerName, layer);
 }
 
 void Program::CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3356,10 +3337,8 @@ void Program::CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNN
         normalize_variance,
         eps);
 
-    primitivesToIRLayersMap[MvnLayerName] = { layer->name };
-    primitiveIDs[MvnLayerName] = MvnLayerName;
     topology.add(mvnPrim);
-    profilingIDs.push_back(MvnLayerName);
+    addPrimitiveToProfiler(MvnLayerName, layer);
 }
 
 void Program::CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3398,10 +3377,8 @@ void Program::CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CN
         cldnnAxisFromIE(axis),
         tiles);
 
-    primitivesToIRLayersMap[tileLayerName] = { layer->name };
-    primitiveIDs[tileLayerName] = tileLayerName;
     topology.add(tilePrim);
-    profilingIDs.push_back(tileLayerName);
+    addPrimitiveToProfiler(tileLayerName, layer);
 }
 
 void Program::CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3409,30 +3386,10 @@ void Program::CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNN
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto padLayer = as<InferenceEngine::GenericLayer*> (layer);
 
-    auto PadTensorFromArgs = [](const std::string &s) -> cldnn::tensor {
-        std::stringstream ss(s);
-        std::string item;
-        std::vector<cldnn::tensor::value_type> elems;
-        while (std::getline(ss, item, ',')) {
-            elems.push_back(static_cast<cldnn::tensor::value_type>(std::atoll(item.c_str())));
-        }
-
-        while (elems.size() < 4) {
-            elems.push_back(0);
-        }
-
-        // Swap x and y
-        auto tmp = elems[2];
-        elems[2] = elems[3];
-        elems[3] = tmp;
-
-        return cldnn::tensor(elems, 0);
-    };
-
-    auto pads_begin = PadTensorFromArgs(padLayer->GetParamAsString("pads_begin"));
-    auto pads_end = PadTensorFromArgs(padLayer->GetParamAsString("pads_end"));
+    auto pads_begin  = cldnn::tensor(PermuteIEDimsToCldnnOrder(padLayer->GetParamAsInts("pads_begin")), 0);
+    auto pads_end    = cldnn::tensor(PermuteIEDimsToCldnnOrder(padLayer->GetParamAsInts("pads_end")), 0);
     std::string mode = padLayer->GetParamAsString("pad_mode");
-    float pad_value = padLayer->GetParamAsFloat("pad_value", 0.0f);
+    float pad_value  = padLayer->GetParamAsFloat("pad_value", 0.0f);
 
     cldnn::border_type border_mode;
     if (mode == "constant")
@@ -3455,10 +3412,8 @@ void Program::CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNN
             border_mode,
             pad_value);
 
-    primitivesToIRLayersMap[padLayerName] = { layer->name };
-    primitiveIDs[padLayerName] = padLayerName;
     topology.add(tilePrim);
-    profilingIDs.push_back(padLayerName);
+    addPrimitiveToProfiler(padLayerName, layer);
 }
 
 void Program::AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3486,8 +3441,37 @@ void Program::AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::C
         break;
     case 1: constTensor = cldnn::tensor(1, TensorValue(constDims[0]), 1, 1);
         break;
+    case 0:
+        if (constBlob->size() != 1)
+            THROW_CLDNN_EXCEPTION("Invalid constant blob with 0-dim shape");
+
+        constTensor = cldnn::tensor(1, 1, 1, 1);
+        break;
     default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
     }
+
+    if (GetNextLayers(layer->outData[0]).size() == 1) {
+        auto next = GetNextSingleLayer(layer->outData[0]);
+        auto nextConv = tryAs<InferenceEngine::ConvolutionLayer*>(next);
+        auto nextDeconv = tryAs<InferenceEngine::DeconvolutionLayer*>(next);
+        auto nextDefConv = tryAs<InferenceEngine::DeformableConvolutionLayer*>(next);
+        auto nextBinConv = tryAs<InferenceEngine::BinaryConvolutionLayer*>(next);
+
+        bool isWeights = (nextConv != nullptr && nextConv->insData.size() > 1 && nextConv->insData[1].lock() == layer->outData[0]) ||
+                         (nextDeconv != nullptr && nextDeconv->insData.size() > 1 && nextDeconv->insData[1].lock() == layer->outData[0]) ||
+                         (nextDefConv != nullptr && nextDefConv->insData.size() > 2 && nextDefConv->insData[2].lock() == layer->outData[0]) ||
+                         (nextBinConv != nullptr && nextBinConv->insData.size() > 1 && nextBinConv->insData[1].lock() == layer->outData[0]);
+
+        // TODO: The format of the weights passed to cldnn needs to be changed:
+        // the group dimension should be part of the size tensor, and any split should be done only inside cldnn (if necessary).
+        // Until this is implemented, we have to divide the feature dimension by the group size for const inputs that represent weights,
+        // so that the tensor has the shape cldnn expects.
+        if (isWeights) {
+            auto group = next->GetParamAsUInt("group", 1);
+            constTensor.feature[0] /= group;
+        }
+    }
+
     cldnn::layout constLayout = cldnn::layout(
         DataTypeFromPrecision(layer->blobs.begin()->second->getTensorDesc().getPrecision()),
         FormatFromLayout(constBlob->getTensorDesc().getLayout()),
@@ -3495,12 +3479,11 @@ void Program::AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::C
 
     cldnn::primitive_id initialconstPrimID = layer_type_name_ID(layer);
     cldnn::primitive_id constPrimID = CreatePrimitiveFromBlob(topology, initialconstPrimID, constBlob, constLayout);
-    primitiveIDs[initialconstPrimID] = constPrimID;
-    primitivesToIRLayersMap[initialconstPrimID] = { layer->name };
+    addPrimitiveToProfiler(initialconstPrimID, layer, constPrimID);
 }
 
 void Program::CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
+    ValidateLayer(layer, {1, 2, 3});
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto convLayer = as<InferenceEngine::ConvolutionLayer*>(layer);
     std::string convLayerName = layer_type_name_ID(layer);
@@ -3541,8 +3524,6 @@ void Program::CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEng
                                        stride,
                                        padding,
                                        dilation,
-                                       false,
-                                       0.0f,
                                        CldnnTensorFromIEDims(convLayer->outData[0]->getTensorDesc().getDims()));
 
     if (convLayer->precision == Precision::I8 || convLayer->precision == Precision::U8) {
@@ -3550,7 +3531,7 @@ void Program::CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEng
         convPrim.output_data_type = DataTypeFromPrecision(convLayer->outData[0]->getTensorDesc().getPrecision());
     }
 
-    if (convLayer->_group >= 16) {
+    if (convLayer->_group >= 16 || layer->insData.size() > 1) {
         convPrim.groups = convLayer->_group;
     }
 
@@ -3559,14 +3540,12 @@ void Program::CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEng
 
     if (!wScalePrimID.empty()) {
         // TODO Fix in clDNN - there is no reason this should be immutable, most other fields are mutable
-        auto& wq = const_cast<std::vector<cldnn::primitive_id>&>(convPrim.weights_quantization_factors.ref());
+        auto& wq = const_cast<std::vector<cldnn::primitive_id>&>(convPrim.weights_quantization_factors);
         wq.insert(wq.end(), wScalePrimID.begin(), wScalePrimID.end());
     }
 
     topology.add(convPrim);
-    primitivesToIRLayersMap[convLayerName] = { layer->name };
-    primitiveIDs[convLayerName] = convLayerName;
-    profilingIDs.push_back(convLayerName);
+    addPrimitiveToProfiler(convLayerName, layer);
 }
 
 void Program::CreateDeformableConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3604,9 +3583,7 @@ void Program::CreateDeformableConvolutionPrimitive(cldnn::topology& topology, In
                                               dilation,
                                               CldnnTensorFromIEDims(defConvLayer->outData[0]->getTensorDesc().getDims()));
         topology.add(defConvPrim);
-        primitivesToIRLayersMap[defConvLayerName] = { layer->name };
-        primitiveIDs[defConvLayerName] = defConvLayerName;
-        profilingIDs.push_back(defConvLayerName);
+        addPrimitiveToProfiler(defConvLayerName, layer);
     } else {
         std::string defConvLayerNameInterp = layer_type_name_ID(layer)+"_interp";
         std::string defConvLayerNameConv = layer_type_name_ID(layer);
@@ -3621,9 +3598,7 @@ void Program::CreateDeformableConvolutionPrimitive(cldnn::topology& topology, In
                                                           CldnnTensorFromIEDims(defConvLayer->outData[0]->getTensorDesc().getDims()),
                                                           kernel);
         topology.add(defConvPrimInterp);
-        primitivesToIRLayersMap[defConvLayerNameInterp] = { layer->name };
-        primitiveIDs[defConvLayerNameInterp] = defConvLayerNameInterp;
-        profilingIDs.push_back(defConvLayerNameInterp);
+        addInnerPrimitiveToProfiler(defConvLayerNameInterp, defConvLayerNameConv, layer);
         auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv,
                                                   defConvLayerNameInterp,
                                                   weightPrimID,
@@ -3631,9 +3606,7 @@ void Program::CreateDeformableConvolutionPrimitive(cldnn::topology& topology, In
                                                   defConvLayer->_group,
                                                   CldnnTensorFromIEDims(defConvLayer->outData[0]->getTensorDesc().getDims()));
         topology.add(defConvPrim);
-        primitivesToIRLayersMap[defConvLayerNameConv] = { layer->name };
-        primitiveIDs[defConvLayerNameConv] = defConvLayerNameConv;
-        profilingIDs.push_back(defConvLayerNameConv);
+        addPrimitiveToProfiler(defConvLayerNameConv, layer);
     }
 }
 
@@ -3670,10 +3643,8 @@ void Program::CreateBinaryConvolutionPrimitive(cldnn::topology& topology, Infere
                                                     binaryConvLayer->_pad_value,
                                                     calc_precision);
 
-    primitivesToIRLayersMap[binaryConvLayerName] = { layer->name };
-    primitiveIDs[binaryConvLayerName] = binaryConvLayerName;
     topology.add(binaryConvPrim);
-    profilingIDs.push_back(binaryConvLayerName);
+    addPrimitiveToProfiler(binaryConvLayerName, layer);
 }
 
 void Program::CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3697,10 +3668,8 @@ void Program::CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine
                                             output_high_id,
                                             levels);
 
-    primitivesToIRLayersMap[quantizeLayerName] = { layer->name };
-    primitiveIDs[quantizeLayerName] = quantizeLayerName;
     topology.add(quantizationPrim);
-    profilingIDs.push_back(quantizeLayerName);
+    addPrimitiveToProfiler(quantizeLayerName, layer);
 }
 
 void Program::CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3733,10 +3702,26 @@ void Program::CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::
             cldnnAxisFromIE(axis),
             CldnnTensorFromIEDims(gatherLayer->outData[0]->getTensorDesc().getDims()));
 
-    primitivesToIRLayersMap[gatherLayerName] = { layer->name };
-    primitiveIDs[gatherLayerName] = gatherLayerName;
     topology.add(gatherPrim);
-    profilingIDs.push_back(gatherLayerName);
+    addPrimitiveToProfiler(gatherLayerName, layer);
+}
+
+void CLDNNPlugin::Program::CreateGatherTreePrimitive(cldnn::topology & topology, InferenceEngine::CNNLayerPtr & layer) {
+    ValidateLayer(layer, 4);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto gatherTreeLayer = as<InferenceEngine::GenericLayer*>(layer);
+
+    std::string gatherTreeLayerName = layer_type_name_ID(layer);
+    auto gatherTreePrim = cldnn::gather_tree(
+        gatherTreeLayerName,
+        inputPrimitives[0],
+        inputPrimitives[1],
+        inputPrimitives[2],
+        inputPrimitives[3]);
+
+    topology.add(gatherTreePrim);
+    addPrimitiveToProfiler(gatherTreeLayerName, layer);
 }
 
 void Program::CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3762,10 +3747,8 @@ void Program::CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEn
             inputPrimitives[0],
             blockSize);
 
-    primitivesToIRLayersMap[depthToSpaceName] = { layer->name };
-    primitiveIDs[depthToSpaceName] = depthToSpaceName;
     topology.add(depthToSpacePrim);
-    profilingIDs.push_back(depthToSpaceName);
+    addPrimitiveToProfiler(depthToSpaceName, layer);
 }
 
 void Program::CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3799,10 +3782,8 @@ void Program::CreateShuffleChannelsPrimitive(cldnn::topology& topology, Inferenc
             group,
             axis);
 
-    primitivesToIRLayersMap[shuffleChannelsName] = { layer->name };
-    primitiveIDs[shuffleChannelsName] = shuffleChannelsName;
     topology.add(shuffleChannelsPrim);
-    profilingIDs.push_back(shuffleChannelsName);
+    addPrimitiveToProfiler(shuffleChannelsName, layer);
 }
 
 void Program::CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3824,10 +3805,8 @@ void Program::CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEn
             inputPrimitives[0], inputPrimitives[1], inputPrimitives[2], inputPrimitives[3],
             begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
 
-    primitivesToIRLayersMap[stridedSliceLayerName] = { layer->name };
-    primitiveIDs[stridedSliceLayerName] = stridedSliceLayerName;
     topology.add(stridedSlicePrim);
-    profilingIDs.push_back(stridedSliceLayerName);
+    addPrimitiveToProfiler(stridedSliceLayerName, layer);
 }
 
 void Program::CreateReverseSequencePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3869,10 +3848,8 @@ void Program::CreateReverseSequencePrimitive(cldnn::topology& topology, Inferenc
             seq_axis,
             batch_axis);
 
-    primitivesToIRLayersMap[reverseSequenceLayerName] = { layer->name };
-    primitiveIDs[reverseSequenceLayerName] = reverseSequenceLayerName;
     topology.add(reverseSequencePrim);
-    profilingIDs.push_back(reverseSequence->name);
+    addPrimitiveToProfiler(reverseSequenceLayerName, layer);
 }
 
 void Program::CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3887,9 +3864,8 @@ void Program::CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngin
             inputPrimitives[0],
             CldnnTensorFromIEDims(broadcast->outData[0]->getTensorDesc().getDims()));
 
-    primitiveIDs[broadcastLayerName] = broadcastLayerName;
     topology.add(broadcastPrim);
-    profilingIDs.push_back(broadcast->name);
+    addPrimitiveToProfiler(broadcastLayerName, layer);
 }
 
 void Program::CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -3933,9 +3909,7 @@ void Program::CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CN
             auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
 
             topology.add(reorderPrim);
-            primitivesToIRLayersMap[reorderName] = { layer->name };
-            profilingIDs.push_back(reorderName);
-            primitiveIDs[reorderName] = reorderName;
+            addInnerPrimitiveToProfiler(reorderName, gemmLayerName, layer);
 
             inputPrimitives[i] = reorderName;
         }
@@ -3952,9 +3926,7 @@ void Program::CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CN
             auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
 
             topology.add(reshapePrim);
-            primitivesToIRLayersMap[reshapeName] = { layer->name };
-            profilingIDs.push_back(reshapeName);
-            primitiveIDs[reshapeName] = reshapeName;
+            addInnerPrimitiveToProfiler(reshapeName, gemmLayerName, layer);
 
             inputPrimitives[i] = reshapeName;
         }
@@ -3975,8 +3947,6 @@ void Program::CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CN
         beta);
 
     topology.add(gemmPrim);
-    primitivesToIRLayersMap[gemmLayerName] = { layer->name };
-    profilingIDs.push_back(gemmLayerName);
 
     auto lastLayerName = gemmLayerName;
 
@@ -3987,14 +3957,12 @@ void Program::CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CN
         auto outReshapePrim = cldnn::reshape(outReshapeName, gemmLayerName, outputShape);
 
         topology.add(outReshapePrim);
-        primitivesToIRLayersMap[outReshapeName] = { layer->name };
-        profilingIDs.push_back(outReshapeName);
-        primitiveIDs[outReshapeName] = outReshapeName;
+        addInnerPrimitiveToProfiler(outReshapeName, gemmLayerName, layer);
 
         lastLayerName = outReshapeName;
     }
 
-    primitiveIDs[gemmLayerName] = lastLayerName;
+    addPrimitiveToProfiler(gemmLayerName, layer, lastLayerName);
 }
 
 
@@ -4087,9 +4055,8 @@ void Program::CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::
             axes,
             static_cast<int32_t>(reduce->keep_dims));
 
-    primitiveIDs[reduceLayerName] = reduceLayerName;
     topology.add(reducePrim);
-    profilingIDs.push_back(reduce->name);
+    addPrimitiveToProfiler(reduceLayerName, layer);
 }
 
 void Program::CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -4125,9 +4092,24 @@ void Program::CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::
             on_value,
             off_value);
 
-    primitiveIDs[oneHotLayerName] = oneHotLayerName;
     topology.add(oneHotPrim);
-    profilingIDs.push_back(oneHot->name);
+    addPrimitiveToProfiler(oneHotLayerName, layer);
+}
+
+void Program::CreateConvertPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+
+    auto precisionParam = layer->GetParamAsString("precision");
+    auto outPrecision = Precision::FromStr(precisionParam);
+    auto outDataType = DataTypeFromPrecision(outPrecision);
+
+    auto name = layer_type_name_ID(layer);
+    auto prim = cldnn::reorder(name, inputPrimitives[0], cldnn::format::any, outDataType);
+
+    topology.add(prim);
+    addPrimitiveToProfiler(name, layer);
 }
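
The new Convert handler maps the layer onto a cldnn::reorder that keeps the input layout (cldnn::format::any) and only changes the element type taken from the layer's "precision" attribute. A short sketch of the same idea, assuming the plugin's DataTypeFromPrecision helper from this file and an FP16 target picked purely for illustration:

    // Sketch only; "FP16" is a hypothetical value of the Convert layer's "precision" attribute.
    auto outPrecision = Precision::FromStr("FP16");
    auto convertId    = layer_type_name_ID(layer);
    auto convertPrim  = cldnn::reorder(convertId,
                                       inputPrimitives[0],
                                       cldnn::format::any,                    // keep the input layout unchanged
                                       DataTypeFromPrecision(outPrecision));  // change only the data type
    topology.add(convertPrim);
    addPrimitiveToProfiler(convertId, layer);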
 
 bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
@@ -4258,7 +4240,7 @@ void Program::AddInputPrimitive(cldnn::topology& topology, InferenceEngine::Inpu
     inputLayout.format = inputFormat;
     inputLayout.size = inputLayout.size.transform(inputFormat, 1);
     inputLayout.data_type = DataTypeFromPrecision(inputPrecision);
-    auto preprocessPrimID = inputName + m_preProcessTag;
+    auto preprocessPrimID = "reorder:" + inputName + m_preProcessTag;
 
     if ((meanChannels > 0) &&
         (meanChannels != inputLayout.size.feature[0])) {
@@ -4277,12 +4259,11 @@ void Program::AddInputPrimitive(cldnn::topology& topology, InferenceEngine::Inpu
                 }
             }
             topology.add(cldnn::reorder(preprocessPrimID, inputName, inputLayout, meanValues));
-            primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
+            InitProfileInfo(preprocessPrimID, "reorder");
+            primitiveIDs[preprocessPrimID] = preprocessPrimID;
             profilingIDs.push_back(preprocessPrimID);
-            InitProfileInfo(preprocessPrimID, "Reorder");
-        }
             break;
-
+        }
         case MEAN_IMAGE: {
             IE_ASSERT(meanChannels);
             // first merge all mean values to a single blob
@@ -4325,9 +4306,9 @@ void Program::AddInputPrimitive(cldnn::topology& topology, InferenceEngine::Inpu
                                         inputName,
                                         inputLayout,
                                         meanBlobID));
-            primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
+            InitProfileInfo(preprocessPrimID, "reorder");
+            primitiveIDs[preprocessPrimID] = preprocessPrimID;
             profilingIDs.push_back(preprocessPrimID);
-            InitProfileInfo(preprocessPrimID, "Reorder");
             break;
         }
         default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName);
@@ -4388,7 +4369,7 @@ void Program::AddOutputPrimitive(cldnn::topology& topology, std::string outputNa
     else
         outLayerName += outputCreator->name;
 
-    auto outputReorderID = outputName + m_postProcessTag;
+    auto outputReorderID = "reorder:" + outputName + m_postProcessTag;
     Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision;
 
     // Find correct output ID. Start with name stored in IR.
@@ -4408,9 +4389,10 @@ void Program::AddOutputPrimitive(cldnn::topology& topology, std::string outputNa
     topology.add(cldnn::reorder(outputReorderID, outputID,
         FormatFromLayout(outputData->getLayout()),
         DataTypeFromPrecision(precision)));
-    primitiveIDs[outputName] = outputReorderID;
+    InitProfileInfo(outputReorderID, "reorder");
+    primitiveIDs[outputReorderID] = outputReorderID;
     profilingIDs.push_back(outputReorderID);
-    InitProfileInfo(outputReorderID, "Reorder");
+    primitiveIDs[outputName] = outputReorderID;
 
     outputDims[outputName] = outputDesc.getDims();
     prevPrimitiveIDs[outputReorderID] = {outputName};
@@ -4429,11 +4411,7 @@ void Program::AddSingleValuePrimitive(cldnn::topology& topology, cldnn::primitiv
     case cldnn::data_types::f16:
     {
         auto tmpPointer = primMem.pointer<uint16_t>();  // implicitly maps buffer - unmap in destructor
-        cldnn_status status = CLDNN_SUCCESS;
-        tmpPointer[0] = cldnn_float_to_half(value, &status);
-        if (status != CLDNN_SUCCESS) {
-            THROW_CLDNN_EXCEPTION("Error converting value to fp16.");
-        }
+        tmpPointer[0] = cldnn::float_to_half(value);
     }
         break;
     default:
@@ -4544,7 +4522,7 @@ Program::GenericBlobMap Program::CreateGenericLayerBlobPrimitives(cldnn::topolog
 
         cldnn::layout genericLayout(DataTypeFromPrecision(blob.second->getTensorDesc().getPrecision()),
                                     m_defaultFormat,
-                                    (cldnn::tensor) cldnn::spatial(TensorValue(blobDims.back())));
+                                    (cldnn::tensor) cldnn::feature(TensorValue(blobDims.back())));
 
         cldnn::primitive_id initialWeightID = layer_type_name_ID(layer) + "_" + blob.first + m_weightsTag;
         cldnn::primitive_id weightID = CreatePrimitiveFromBlob(topology, initialWeightID, blob.second, genericLayout);
@@ -4563,17 +4541,41 @@ void Program::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* lay
     }
 }
 
+void Program::addPrimitiveToProfiler(cldnn::primitive_id id, const InferenceEngine::CNNLayerPtr &layer,
+                                     cldnn::primitive_id customOutputId) {
+    primitivesToIRLayersMap[id] = { layer->name };
+    primitiveIDs[id] = customOutputId.empty() ? id : customOutputId;
+    profilingIDs.push_back(id);
+}
+
+void Program::addInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
+                                          const InferenceEngine::CNNLayerPtr &layer) {
+    InitProfileInfo(id, layer_type_lower(layer), false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, parentId);
+    primitivesToIRLayersMap[id] = { layer->name };
+    primitiveIDs[id] = id;
+    profilingIDs.push_back(id);
+}
+
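+// addPrimitiveToProfiler registers a top-level primitive (optionally remapping its output id);
+// addInnerPrimitiveToProfiler registers an auxiliary primitive and records its parent for profiling.
+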
 void Program::InitProfileInfo(const std::string& layerName,
                               const std::string& layerType,
                               bool isCPU,
-                              InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) {
-    perfMap[layerType + ":" + layerName].first = layerName;
-    auto& perfEntry = perfMap[layerType + ":" + layerName].second;
+                              InferenceEngine::InferenceEngineProfileInfo::LayerStatus status, std::string parentId) {
+    std::string layer_type_lower = layerType;
+    for (auto& c : layer_type_lower)
+        c = tolower(c);
+
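+    // If layerName already carries a "<layerType>:" prefix, strip it so the perfMap key stays a
+    // single "<lowercased type>:<name>" pair.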
+    std::string name = layerName;
+    if (name.find(layerType + ":") != std::string::npos) {
+        name = layerName.substr(layerName.find(":") + 1, layerName.length());
+    }
+
+    perfMap[layer_type_lower + ":" + name].first = name;
+    auto& perfEntry = perfMap[layer_type_lower + ":" + name].second;
     perfEntry.layerType = layerType;
     perfEntry.status = status;
     perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0;
     perfEntry.isCPU = isCPU;
-    perfEntry.status = status;
+    perfEntry.parentPrimitive = parentId;
 }
 
 }  // namespace CLDNNPlugin
index 25c7310..325670e 100644 (file)
 #include "cldnn_custom_layer.h"
 #include "cldnn_config.h"
 
-#include <CPP/engine.hpp>
-#include <CPP/memory.hpp>
-#include <CPP/topology.hpp>
-#include <CPP/primitive.hpp>
-#include <CPP/softmax.hpp>
-#include <CPP/upsampling.hpp>
-#include <CPP/pooling.hpp>
-#include <CPP/eltwise.hpp>
-#include <CPP/concatenation.hpp>
-#include <CPP/detection_output.hpp>
+#include <api/engine.hpp>
+#include <api/memory.hpp>
+#include <api/topology.hpp>
+#include <api/primitive.hpp>
+#include <api/softmax.hpp>
+#include <api/upsampling.hpp>
+#include <api/pooling.hpp>
+#include <api/eltwise.hpp>
+#include <api/concatenation.hpp>
+#include <api/detection_output.hpp>
 
 #ifndef NDEBUG
 #include <iostream>
@@ -88,6 +88,7 @@ struct PerfCounter {
     uint64_t cpu_uSec;
     uint32_t num;
     std::string layerType;
+    std::string parentPrimitive;
 
 public:
     PerfCounter() : realTime_uSec(0), cpu_uSec(0), num(0),
@@ -122,6 +123,11 @@ public:
     const std::map<std::string, cldnn::layout>& getInputLayouts() const { return inputLayouts; }
     int GetMaxBatchSizeForSingleProgram();
 
+    void addPrimitiveToProfiler(cldnn::primitive_id id, const InferenceEngine::CNNLayerPtr &layer,
+                                cldnn::primitive_id customOutputId = "");
+
+    void addInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
+                                     const InferenceEngine::CNNLayerPtr &layer);
 
     // internal types
     enum LayerType {
@@ -206,6 +212,8 @@ public:
         Tan,
         Gemm,
         OneHot,
+        Convert,
+        GatherTree,
         NO_TYPE
     };
     using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
@@ -223,7 +231,8 @@ private:
                          const std::string& layerType,
                          bool isCPU = false,
                          InferenceEngine::InferenceEngineProfileInfo::LayerStatus status
-                         = InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+                         = InferenceEngine::InferenceEngineProfileInfo::EXECUTED,
+                         std::string parentId = "");
 
     static const cldnn::primitive_id m_preProcessTag;
     static const cldnn::primitive_id m_weightsTag;
@@ -357,6 +366,8 @@ private:
     void CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
     void CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateGatherTreePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateConvertPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
 };
 
 }  // namespace CLDNNPlugin
index c0767c2..ada1385 100644 (file)
@@ -338,4 +338,4 @@ std::string DebugOptions::IELayoutToString(InferenceEngine::Layout layout) {
     }
 }
 
-};  // namespace CLDNNPlugin
\ No newline at end of file
+};  // namespace CLDNNPlugin
index 1dad92e..7fab969 100644 (file)
@@ -12,9 +12,9 @@
 #include <map>
 #include <algorithm>
 #include "cpp/ie_cnn_network.h"
-#include <CPP/memory.hpp>
-#include <CPP/primitive.hpp>
-#include <CPP/network.hpp>
+#include <api/memory.hpp>
+#include <api/primitive.hpp>
+#include <api/network.hpp>
 
 // Debugging options flags
 // #define _DEBUG_LAYER_CONTENT
@@ -68,11 +68,10 @@ protected:
         auto ptr = mem.pointer<T>();
         auto data = ptr.data();  // +offset;
         auto elements = std::min(layout.count(), numElements);
-        cldnn::status_t status = CLDNN_SUCCESS;
         for (size_t i = 0; i < elements;) {
             // size_t linearAddress = ... // todo calc linear with pitches
             std::cout << std::setprecision(10)
-                      << ((layout.data_type == cldnn::data_types::f32) ? data[i] : cldnn_half_to_float(uint16_t(data[i]), &status))
+                      << ((layout.data_type == cldnn::data_types::f32) ? data[i] : cldnn::half_to_float(uint16_t(data[i])))
                       << ", ";
             i++;
             for (auto& pitch : pitches) {
@@ -85,4 +84,4 @@ protected:
     }
 };
 
-};  // namespace CLDNNPlugin
\ No newline at end of file
+};  // namespace CLDNNPlugin
index 91a26ec..e05e80e 100644 (file)
@@ -28,6 +28,7 @@ when cross-compiling this library for another platform.
  * LogSoftmax
  * Math (Abs, Acos, Acosh, Asin, Asinh, Atan, Atanh, Ceil, Cos, Cosh, Erf, Floor, HardSigmoid, Log, Neg, Reciprocal, Selu, Sign, Sin, Sinh, Softplus, Softsign, Tan)
  * MVN
+ * NonMaxSuppression
  * Normalize
  * OneHot
  * Pad
@@ -42,6 +43,7 @@ when cross-compiling this library for another platform.
  * ReorgYolo
  * Resample
  * ReverseSequence
+ * ScatterUpdate
  * ShuffleChannels
  * SimplerNMS
  * SpaceToDepth
index 8de54b3..a76b825 100644 (file)
@@ -10,6 +10,7 @@
 #include <vector>
 #include <cassert>
 #include "ie_parallel.hpp"
+#include "common/simple_copy.h"
 
 namespace InferenceEngine {
 namespace Extensions {
@@ -29,19 +30,7 @@ public:
             if (shape_dims.size() > 1)
                 THROW_IE_EXCEPTION << layer->name << " Shape vector should be 1 dimension";
 
-            if (layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getPrecision() != Precision::I32)
-                THROW_IE_EXCEPTION << layer->name << " Shape vector should be I32!";
-
-            if (!(layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
-                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
-                !(layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
-                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
-                THROW_IE_EXCEPTION << layer->name <<
-                    " Input and output tensors should have same precision and only FP32 and I32 are supported!";
-            }
-
-            src_dims = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getDims();
-            srcStrides = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getBlockingDesc().getStrides();
+            data_size = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision().size();
             addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
                              { DataConfigurator(ConfLayout::PLN) });
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
@@ -50,10 +39,15 @@ public:
     }
 
     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
-        int32_t* shape_dims = inputs[BROADCAST_SHAPE]->cbuffer().as<int32_t *>() +
-                              inputs[BROADCAST_SHAPE]->getTensorDesc().getBlockingDesc().getOffsetPadding();
         size_t shape_size = (inputs[BROADCAST_SHAPE]->getTensorDesc().getDims())[0];
         SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
+        SizeVector src_dims = inputs[BROADCAST_INPUT]->getTensorDesc().getDims();
+        SizeVector srcStrides = inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getStrides();
+
+        if (!src_dims.size())
+            src_dims = SizeVector(1, 1);
+        if (!srcStrides.size())
+            srcStrides = SizeVector(1, 1);
 
         if (dst_dims.size() != shape_size) {
             if (resp) {
@@ -71,33 +65,11 @@ public:
             return PARAMETER_MISMATCH;
         }
 
-        size_t i;
-        for (i = 0; i < dst_dims.size(); i++) {
-            if (static_cast<int>(dst_dims[i]) != shape_dims[i]) {
-                if (resp) {
-                    std::string errorMsg = "Output tensor dimension size mismatch";
-                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
-                }
-                return PARAMETER_MISMATCH;
-            }
-        }
-
-        size_t prefix_size = dst_dims.size() - src_dims.size();
-        for (i = 0; i < src_dims.size(); i++) {
-            if (src_dims[i] != 1 &&
-                    static_cast<int>(src_dims[i]) != shape_dims[i + prefix_size]) {
-                if (resp) {
-                    std::string errorMsg = "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1";
-                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
-                }
-                return PARAMETER_MISMATCH;
-            }
-        }
-
         InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides();
         InferenceEngine::SizeVector src_aligned(dst_dims.size());
         InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
-        for (i = 0; i < dst_dims.size(); i++) {
+        size_t prefix_size = dst_dims.size() - src_dims.size();
+        for (size_t i = 0; i < dst_dims.size(); i++) {
             if (i < prefix_size) {
                 src_aligned[i] = 1;
                 srcStrides_aligned[i] = srcStrides[0];
@@ -108,71 +80,31 @@ public:
         }
 
         size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+        const uint8_t *src_data = inputs[BROADCAST_INPUT]->cbuffer().as<const uint8_t *>() +
+                                inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        uint8_t* dst_data = outputs[0]->cbuffer().as<uint8_t *>() +
+                          outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
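+        // Precision-agnostic broadcast: each thread walks its slice of the flattened destination,
+        // wraps the per-dimension counters over the 1-padded source dims to find the source element,
+        // and copies data_size bytes per element.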
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t i, src_idx, start = 0, end = 0;
+            SizeVector counters(dst_dims.size(), 0);
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
+                counters[j] = i % dst_dims[j];
+                i /= dst_dims[j];
+            }
+            for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) {
+                for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
+                    src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
 
-        switch (outputs[0]->getTensorDesc().getPrecision()) {
-        case Precision::FP32: {
-            const float *src_data = inputs[BROADCAST_INPUT]->cbuffer().as<const float *>() +
-                                    inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-            float* dst_data = outputs[0]->cbuffer().as<float *>() +
-                              outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-            parallel_nt(0, [&](const int ithr, const int nthr) {
-                size_t i, src_idx, start = 0, end = 0;
-                SizeVector counters(dst_dims.size(), 0);
-                splitter(work_amount_dst, nthr, ithr, start, end);
-                for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
-                    counters[j] = i % dst_dims[j];
-                    i /= dst_dims[j];
-                }
-                for (size_t iwork = start; iwork < end; ++iwork) {
-                    for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
-                        src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
-
-                    dst_data[iwork] = src_data[src_idx];
-
-                    for (int j = dst_dims.size() - 1; j >= 0; j--) {
-                        counters[j] = (counters[j] + 1) % dst_dims[j];
-                        if (counters[j] != 0) break;
-                    }
-                }
-            });
-        }
-        break;
-        case Precision::I32: {
-            const int32_t *src_data = inputs[BROADCAST_INPUT]->cbuffer().as<const int32_t *>() +
-                                      inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-            int32_t* dst_data = outputs[0]->cbuffer().as<int32_t *>() +
-                                outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-            parallel_nt(0, [&](const int ithr, const int nthr) {
-                size_t i, src_idx, start = 0, end = 0;
-                SizeVector counters(dst_dims.size(), 0);
-                splitter(work_amount_dst, nthr, ithr, start, end);
-                for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
-                    counters[j] = i % dst_dims[j];
-                    i /= dst_dims[j];
-                }
-                for (size_t iwork = start; iwork < end; ++iwork) {
-                    for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
-                        src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
-
-                    dst_data[iwork] = src_data[src_idx];
+                simple_copy(&dst_data[iwork], data_size, &src_data[src_idx * data_size], data_size);
 
-                    for (int j = dst_dims.size() - 1; j >= 0; j--) {
-                        counters[j] = (counters[j] + 1) % dst_dims[j];
-                        if (counters[j] != 0) break;
-                    }
+                for (int j = dst_dims.size() - 1; j >= 0; j--) {
+                    counters[j] = (counters[j] + 1) % dst_dims[j];
+                    if (counters[j] != 0) break;
                 }
-            });
-        }
-                             break;
-        default:
-            if (resp) {
-                std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
-                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
             }
-            return GENERAL_ERROR;
-        }
+        });
 
         return OK;
     }
@@ -181,8 +113,7 @@ private:
     const size_t BROADCAST_INPUT = 0;
     const size_t BROADCAST_SHAPE = 1;
 
-    SizeVector src_dims;
-    SizeVector srcStrides;
+    size_t data_size = 1;
 };
 
 REG_FACTORY_FOR(ImplFactory<BroadcastImpl>, Broadcast);
index 6c1f243..fa06d3f 100644 (file)
@@ -27,7 +27,7 @@ struct Indexer {
       }
   }
 
-  const int operator()(const std::vector<int>& idx) const {
+  int operator()(const std::vector<int>& idx) const {
       int flat_idx = 0;
       assert(idx.size() == dims_.size());
       for (size_t i = 0; i < dims_.size(); ++i) {
index 898149b..ea9b79f 100644 (file)
@@ -30,44 +30,28 @@ public:
             if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16)
                 THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32, FP16 or I32 are supported!";
 
-            Precision inDataPrecision = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision();
-            if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::FP16)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 or FP16 are supported!";
+            axis = layer->GetParamAsInt("axis");
 
-            //  Remove redundant dimensions
             const SizeVector& dictionary_dims = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getDims();
-            SizeVector dims_actual;
-            for (size_t i = 0; i < dictionary_dims.size(); i++) {
-                if (dictionary_dims[i] > 1) {
-                    for (size_t j = i; j < dictionary_dims.size(); j++)
-                        dims_actual.push_back(dictionary_dims[j]);
-                    break;
-                }
-            }
-
-            if (dims_actual.size() == 0)
+            if (dictionary_dims.size() == 0)
                 THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
-
-            axis = static_cast<int>(layer->GetParamAsInt("axis"));
             // Dictionary must be at least rank axis + 1
-            if (axis > 0 && static_cast<int>(dims_actual.size()) < (1 + axis))
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
-            else if (axis < 0 && (static_cast<int>(dims_actual.size()) + axis) < 0)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
-
+            IE_ASSERT(-static_cast<int>(dictionary_dims.size()) <= axis && axis < static_cast<int>(dictionary_dims.size()))
+                << layer->name << " Incorrect input parameters dimensions and axis number!";
             if (axis < 0)
-                axis += dims_actual.size();
+                axis += dictionary_dims.size();
 
             //  Find number of dictionaries, index range and data length
             for (int i = 0; i < axis; i++)
-                numDictionaries *= dims_actual[i];
-            indexRange = dims_actual[axis];
-            for (size_t i = axis + 1; i < dims_actual.size(); i++)
-                dataLength *= dims_actual[i];
+                numDictionaries *= dictionary_dims[i];
+            indexRange = dictionary_dims[axis];
+            for (size_t i = axis + 1; i < dictionary_dims.size(); i++)
+                dataLength *= dictionary_dims[i];
 
             if (dataLength == 0)
                 THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
 
+            dataLength *= layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision().size();
             addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
                              { DataConfigurator(ConfLayout::PLN) });
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
@@ -96,13 +80,13 @@ public:
     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
         switch (inputs[GATHER_INDEXES]->getTensorDesc().getPrecision()) {
             case Precision::FP32:
-                gather<float, float, f32toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
+                gather<float, f32toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
                 break;
             case Precision::FP16:
-                gather<ie_fp16, ie_fp16, f16toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
+                gather<ie_fp16, f16toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
                 break;
             case Precision::I32:
-                gather<int32_t, float, i32toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
+                gather<int32_t, i32toUi32>(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
                 break;
             default:
                 return GENERAL_ERROR;
@@ -112,48 +96,31 @@ public:
     }
 
 private:
-    template <typename index_t, typename data_t, class Conversion>
+    template <typename index_t, class Conversion>
     void gather(Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output) {
         size_t src_indexSize = indexes->size();
         const index_t *src_index = indexes->cbuffer().as<const index_t *>() + indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        const data_t *src_dataDict = dictionary->cbuffer().as<const data_t *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        data_t *dst_data = output->cbuffer().as<data_t*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-        if (axis == 0) {
-            parallel_for(src_indexSize, [&](size_t i) {
-                unsigned int idx = Conversion()(src_index[i]);
-
-                //  Index clipping
-                if (idx < indexRange) {
-                    //  Copying data to destination from Dictionary
-                    simple_copy(&dst_data[i * dataLength],
-                                output->byteSize() - (dataLength * i),
-                                &src_dataDict[dataLength * idx],
-                                sizeof(data_t) * dataLength);
-                } else {
-                    memset(&dst_data[i * dataLength], 0, sizeof(data_t) * dataLength);
+        const uint8_t *src_dataDict = dictionary->cbuffer().as<const uint8_t *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        uint8_t *dst_data = output->cbuffer().as<uint8_t*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
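+        // For each index element, copy one dataLength-byte slice out of every one of the
+        // numDictionaries outer slices; indices outside [0, indexRange) produce zero-filled output.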
+        parallel_for(src_indexSize, [&](size_t i) {
+            unsigned int idx = Conversion()(src_index[i]);
+
+            //  Index clipping
+            if (idx < indexRange) {
+                //  Copying data to destination from Dictionary
+                for (size_t j = 0; j < numDictionaries; j++) {
+                    simple_copy(&dst_data[dataLength * (i + j * src_indexSize)],
+                                output->byteSize() - (dataLength * (i + j * src_indexSize)),
+                                &src_dataDict[dataLength * (idx + j * indexRange)],
+                                dataLength);
                 }
-            });
-        } else {
-            parallel_for(src_indexSize, [&](size_t i) {
-                unsigned int idx = Conversion()(src_index[i]);
-
-                //  Index clipping
-                if (idx < indexRange) {
-                    //  Copying data to destination from Dictionary
-                    for (size_t j = 0; j < numDictionaries; j++) {
-                        simple_copy(&dst_data[dataLength * (i + j * src_indexSize)],
-                                    output->byteSize() - (dataLength * (i + j * src_indexSize)),
-                                    &src_dataDict[dataLength * (idx + j * indexRange)],
-                                    sizeof(data_t) * dataLength);
-                    }
-                } else {
-                    for (size_t j = 0; j < numDictionaries; j++) {
-                        memset(&dst_data[dataLength * (i + j * src_indexSize)], 0, sizeof(data_t) * dataLength);
-                    }
+            } else {
+                for (size_t j = 0; j < numDictionaries; j++) {
+                    memset(&dst_data[dataLength * (i + j * src_indexSize)], 0, dataLength);
                 }
-            });
-        }
+            }
+        });
     }
 
     int axis = 0;
index 4bcd9fa..533821c 100644 (file)
@@ -31,7 +31,7 @@ void CpuExtensions::AddShapeInferImpl(std::string name, const IShapeInferImpl::P
 
 void CpuExtensions::GetVersion(const Version*& versionInfo) const noexcept {
     static Version ExtensionDescription = {
-            { 2, 0 },    // extension API version
+            { 2, 1 },    // extension API version
             "2.0",
             "ie-cpu-ext"  // extension description message
     };
index ba53dc8..6effeef 100644 (file)
@@ -31,6 +31,8 @@ public:
                 THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. Only FP32 is supported!";
 
             SizeVector dims = layer->insData[0].lock()->getTensorDesc().getDims();
+            if (!dims.size())
+                dims = SizeVector(1, 1);
             int axis = layer->GetParamAsInt("axis", -1);
             if (axis < 0)
                 axis += dims.size();
diff --git a/inference-engine/src/extension/ext_non_max_suppression.cpp b/inference-engine/src/extension/ext_non_max_suppression.cpp
new file mode 100644 (file)
index 0000000..e90a084
--- /dev/null
@@ -0,0 +1,244 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include <algorithm>
+#include <utility>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class NonMaxSuppressionImpl: public ExtLayerBase {
+public:
+    explicit NonMaxSuppressionImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() < 2 || layer->insData.size() > 5)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
+
+            if (layer->outData.size() != 1)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of output edges!";
+
+            if (layer->insData[NMS_BOXES].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'boxes' input precision. Only FP32 is supported!";
+            SizeVector boxes_dims = layer->insData[NMS_BOXES].lock()->getTensorDesc().getDims();
+            if (boxes_dims.size() != 3 || boxes_dims[2] != 4)
+                THROW_IE_EXCEPTION << layer->name << " 'boxes' should be with shape [num_batches, spatial_dimension, 4]";
+
+            if (layer->insData[NMS_SCORES].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'scores' input precision. Only FP32 is supported!";
+            SizeVector scores_dims = layer->insData[NMS_SCORES].lock()->getTensorDesc().getDims();
+            if (scores_dims.size() != 3)
+                THROW_IE_EXCEPTION << layer->name << " 'scores' should be with shape [num_batches, num_classes, spatial_dimension]";
+
+            if (boxes_dims[0] != scores_dims[0])
+                THROW_IE_EXCEPTION << layer->name << " num_batches is different in 'boxes' and 'scores' tensors";
+            if (boxes_dims[1] != scores_dims[2])
+                THROW_IE_EXCEPTION << layer->name << " spatial_dimension is different in 'boxes' and 'scores' tensors";
+
+            if (layer->insData.size() > 2) {
+                if (layer->insData[NMS_MAXOUTPUTBOXESPERCLASS].lock()->getTensorDesc().getPrecision() != Precision::I32)
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect 'max_output_boxes_per_class' input precision. Only I32 is supported!";
+                SizeVector max_output_boxes_per_class_dims = layer->insData[NMS_MAXOUTPUTBOXESPERCLASS].lock()->getTensorDesc().getDims();
+                if (max_output_boxes_per_class_dims.size() != 1 || max_output_boxes_per_class_dims[0] != 1)
+                    THROW_IE_EXCEPTION << layer->name << " 'max_output_boxes_per_class' should be scalar";
+            }
+
+            if (layer->insData.size() > 3) {
+                if (layer->insData[NMS_IOUTHRESHOLD].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect 'iou_threshold' input precision. Only FP32 is supported!";
+                SizeVector iou_threshold_dims = layer->insData[NMS_IOUTHRESHOLD].lock()->getTensorDesc().getDims();
+                if (iou_threshold_dims.size() != 1 || iou_threshold_dims[0] != 1)
+                    THROW_IE_EXCEPTION << layer->name << " 'iou_threshold' should be scalar";
+            }
+
+            if (layer->insData.size() > 4) {
+                if (layer->insData[NMS_SCORETHRESHOLD].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect 'score_threshold' input precision. Only FP32 is supported!";
+                SizeVector score_threshold_dims = layer->insData[NMS_SCORETHRESHOLD].lock()->getTensorDesc().getDims();
+                if (score_threshold_dims.size() != 1 || score_threshold_dims[0] != 1)
+                    THROW_IE_EXCEPTION << layer->name << " 'score_threshold' should be scalar";
+            }
+
+            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'selected_indices' input precision. Only I32 is supported!";
+            SizeVector selected_indices_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (selected_indices_dims.size() != 2 || selected_indices_dims[1] != 3)
+                THROW_IE_EXCEPTION << layer->name << " 'selected_indices' should be with shape [num_selected_indices, 3]";
+
+            center_point_box = layer->GetParamAsBool("center_point_box", false);
+
+            if (layer->insData.size() == 2) {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+            } else if (layer->insData.size() == 3) {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                    { DataConfigurator(ConfLayout::PLN) });
+            } else if (layer->insData.size() == 4) {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
+                    DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+            } else {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
+                    DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+            }
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
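+    // Intersection-over-Union of two boxes. With center_point_box the boxes are given as
+    // (x_center, y_center, width, height); otherwise as two corner points in either order.
+    // Degenerate boxes yield 0.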
+    static float intersectionOverUnion(float* boxesI, float* boxesJ, bool center_point_box) {
+        float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ;
+        if (center_point_box) {
+            //  box format: x_center, y_center, width, height
+            yminI = boxesI[1] - boxesI[3] / 2.f;
+            xminI = boxesI[0] - boxesI[2] / 2.f;
+            ymaxI = boxesI[1] + boxesI[3] / 2.f;
+            xmaxI = boxesI[0] + boxesI[2] / 2.f;
+            yminJ = boxesJ[1] - boxesJ[3] / 2.f;
+            xminJ = boxesJ[0] - boxesJ[2] / 2.f;
+            ymaxJ = boxesJ[1] + boxesJ[3] / 2.f;
+            xmaxJ = boxesJ[0] + boxesJ[2] / 2.f;
+        } else {
+            //  box format: y1, x1, y2, x2
+            yminI = (std::min)(boxesI[0], boxesI[2]);
+            xminI = (std::min)(boxesI[1], boxesI[3]);
+            ymaxI = (std::max)(boxesI[0], boxesI[2]);
+            xmaxI = (std::max)(boxesI[1], boxesI[3]);
+            yminJ = (std::min)(boxesJ[0], boxesJ[2]);
+            xminJ = (std::min)(boxesJ[1], boxesJ[3]);
+            ymaxJ = (std::max)(boxesJ[0], boxesJ[2]);
+            xmaxJ = (std::max)(boxesJ[1], boxesJ[3]);
+        }
+
+        float areaI = (ymaxI - yminI) * (xmaxI - xminI);
+        float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ);
+        if (areaI <= 0.f || areaJ <= 0.f)
+            return 0.f;
+
+        float intersection_area =
+            (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) *
+            (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f);
+        return intersection_area / (areaI + areaJ - intersection_area);
+    }
+
+    typedef struct {
+        float score;
+        int batch_index;
+        int class_index;
+        int box_index;
+    } filteredBoxes;
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        float *boxes = inputs[NMS_BOXES]->cbuffer().as<float *>() +
+            inputs[NMS_BOXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float *scores = inputs[NMS_SCORES]->cbuffer().as<float *>() +
+            inputs[NMS_SCORES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        SizeVector scores_dims = inputs[NMS_SCORES]->getTensorDesc().getDims();
+        int num_boxes = static_cast<int>(scores_dims[2]);
+        int max_output_boxes_per_class = num_boxes;
+        if (inputs.size() > 2)
+            max_output_boxes_per_class = (std::min)(max_output_boxes_per_class,
+                (inputs[NMS_MAXOUTPUTBOXESPERCLASS]->cbuffer().as<int *>() +
+                inputs[NMS_MAXOUTPUTBOXESPERCLASS]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]);
+
+        float iou_threshold = 1.f;  //  Value range [0, 1]
+        if (inputs.size() > 3)
+            iou_threshold = (std::min)(iou_threshold, (inputs[NMS_IOUTHRESHOLD]->cbuffer().as<float *>() +
+                inputs[NMS_IOUTHRESHOLD]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]);
+
+        float score_threshold = 0.f;
+        if (inputs.size() > 4)
+            score_threshold = (inputs[NMS_SCORETHRESHOLD]->cbuffer().as<float *>() +
+                inputs[NMS_SCORETHRESHOLD]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+        int* selected_indices = outputs[0]->cbuffer().as<int *>() +
+            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        SizeVector selected_indices_dims = outputs[0]->getTensorDesc().getDims();
+
+        SizeVector boxesStrides = inputs[NMS_BOXES]->getTensorDesc().getBlockingDesc().getStrides();
+        SizeVector scoresStrides = inputs[NMS_SCORES]->getTensorDesc().getBlockingDesc().getStrides();
+
+        // boxes shape: {num_batches, num_boxes, 4}
+        // scores shape: {num_batches, num_classes, num_boxes}
+        int num_batches = static_cast<int>(scores_dims[0]);
+        int num_classes = static_cast<int>(scores_dims[1]);
+        std::vector<filteredBoxes> fb;
+
+        for (int batch = 0; batch < num_batches; batch++) {
+            float *boxesPtr = boxes + batch * boxesStrides[0];
+            for (int class_idx = 0; class_idx < num_classes; class_idx++) {
+                float *scoresPtr = scores + batch * scoresStrides[0] + class_idx * scoresStrides[1];
+                std::vector<std::pair<float, int> > scores_vector;
+                for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
+                    if (scoresPtr[box_idx] > score_threshold)
+                        scores_vector.push_back(std::make_pair(scoresPtr[box_idx], box_idx));
+                }
+
+                if (scores_vector.size()) {
+                    parallel_sort(scores_vector.begin(), scores_vector.end(),
+                        [](const std::pair<float, int>& l, const std::pair<float, int>& r) { return l.first > r.first; });
+
+                    int io_selection_size = 1;
+                    fb.push_back({ scores_vector[0].first, batch, class_idx, scores_vector[0].second });
+                    for (int box_idx = 1; (box_idx < static_cast<int>(scores_vector.size()) && io_selection_size < max_output_boxes_per_class); box_idx++) {
+                        bool box_is_selected = true;
+                        for (int idx = io_selection_size - 1; idx >= 0; idx--) {
+                            float iou = intersectionOverUnion(&boxesPtr[scores_vector[box_idx].second * 4],
+                                             &boxesPtr[scores_vector[idx].second * 4], center_point_box);
+                            if (iou > iou_threshold) {
+                                box_is_selected = false;
+                                break;
+                            }
+                        }
+
+                        if (box_is_selected) {
+                            scores_vector[io_selection_size] = scores_vector[box_idx];
+                            io_selection_size++;
+                            fb.push_back({ scores_vector[box_idx].first, batch, class_idx, scores_vector[box_idx].second });
+                        }
+                    }
+                }
+            }
+        }
+
+        parallel_sort(fb.begin(), fb.end(), [](const filteredBoxes& l, const filteredBoxes& r) { return l.score > r.score; });
+        int selected_indicesStride = outputs[0]->getTensorDesc().getBlockingDesc().getStrides()[0];
+        int* selected_indicesPtr = selected_indices;
+        size_t idx;
+        for (idx = 0; idx < (std::min)(selected_indices_dims[0], fb.size()); idx++) {
+            selected_indicesPtr[0] = fb[idx].batch_index;
+            selected_indicesPtr[1] = fb[idx].class_index;
+            selected_indicesPtr[2] = fb[idx].box_index;
+            selected_indicesPtr += selected_indicesStride;
+        }
+        for (; idx < selected_indices_dims[0]; idx++) {
+            selected_indicesPtr[0] = -1;
+            selected_indicesPtr[1] = -1;
+            selected_indicesPtr[2] = -1;
+            selected_indicesPtr += selected_indicesStride;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t NMS_BOXES = 0;
+    const size_t NMS_SCORES = 1;
+    const size_t NMS_MAXOUTPUTBOXESPERCLASS = 2;
+    const size_t NMS_IOUTHRESHOLD = 3;
+    const size_t NMS_SCORETHRESHOLD = 4;
+    bool center_point_box = false;
+};
+
+REG_FACTORY_FOR(ImplFactory<NonMaxSuppressionImpl>, NonMaxSuppression);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
index 39ff4a4..338e054 100644 (file)
@@ -29,7 +29,7 @@ struct Indexer {
       }
   }
 
-  const int operator()(const std::vector<int>& idx) const {
+  int operator()(const std::vector<int>& idx) const {
       int flat_idx = 0;
       assert(idx.size() == dims_.size());
       for (size_t i = 0; i < dims_.size(); ++i) {
index 16d6dec..2334792 100644 (file)
@@ -111,6 +111,9 @@ public:
             }
         }
 
+        if (!our_dims.size())
+            our_dims = InferenceEngine::SizeVector(1, 1);
+
         InferenceEngine::SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
         for (size_t i = 0; i < (std::min)(out_dims.size(), dst_dims.size()); i++) {
             if (out_dims[i] != dst_dims[i]) {
@@ -126,7 +129,12 @@ public:
             inputs[REDUCE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
         float* dst_data = outputs[0]->cbuffer().as<float *>() +
             outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        size_t work_amount_dst = outputs[0]->getTensorDesc().getBlockingDesc().getStrides()[0] * dst_dims[0];
+
+        size_t work_amount_dst;
+        if (!dst_dims.size())
+            work_amount_dst = 1;
+        else
+            work_amount_dst = outputs[0]->getTensorDesc().getBlockingDesc().getStrides()[0] * dst_dims[0];
 
         switch (reduceMode) {
         case Reduce::And:
index d4d187d..e3f717d 100644 (file)
@@ -54,6 +54,11 @@ public:
             addConfig(layer, {DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
             if (type == "caffe.ResampleParameter.NEAREST")
                 addConfig(layer, {DataConfigurator(blk_layout)}, {DataConfigurator(blk_layout)});
+
+            // WA to enable the implementation only for equal input and output precisions
+            for (auto &conf : confs) {
+                conf.inConfs[0].desc.setPrecision(conf.outConfs[0].desc.getPrecision());
+            }
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
             errorMsg = ex.what();
         }
@@ -63,7 +68,7 @@ public:
                        ResponseDesc *resp) noexcept override {
         const auto *src_data = inputs[0]->cbuffer().as<const float *>();
         auto *dst_data = outputs[0]->buffer().as<float *>();
-#ifdef WIN32
+#ifdef _WIN32
 #undef IN
 #endif
         const Layout &layout = inputs[0]->getTensorDesc().getLayout();
diff --git a/inference-engine/src/extension/ext_scatter.cpp b/inference-engine/src/extension/ext_scatter.cpp
new file mode 100644 (file)
index 0000000..0ec01be
--- /dev/null
@@ -0,0 +1,174 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include <algorithm>
+#include <limits>
+#include "ie_parallel.hpp"
+#include "common/simple_copy.h"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class ScatterImpl: public ExtLayerBase {
+public:
+    explicit ScatterImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() != 3 || layer->outData.size() != 1)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output tensors!";
+
+
+            inIdxPrecision = layer->insData[SCATTER_INDEXES].lock()->getTensorDesc().getPrecision();
+            if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indexes' precision. Only FP32 or I32 are supported!";
+
+            Precision inDataPrecision = layer->insData[SCATTER_DATA].lock()->getTensorDesc().getPrecision();
+            if (inDataPrecision != layer->insData[SCATTER_UPDATES].lock()->getTensorDesc().getPrecision())
+                THROW_IE_EXCEPTION << layer->name << " Precision should be equal for input tensors 'Data' and 'Updates'";
+
+            if (inDataPrecision != layer->outData[0]->getTensorDesc().getPrecision())
+                THROW_IE_EXCEPTION << layer->name << " Precision should be equal for input tensor 'Data' and output";
+
+            //  Remove redundant dimensions
+            const SizeVector& data_dims = layer->insData[SCATTER_DATA].lock()->getTensorDesc().getDims();
+            if (data_dims.size() == 0 ||
+                (data_dims.size() == 1 && data_dims[0] == 1) ||
+                layer->insData[SCATTER_DATA].lock()->getTensorDesc().getLayout() == Layout::SCALAR)
+                    THROW_IE_EXCEPTION << layer->name << " 'Data' tensor rank should be >= 1";
+
+            axis = layer->GetParamAsInt("axis", 0);
+
+            IE_ASSERT(-static_cast<int>(data_dims.size()) <= axis && axis < static_cast<int>(data_dims.size()))
+                << layer->name << " Incorrect input parameters dimensions and axis number!";
+
+            if (axis < 0)
+                axis += data_dims.size();
+
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (data_dims != dst_dims)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";
+
+            SizeVector idx_dims = layer->insData[SCATTER_INDEXES].lock()->getTensorDesc().getDims();
+            if (idx_dims.size() == 0 ||
+                (idx_dims.size() == 1 && idx_dims[0] == 1) ||
+                layer->insData[SCATTER_INDEXES].lock()->getTensorDesc().getLayout() == Layout::SCALAR)
+                THROW_IE_EXCEPTION << layer->name << " 'Indexes' tensor rank should be >= 1";
+
+            SizeVector upd_dims = layer->insData[SCATTER_UPDATES].lock()->getTensorDesc().getDims();
+            if (layer->insData[SCATTER_UPDATES].lock()->getTensorDesc().getLayout() == Layout::SCALAR)
+                THROW_IE_EXCEPTION << layer->name << " 'Indexes' tensor rank should be >= 1";
+
+            if (idx_dims != upd_dims)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'indexes' and 'updates' tensors dimension";
+
+            for (size_t i = 0; i < idx_dims.size(); i++) {
+                if (i == static_cast<size_t>(axis)) continue;
+                if (idx_dims[i] > data_dims[i])
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect number of data and indexes dimensions!";
+            }
+
+            data_size = layer->insData[SCATTER_DATA].lock()->getTensorDesc().getPrecision().size();
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN, false, 0), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                             { DataConfigurator(ConfLayout::PLN, false, 0) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        switch (inIdxPrecision) {
+            case Precision::FP32:
+                scatter<float>(inputs[SCATTER_DATA], inputs[SCATTER_INDEXES], inputs[SCATTER_UPDATES], outputs[0]);
+                break;
+            case Precision::I32:
+                scatter<int32_t>(inputs[SCATTER_DATA], inputs[SCATTER_INDEXES], inputs[SCATTER_UPDATES], outputs[0]);
+                break;
+            default:
+                return GENERAL_ERROR;
+        }
+
+        return OK;
+    }
+
+private:
+    template <typename index_t>
+    void scatter(Blob::Ptr data, Blob::Ptr indexes, Blob::Ptr updates, Blob::Ptr output) {
+        const uint8_t *src_data = data->cbuffer().as<const uint8_t *>() + data->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        const index_t *src_index = indexes->cbuffer().as<const index_t *>() + indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        const uint8_t *src_updates = updates->cbuffer().as<const uint8_t *>() + updates->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        uint8_t *dst_data = output->cbuffer().as<uint8_t*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        InferenceEngine::SizeVector index_dims = indexes->getTensorDesc().getDims();
+        InferenceEngine::SizeVector data_dims = data->getTensorDesc().getDims();
+        InferenceEngine::SizeVector dataStrides = data->getTensorDesc().getBlockingDesc().getStrides();
+
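+        // When not running in-place, initialize the output with a parallel copy of 'Data';
+        // updates are then scattered on top of it.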
+        if (src_data != dst_data) {
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t start = 0, end = 0;
+                splitter(output->size(), nthr, ithr, start, end);
+                size_t size = (end - start) * data_size;
+                start *= data_size;
+                simple_copy(dst_data + start, size, src_data + start, size);
+            });
+        }
+
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            int j;
+            size_t i, dst_idx = 0, start = 0, end = 0;
+            SizeVector counters(index_dims.size(), 0);
+            splitter(indexes->size(), nthr, ithr, start, end);
+            for (j = index_dims.size() - 1, i = start; j >= 0; j--) {
+                counters[j] = i % index_dims[j];
+                i /= index_dims[j];
+            }
+
+            for (i = 0; i < static_cast<size_t>(axis); ++i)
+                dst_idx += counters[i] * dataStrides[i];
+            for (i++; i < data_dims.size(); ++i)
+                dst_idx += counters[i] * dataStrides[i];
+
+            for (size_t iwork = start; iwork < end; iwork++) {
+                unsigned int idx = static_cast<unsigned int>(src_index[iwork]);
+                if (idx < data_dims[axis])
+                    simple_copy(dst_data + data_size * (dst_idx + idx * dataStrides[axis]), data_size,
+                                src_updates + iwork * data_size, data_size);
+
+                for (j = index_dims.size() - 1; j >= 0; j--) {
+                    counters[j]++;
+                    if (counters[j] < index_dims[j]) {
+                        dst_idx += dataStrides[j];
+                        break;
+                    } else {
+                        counters[j] = 0;
+                        for (dst_idx = 0, i = 0; i < static_cast<size_t>(axis); ++i)
+                            dst_idx += counters[i] * dataStrides[i];
+                        for (i++; i < data_dims.size(); ++i)
+                            dst_idx += counters[i] * dataStrides[i];
+                    }
+                }
+            }
+        });
+    }
+
+    int axis = 0;
+    Precision inIdxPrecision;
+    const size_t SCATTER_DATA = 0;
+    const size_t SCATTER_INDEXES = 1;
+    const size_t SCATTER_UPDATES = 2;
+    size_t data_size = 1;
+};
+
+REG_FACTORY_FOR(ImplFactory<ScatterImpl>, ScatterUpdate);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
index c00e76f..933b26f 100644 (file)
@@ -17,7 +17,7 @@ namespace Cpu {
 struct simpler_nms_roi_t {
     float x0, y0, x1, y1;
 
-    static inline const float clamp_v(const float v, const float v_min, const float v_max) {
+    static inline float clamp_v(const float v, const float v_min, const float v_max) {
         return std::max(v_min, std::min(v, v_max));
     }
 
diff --git a/inference-engine/src/extension/ext_sparse_fill_empty_rows.cpp b/inference-engine/src/extension/ext_sparse_fill_empty_rows.cpp
new file mode 100644 (file)
index 0000000..e07d54a
--- /dev/null
@@ -0,0 +1,232 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <array>
+#include <cassert>
+#include <algorithm>
+#include <limits>
+#include "ie_parallel.hpp"
+#include "simple_copy.h"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class SparseFillEmptyRowsImpl : public ExtLayerBase {
+public:
+    explicit SparseFillEmptyRowsImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() != 4 || layer->outData.size() != 3) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+            }
+
+            Precision input_indices_precision = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getPrecision();
+            if (input_indices_precision != Precision::FP32) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 is supported!";
+            }
+
+            // check dimensions of input tensors
+            SizeVector input_indices_dims = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getDims();
+            if (input_indices_dims.size() != 2 || input_indices_dims[1] != 2) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for input indices. It must be Nx2 dimension tensor.";
+            }
+            SizeVector input_values_dims = layer->insData[INPUT_VALUES_PORT].lock()->getTensorDesc().getDims();
+            if (input_values_dims.size() != 1) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for input values. It must be N dimension tensor.";
+            }
+            if (input_indices_dims[0] != input_values_dims[0]) {
+                THROW_IE_EXCEPTION << layer->name << " Mismatch of the first dimensions of input indices and values.";
+            }
+            SizeVector input_dense_shape_dims = layer->insData[INPUT_DENSE_SHAPE_PORT].lock()->getTensorDesc().getDims();
+            if (input_dense_shape_dims.size() != 1 || input_dense_shape_dims[0] != 2) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for input dense shape.";
+            }
+            SizeVector input_default_value_dims = layer->insData[INPUT_DEFAULT_VALUE_PORT].lock()->getTensorDesc().getDims();
+            if (input_default_value_dims[0] != 1) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for input dense shape.";
+            }
+            inMaxNumValues = input_indices_dims[0];
+
+            // check dimensions of output tensors
+            SizeVector output_indices_dims = layer->outData[OUTPUT_INDICES_PORT]->getTensorDesc().getDims();
+            if (output_indices_dims.size() != 2 || output_indices_dims[1] != 2) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for output indices. It must be Nx2 dimension tensor.";
+            }
+            SizeVector output_values_dims = layer->outData[OUTPUT_VALUES_PORT]->getTensorDesc().getDims();
+            if (output_values_dims.size() != 1) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for output values. It must be N dimension tensor.";
+            }
+            if (output_indices_dims[0] != output_values_dims[0]) {
+                THROW_IE_EXCEPTION << layer->name << " Mismatch of the first dimensions of output indices and values.";
+            }
+            SizeVector output_empty_rows_indicator_dims = layer->outData[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->getTensorDesc().getDims();
+            if (output_empty_rows_indicator_dims.size() != 1) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for output empty rows indicator. It must be 1-D tensor.";
+            }
+            outMaxNumValues = output_indices_dims[0];
+            if (outMaxNumValues < inMaxNumValues) {
+                THROW_IE_EXCEPTION << layer->name << " The first dimension of input indices cannot be greater than the first dimension of output indices.";
+            }
+
+            // TODO: check that dense shape value is set
+            addConfig(layer,
+                {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)},
+                {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)});
+        }
+        catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        const float *input_indices_ptr = inputs[INPUT_INDICES_PORT]->cbuffer().as<const float *>() +
+            inputs[INPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        const float *input_values_ptr = inputs[INPUT_VALUES_PORT]->cbuffer().as<const float *>() +
+            inputs[INPUT_VALUES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        const float *dense_shape_ptr = inputs[INPUT_DENSE_SHAPE_PORT]->cbuffer().as<const float *>() +
+            inputs[INPUT_DENSE_SHAPE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        const float *default_value_ptr = inputs[INPUT_DEFAULT_VALUE_PORT]->cbuffer().as<const float *>() +
+            inputs[INPUT_DEFAULT_VALUE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        float default_value = default_value_ptr[0];
+        float num_rows = dense_shape_ptr[0];
+        float num_cols = dense_shape_ptr[1];
+
+        // compute the actual number of values by searching for an out-of-range index pair that serves as an end marker
+        size_t in_actual_num_values = 0;
+        for (in_actual_num_values = 0; in_actual_num_values < inMaxNumValues; in_actual_num_values++) {
+            float indice_x = input_indices_ptr[2 * in_actual_num_values];
+            float indice_y = input_indices_ptr[2 * in_actual_num_values + 1];
+            if (indice_x < 0 || indice_y < 0 || indice_x >= num_rows || indice_y >= num_cols) break;
+        }
+
+        // create auxiliary container for sorting
+        std::vector<std::array<float, 3>> indices_values(in_actual_num_values);
+        parallel_for(in_actual_num_values, [&](size_t i) {
+            float row = input_indices_ptr[2 * i];
+            float col = input_indices_ptr[2 * i + 1];
+            float value = input_values_ptr[i];
+            std::array<float, 3> elem = { row, col, value };
+            indices_values[i] = elem;
+        });
+
+        // sort values by row
+        parallel_sort(indices_values.begin(), indices_values.end(),
+            [](const std::array<float, 3>& first, const std::array<float, 3>& second) {
+            return first[0] < second[0];
+        });
+
+        // split the sorted triples back into separate indices and values
+        std::vector<float> indices_with_sorted_rows(in_actual_num_values * 2);
+        std::vector<float> values_for_sorted_rows(in_actual_num_values);
+        parallel_for(in_actual_num_values, [&](size_t i) {
+            auto elem = indices_values[i];
+            indices_with_sorted_rows[i * 2] = elem[0];
+            indices_with_sorted_rows[i * 2 + 1] = elem[1];
+            values_for_sorted_rows[i] = elem[2];
+        });
+
+        // compute the number of values at each row and the total number of output values
+        std::vector<int> values_at_row(static_cast<unsigned int>(num_rows));
+        std::fill(values_at_row.begin(), values_at_row.end(), 0);
+        float prev_row_with_value = -1.0f;
+        unsigned int total_num_values = 0;
+        std::vector<std::array<float, 3>>::iterator curr_it, prev_it;
+        for (float row_ind = 0.0; row_ind < num_rows; row_ind = row_ind + 1.0f) {
+            curr_it = std::find_if(indices_values.begin(), indices_values.end(),
+                [row_ind](std::array<float, 3> elem) { return elem[0] == row_ind; });
+            if (curr_it != indices_values.end()) {
+                if (prev_row_with_value != -1.0f) {
+                    unsigned int num_values_at_prev_row = static_cast<unsigned int>(std::distance(prev_it, curr_it));
+                    values_at_row[static_cast<int>(prev_row_with_value)] = num_values_at_prev_row;
+                    total_num_values += num_values_at_prev_row;
+                }
+                prev_row_with_value = row_ind;
+                prev_it = curr_it;
+            } else {
+                total_num_values++;
+            }
+        }
+        if (prev_row_with_value != -1.0) {
+            unsigned int num_values_at_prev_row = static_cast<unsigned int>(std::distance(prev_it, indices_values.end()));
+            values_at_row[static_cast<int>(prev_row_with_value)] = num_values_at_prev_row;
+            total_num_values += num_values_at_prev_row;
+        }
+
+        // check that output buffer size is sufficient
+        if (outMaxNumValues < total_num_values) return GENERAL_ERROR;
+
+        // get pointers to output blobs
+        float *output_indices_ptr = outputs[OUTPUT_INDICES_PORT]->cbuffer().as<float *>() +
+            outputs[OUTPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float *output_values_ptr = outputs[OUTPUT_VALUES_PORT]->cbuffer().as<float *>() +
+            outputs[OUTPUT_VALUES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float *output_empty_rows_indicator_ptr = outputs[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->cbuffer().as<float *>() +
+            outputs[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        auto output_indices_size = outputs[OUTPUT_INDICES_PORT]->byteSize();
+        memset(output_indices_ptr, 0, output_indices_size);
+
+        auto output_values_size = outputs[OUTPUT_VALUES_PORT]->byteSize();
+        memset(output_values_ptr, 0, output_values_size);
+
+        auto output_empty_rows_indicator_size = outputs[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->byteSize();
+        memset(output_empty_rows_indicator_ptr, 0, output_empty_rows_indicator_size);
+
+        unsigned int curr_pos_from_copy = 0;
+        unsigned int curr_pos_to_copy = 0;
+        for (int row_ind = 0; row_ind < static_cast<int>(num_rows); row_ind++) {
+            unsigned int num_values_at_row = values_at_row[row_ind];
+            if (num_values_at_row == 0) {
+                output_empty_rows_indicator_ptr[row_ind] = 1.0;
+                output_values_ptr[curr_pos_to_copy] = default_value;
+                output_indices_ptr[curr_pos_to_copy * 2] = static_cast<float>(row_ind);
+                output_indices_ptr[curr_pos_to_copy * 2 + 1] = 0.0;
+                curr_pos_to_copy++;
+            } else {
+                output_empty_rows_indicator_ptr[row_ind] = 0.0;
+                std::copy(values_for_sorted_rows.begin() + curr_pos_from_copy,
+                    values_for_sorted_rows.begin() + curr_pos_from_copy + num_values_at_row,
+                    output_values_ptr + curr_pos_to_copy);
+                std::copy(indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy,
+                    indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy + 2 * num_values_at_row, output_indices_ptr + curr_pos_to_copy * 2);
+                curr_pos_to_copy += num_values_at_row;
+                curr_pos_from_copy += num_values_at_row;
+            }
+        }
+
+        // mark the end of the output using a (-1, -1) index pair
+        if (total_num_values < outMaxNumValues) {
+            output_indices_ptr[total_num_values * 2] = -1.0;
+            output_indices_ptr[total_num_values * 2 + 1] = -1.0;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t INPUT_INDICES_PORT = 0;
+    const size_t INPUT_VALUES_PORT = 1;
+    const size_t INPUT_DENSE_SHAPE_PORT = 2;
+    const size_t INPUT_DEFAULT_VALUE_PORT = 3;
+    const size_t OUTPUT_INDICES_PORT = 0;
+    const size_t OUTPUT_VALUES_PORT = 1;
+    const size_t OUTPUT_EMPTY_ROWS_INDICATOR_PORT = 2;
+
+    size_t inMaxNumValues = 0;
+    size_t outMaxNumValues = 0;
+};
+
+REG_FACTORY_FOR(ImplFactory<SparseFillEmptyRowsImpl>, SparseFillEmptyRows);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
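The end-marker and empty-row-filling conventions used by this kernel are easiest to see on a tiny example. The following self-contained sketch mirrors the same contract on plain vectors (illustrative only; the data and names here are hypothetical and not part of the extension): indices arrive as FP32 (row, col) pairs, the first out-of-range pair terminates the valid region, every empty row receives default_value at column 0, and the output is closed with a (-1, -1) pair when space remains.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    const int num_rows = 4;
    const float default_value = 9.0f;
    std::vector<float> in_indices = {0, 1,  2, 0,  -1, -1};  // two valid entries + end marker
    std::vector<float> in_values  = {10.f, 20.f, 0.f};

    std::vector<float> out_indices, out_values, empty_row_indicator(num_rows, 0.f);
    for (int r = 0; r < num_rows; ++r) {
        bool has_value = false;
        for (std::size_t i = 0; i + 1 < in_indices.size(); i += 2) {
            if (in_indices[i] < 0) break;                    // end marker reached
            if (static_cast<int>(in_indices[i]) == r) {      // entry belongs to row r
                out_indices.push_back(in_indices[i]);
                out_indices.push_back(in_indices[i + 1]);
                out_values.push_back(in_values[i / 2]);
                has_value = true;
            }
        }
        if (!has_value) {                                    // fill the empty row with the default value
            out_indices.push_back(static_cast<float>(r));
            out_indices.push_back(0.f);
            out_values.push_back(default_value);
            empty_row_indicator[r] = 1.f;
        }
    }
    out_indices.push_back(-1.f);                             // close the output with a (-1, -1) pair
    out_indices.push_back(-1.f);

    for (std::size_t i = 0; i < out_values.size(); ++i)
        std::cout << "(" << out_indices[2 * i] << ", " << out_indices[2 * i + 1]
                  << ") -> " << out_values[i] << "\n";       // prints (0,1)->10 (1,0)->9 (2,0)->20 (3,0)->9
    return 0;
}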
index 4773770..bb20a60 100644 (file)
@@ -168,6 +168,9 @@ public:
         InferenceEngine::SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
         InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides();
 
+        auto dst_size = outputs[0]->byteSize();
+        memset(dst_data, 0, dst_size);
+
         size_t i, j, k, bj, ej, sj;
         InferenceEngine::SizeVector our_dims;
         InferenceEngine::SizeVector out_dims;
diff --git a/inference-engine/src/extension/ext_unique.cpp b/inference-engine/src/extension/ext_unique.cpp
new file mode 100644 (file)
index 0000000..939128c
--- /dev/null
@@ -0,0 +1,206 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <cassert>
+#include <algorithm>
+#include <functional>
+#include <limits>
+#include <utility>
+#include "ie_parallel.hpp"
+#include "simple_copy.h"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class UniqueImpl : public ExtLayerBase {
+public:
+    explicit UniqueImpl(const CNNLayer* layer) {
+        try {
+            // check number of inputs and outputs
+            if (layer->insData.size() != 1 || layer->outData.size() < 1 || layer->outData.size() > 3) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+            }
+
+            // check precision of tensors
+            Precision input_indices_precision = layer->insData[0].lock()->getTensorDesc().getPrecision();
+            if (input_indices_precision != Precision::FP32) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 is supported!";
+            }
+
+            // check attributes
+            sorted = layer->GetParamAsBool("sorted");
+            return_inverse = layer->GetParamAsBool("return_inverse");
+            return_counts = layer->GetParamAsBool("return_counts");
+
+            // check that the actual number of outputs matches the number claimed by attributes
+            size_t claimed_num_outputs = 1;
+            if (return_inverse) {
+                claimed_num_outputs++;
+            }
+            if (return_counts) {
+                claimed_num_outputs++;
+            }
+            if (layer->outData.size() != claimed_num_outputs) {
+                THROW_IE_EXCEPTION << layer->name << " The number of outputs claimed by attributes does not match the actual number of outputs!";
+            }
+
+            // check dimensions of input tensors
+            SizeVector input_dims = layer->insData[0].lock()->getTensorDesc().getDims();
+            if (input_dims.size() != 1) {
+                THROW_IE_EXCEPTION << layer->name << " Input must be a 1-D tensor.";
+            }
+            num_elements = input_dims[0];
+
+            // check dimensions and precisions of output tensors
+            size_t cur_output_port = 0;
+            SizeVector output_uniques_dims = layer->outData[cur_output_port]->getTensorDesc().getDims();
+            Precision output_uniques_precision = layer->outData[cur_output_port]->getTensorDesc().getPrecision();
+            if (output_uniques_precision != Precision::FP32) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect precision for output tensor of unique elements. Only FP32 is supported!";
+            }
+            if (output_uniques_dims.size() != 1 || output_uniques_dims[0] != num_elements) {
+                THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for output tensor of unique elements.";
+            }
+            if (return_inverse) {
+                cur_output_port++;
+                SizeVector output_indices_dims = layer->outData[cur_output_port]->getTensorDesc().getDims();
+                Precision output_indices_precision = layer->outData[cur_output_port]->getTensorDesc().getPrecision();
+                if (output_indices_precision != Precision::FP32) {
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect precision for output tensor of indices. Only FP32 is supported!";
+                }
+                if (output_indices_dims.size() != 1 || output_indices_dims[0] != num_elements) {
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for output tensor of indices.";
+                }
+            }
+            if (return_counts) {
+                cur_output_port++;
+                SizeVector output_counts_dims = layer->outData[cur_output_port]->getTensorDesc().getDims();
+                Precision output_counts_precision = layer->outData[cur_output_port]->getTensorDesc().getPrecision();
+                if (output_counts_precision != Precision::FP32) {
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect precision for output tensor of counts. Only FP32 is supported!";
+                }
+                if (output_counts_dims.size() != 1 || output_counts_dims[0] != num_elements) {
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect dimensions for output tensor of counts.";
+                }
+            }
+
+            // add a layer configuration
+            if (layer->outData.size() == 1) {
+                addConfig(layer,
+                    { DataConfigurator(ConfLayout::PLN) },
+                    { DataConfigurator(ConfLayout::PLN) });
+            } else if (layer->outData.size() == 2) {
+                addConfig(layer,
+                    { DataConfigurator(ConfLayout::PLN) },
+                    { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) });
+            } else if (layer->outData.size() == 3) {
+                addConfig(layer,
+                    { DataConfigurator(ConfLayout::PLN) },
+                    { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) });
+            }
+        }
+        catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        const float *input_ptr = inputs[0]->cbuffer().as<const float *>() +
+            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        size_t cur_output_port = 0;
+        float *output_uniques_ptr = outputs[cur_output_port]->cbuffer().as<float *>() +
+            outputs[cur_output_port]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float *output_indices_ptr = nullptr;
+        if (return_inverse) {
+            cur_output_port++;
+            output_indices_ptr = outputs[cur_output_port]->cbuffer().as<float *>() +
+                outputs[cur_output_port]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        }
+        float *output_counts_ptr = nullptr;
+        if (return_counts) {
+            cur_output_port++;
+            output_counts_ptr = outputs[cur_output_port]->cbuffer().as<float *>() +
+                outputs[cur_output_port]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        }
+
+        // work on a copy so that sorting does not modify the input blob
+        std::vector<float> input_copy(num_elements);
+        std::copy(input_ptr, input_ptr + num_elements, input_copy.begin());
+
+        // sort elements in the input copy
+        if (sorted) {
+            parallel_sort(input_copy.begin(), input_copy.end(), std::less<float>());
+        }
+
+        // walk through the elements and record each one along with its index and number of occurrences
+        std::unordered_map<float, float> indices;
+        for (size_t i = 0, num_unique_elements = 0; i < num_elements; i++) {
+            auto it = indices.find(input_copy[i]);
+            if (it == indices.end()) {
+                indices.insert(std::make_pair(input_copy[i], static_cast<float>(num_unique_elements)));
+                output_uniques_ptr[num_unique_elements] = input_copy[i];
+                if (return_inverse && !sorted) {
+                    output_indices_ptr[i] = static_cast<float>(num_unique_elements);
+                }
+                if (return_counts) {
+                    output_counts_ptr[num_unique_elements] = 1.0f;
+                }
+                num_unique_elements++;
+            } else {
+                if (return_inverse && !sorted) {
+                    output_indices_ptr[i] = it->second;
+                }
+                if (return_counts) {
+                    output_counts_ptr[static_cast<size_t>(it->second)] += 1.0f;
+                }
+            }
+        }
+
+        // compute inverse indices separately once the unique elements are known (sorted case)
+        if (sorted && return_inverse) {
+            for (size_t i = 0; i < num_elements; i++) {
+                auto it = indices.find(input_ptr[i]);
+                output_indices_ptr[i] = it->second;
+            }
+        }
+
+        // fill the tail with the last unique element, which serves as an end marker
+        size_t num_unique_elements = indices.size();
+        if ((num_elements - num_unique_elements) > 0) {
+            std::fill(output_uniques_ptr + num_unique_elements,
+                output_uniques_ptr + num_elements,
+                output_uniques_ptr[num_unique_elements - 1]);
+        }
+
+        // zero-fill the tail of the counts output buffer
+        if (return_counts && (num_elements - num_unique_elements) > 0) {
+            std::fill(output_counts_ptr + num_unique_elements,
+                output_counts_ptr + num_elements, 0.f);
+        }
+
+        return OK;
+    }
+
+private:
+    // attributes
+    bool sorted;
+    bool return_inverse;
+    bool return_counts;
+
+    size_t num_elements = 0;
+};
+
+REG_FACTORY_FOR(ImplFactory<UniqueImpl>, Unique);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
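The tail-filling rules above (repeat the last unique element in the uniques output, zero-fill the counts tail) and the meaning of the three optional outputs can be sketched on plain vectors as follows. This is an illustrative stand-alone example of the same contract with sorted=true, return_inverse=true and return_counts=true; it is not code from the extension itself, and all names are hypothetical.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

int main() {
    std::vector<float> in = {3.f, 1.f, 3.f, 2.f, 1.f};
    std::map<float, float> index_of;                     // unique value -> its position among uniques
    std::vector<float> uniques, counts, inverse(in.size());

    std::vector<float> sorted_in = in;                   // sorted=true: uniques are reported in ascending order
    std::sort(sorted_in.begin(), sorted_in.end());
    for (float v : sorted_in) {
        if (index_of.count(v) == 0) {
            index_of[v] = static_cast<float>(uniques.size());
            uniques.push_back(v);
            counts.push_back(1.f);
        } else {
            counts[static_cast<std::size_t>(index_of[v])] += 1.f;
        }
    }
    for (std::size_t i = 0; i < in.size(); ++i)          // return_inverse: index of each input among uniques
        inverse[i] = index_of[in[i]];

    const float last_unique = uniques.back();
    uniques.resize(in.size(), last_unique);              // pad the tail with the last unique element (end mark)
    counts.resize(in.size(), 0.f);                       // zero-fill the counts tail

    for (float u : uniques) std::cout << u << ' ';       // 1 2 3 3 3
    std::cout << '\n';
    for (float c : counts) std::cout << c << ' ';        // 2 1 2 0 0
    std::cout << '\n';
    for (float x : inverse) std::cout << x << ' ';       // 2 0 2 1 0
    std::cout << '\n';
    return 0;
}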
index f5972af..6db8210 100644 (file)
@@ -3,7 +3,6 @@
 
 set(TARGET_NAME "GNAPlugin")
 
-disable_deprecated_warnings()
 
 file(GLOB_RECURSE SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
@@ -14,11 +13,10 @@ file(GLOB_RECURSE HEADERS
 
 find_package(libGNA)
 
-include_directories(
+set(TARGET_INCLUDE_DIRS
         ${CMAKE_SOURCE_DIR}/src/inference_engine
         ${CMAKE_SOURCE_DIR}/include
-        ${CMAKE_CURRENT_SOURCE_DIR}
-        ${libGNA_INCLUDE_DIRS})
+        ${CMAKE_CURRENT_SOURCE_DIR})
 
 add_definitions(-D_NO_MKL_)
 
@@ -31,10 +29,10 @@ if (LINUX)
 endif()
 
 #saving rpath to GNA shared library be used by CI
-log_rpath_remove_top(GNA FALSE "/gna${libGNA_LIBRARY}" TRUE)
-
-target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} ${libGNA_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
 
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} ${CMAKE_THREAD_LIBS_INIT} libGNA)
+target_include_directories(${TARGET_NAME} PUBLIC ${TARGET_INCLUDE_DIRS})
 
 set(TEST_SOURCES
         "${CMAKE_CURRENT_SOURCE_DIR}/gna_plugin.cpp"
@@ -53,5 +51,7 @@ add_library(${TARGET_NAME}_test_static STATIC ${TEST_SOURCES} ${HEADERS})
 target_compile_definitions(${TARGET_NAME}_test_static
         PUBLIC -DINTEGER_LOW_P
                -DUSE_STATIC_IE)
+target_link_libraries(${TARGET_NAME}_test_static PUBLIC libGNA::API)
+target_include_directories(${TARGET_NAME}_test_static PUBLIC ${TARGET_INCLUDE_DIRS})
 
 set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)
index a3d0be7..50b8316 100644 (file)
@@ -1661,7 +1661,6 @@ void AmIntelDnn::WriteDnnText(const char *filename, intel_dnn_number_type_t numb
                                 out_wfile << std::setprecision(12)
                                          << ptr_weight[row * num_filter_coefficients + col] << "\n";
                             }
-                            out_wfile << "\n";
                         }
 #endif
                     } else {
index 84f6c1d..e45bc1f 100644 (file)
@@ -31,10 +31,10 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal {
         }
 
         // copy inputs blobs since we need to have them in separate address space to allow simultaneous infer requests
-        _outputs[_networkOutputs.rbegin()->first] = plg->GetOutputBlob(networkOutputs.begin()->second->getPrecision());
+        _outputs[_networkOutputs.rbegin()->first] = plg->GetOutputBlob(networkOutputs.begin()->second->getTensorDesc().getPrecision());
         for (auto input : _networkInputs) {
             _inputs[input.first] =
-                plg->GetInputBlob(input.first, networkInputs.begin()->second->getPrecision());
+                plg->GetInputBlob(input.first, networkInputs.begin()->second->getTensorDesc().getPrecision());
         }
     }
     /**
index e9d149b..aceb42a 100644 (file)
@@ -22,6 +22,7 @@
 #include <details/ie_cnn_network_tools.h>
 #include <ie_util_internal.hpp>
 #include <iomanip>
+#include <graph_transformer.h>
 
 #include "gna_pass_manager.hpp"
 #include "gna_layer_info.hpp"
@@ -59,17 +60,21 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
     auto diagLayer = std::make_shared<ScaleShiftLayer>(LayerParams({diagName, "ScaleShift", Precision::FP32}));
 
     // TODO: diagonal size
-    std::vector<float> weightsValues(nextLayer->outData[0]->dims[0], fillValue);
-    diagLayer->_weights = make_shared_blob<float>(nextLayer->outData[0]->precision, Layout::C, weightsValues);
-    auto newDims = nextLayer->outData[0]->getDims();
-    auto dataPtr = std::make_shared<Data>(diagName,
-                                          TensorDesc(nextLayer->outData[0]->precision,
-                                                     newDims,
-                                                     nextLayer->outData[0]->layout));
+    auto dimsIndex = nextLayer->outData[0]->getTensorDesc().getDims().size() - 1;
+    std::vector<float> weightsValues(nextLayer->outData[0]->getTensorDesc().getDims()[dimsIndex], fillValue);
+    diagLayer->_weights = make_shared_blob<float>(
+            TensorDesc(
+                nextLayer->outData[0]->getTensorDesc().getPrecision(),
+                SizeVector({weightsValues.size()}),
+                Layout::C));
+    diagLayer->_weights->allocate();
+    CopyVectorToBlob(diagLayer->_weights, weightsValues);
+    auto dataPtr = std::make_shared<Data>(diagName, nextLayer->outData[0]->getTensorDesc());
+
     auto diagonalWithQuant = quantized ?
                              InferenceEngine::injectData<QuantizedLayerParams>(diagLayer) : diagLayer;
 
-    dataPtr->creatorLayer = diagonalWithQuant;
+    dataPtr->getCreatorLayer() = diagonalWithQuant;
     diagonalWithQuant->outData.push_back(dataPtr);
 
     // actual insertion
@@ -88,16 +93,11 @@ static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer,
     CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));
 
     auto inputData = nextLayer->insData[beforeIdx].lock();
-    auto newDims = inputData->getDims();
-    auto dataPtr = std::make_shared<Data>(copyName,
-                                          TensorDesc(inputData->precision,
-                                                     inputData->getDims(),
-                                                     inputData->layout));
-
+    auto dataPtr = std::make_shared<Data>(copyName, inputData->getTensorDesc());
     auto copyWithQuant = quantized ?
                          InferenceEngine::injectData<QuantizedLayerParams>(copyLayer) :
                          copyLayer;
-    dataPtr->creatorLayer = copyWithQuant;
+    dataPtr->getCreatorLayer() = copyWithQuant;
     copyWithQuant->outData.push_back(dataPtr);
     CNNNetworkInsertLayer(prevLayer, nextLayer, copyWithQuant);
     return copyWithQuant;
@@ -217,26 +217,23 @@ void InsertDiagonalLayerPass::run() {
 }
 
 void HandleMultipleActivationsForTheLayerPass::run() {
-    // found layer followed by with multiple activations
+    // found layer followed by multiple activations
     for (auto & l : *pLayers) {
         std::set<CNNLayerPtr> activations;
-        std::set<CNNLayerPtr> identities;
 
         for (auto && odata : l->outData) {
             for (auto && inputTo : odata->getInputTo()) {
                 LayerInfo info(inputTo.second);
 
-                if (info.isIdentity()) {
-                    identities.insert(inputTo.second);
-                } else if (info.isActivation()) {
+                if (info.isActivation()) {
                     activations.insert(inputTo.second);
                 }
             }
         }
         // single or not activations case
-        if (activations.size() + identities.size() < 2) continue;
+        if (activations.size() < 2) continue;
 
-        // insert diagonals, but not for identity activations
+        // insert one diagonal layer per activation
         for (auto && activation : activations) {
             insertDiagonalLayerBetween(l, activation, getPassManager(), 0.0f);
         }
@@ -286,14 +283,14 @@ void SubstitutePReluPass::run() {
         CNNLayer* next = nullptr;
         if (layer == nullptr) return next;
         if (layer->outData.size() != 1) return next;
-        return layer->outData[0]->inputTo.begin()->second.get();
+        return layer->outData[0]->getInputTo().begin()->second.get();
     };
 
     // TODO: unit tests for bad cases
     for (auto & l : *pLayers) {
         // assume l is starting layer, that is followed by eltwise_sum(relu, negate/relu/scale/negate)
         if (l->outData.size() != 1) continue;
-        auto &outputLayers = l->outData[0]->inputTo;
+        auto &outputLayers = l->outData[0]->getInputTo();
         if (outputLayers.size() != 2) continue;
 
         // one of followed layers need to be generic relu
@@ -328,8 +325,8 @@ void SubstitutePReluPass::run() {
         if (!LayerInfo(sum).isEltwiseSum()) continue;
         if (sum->insData.size() != 2) continue;
 
-        auto s1 = sum->insData[0].lock()->creatorLayer.lock().get();
-        auto s2 = sum->insData[1].lock()->creatorLayer.lock().get();
+        auto s1 = sum->insData[0].lock()->getCreatorLayer().lock().get();
+        auto s2 = sum->insData[1].lock()->getCreatorLayer().lock().get();
 
         if (s1 != static_cast<InferenceEngine::CNNLayer *>(first) &&
             s2 != static_cast<InferenceEngine::CNNLayer *>(first)) {
@@ -345,10 +342,10 @@ void SubstitutePReluPass::run() {
         // pointing relu to output of eltwise_summ
         relu1->outData = sum->outData;
         // changing creator layer
-        relu1->outData[0]->creatorLayer = relu1;
+        relu1->outData[0]->getCreatorLayer() = relu1;
         // pointing back to relu if any
-        if (!relu1->outData[0]->inputTo.empty()) {
-            auto summOutputLayer = relu1->outData[0]->inputTo.begin()->second;
+        if (!relu1->outData[0]->getInputTo().empty()) {
+            auto summOutputLayer = relu1->outData[0]->getInputTo().begin()->second;
             summOutputLayer->insData.clear();
             summOutputLayer->insData.push_back(relu1->outData[0]);
         }
@@ -382,10 +379,10 @@ void ReversePermutationsPass::run() {
         if (layer->outData.empty()) {
             return nullptr;
         }
-        if (layer->outData.front()->inputTo.size() != 1) {
+        if (layer->outData.front()->getInputTo().size() != 1) {
             return nullptr;
         }
-        auto next = layer->outData.front()->inputTo.begin()->second;
+        auto next = layer->outData.front()->getInputTo().begin()->second;
 
         if (LayerInfo(next).isReshape()) return nextLayerSkipReshape(next);
 
@@ -470,22 +467,17 @@ void InsertIdentityLayerPass::run() {
             CNNLayerPtr activationLayer =
                 std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
             auto inputData = l->insData[0].lock();
-            auto newDims = inputData->dims;
-            std::reverse(begin(newDims), end(newDims));
 
-            auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers),
-                                                  TensorDesc(inputData->precision,
-                                                             newDims,
-                                                             inputData->layout));
+            auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
             auto activationLayerWithQuant = quantized ?
                                             InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
                                             activationLayer;
-            dataPtr->creatorLayer = activationLayerWithQuant;
+            dataPtr->getCreatorLayer() = activationLayerWithQuant;
             activationLayerWithQuant->outData.push_back(dataPtr);
             // whether 1 identity or all outputs TODO possible grouping here, need to implement special grouped inserter
             bool notAll = false;
             for (auto && nextData  : prev->outData) {
-                for (auto && nextLayer : nextData->inputTo) {
+                for (auto && nextLayer : nextData->getInputTo()) {
                     if (nextLayer.second.get() == l.get())
                         continue;
                     if (getCandidatesForIdentityInsertion(nextLayer.second).empty()) {
@@ -613,20 +605,27 @@ void InsertConcatAligningFilterPass::run() {
                     identityIdx += num_rows_in + 1;
                 }
 
-                concatAligningFilter->_weights = make_shared_blob<float>(concatInput->precision, Layout::C, filterWeights);
+                concatAligningFilter->_weights = make_shared_blob<float>(
+                                        TensorDesc(
+                                            concatInput->getTensorDesc().getPrecision(),
+                                            SizeVector({filterWeights.size()}),
+                                            Layout::C));
+                concatAligningFilter->_weights->allocate();
+
+                CopyVectorToBlob(concatAligningFilter->_weights, filterWeights);
 
                 // modifying output rows to be used - to avoid modifying the original concat we store the number of elements in params
                 dims[1] = num_rows_out;
 
                 auto outData = std::make_shared<Data>(filterName,
-                                                      TensorDesc(concatInput->precision,
+                                                      TensorDesc(concatInput->getPrecision(),
                                                                  dims,
-                                                                 concatInput->layout));
+                                                                 concatInput->getLayout()));
 
                 auto filterWithQuant = quantized ?
                                        InferenceEngine::injectData<QuantizedLayerParams>(concatAligningFilter) :
                                        concatAligningFilter;
-                outData->creatorLayer = filterWithQuant;
+                outData->getCreatorLayer() = filterWithQuant;
                 filterWithQuant->outData.push_back(outData);
 
                 CNNNetworkInsertLayer(prevLayer, l, filterWithQuant);
@@ -665,8 +664,8 @@ void ReorderConcatInputsPass::run() {
             THROW_GNA_EXCEPTION << "no concat layer after concat-aligning layer" << l->name << ", but was: " << concat->type;
         }
         // 3stage locate first input in concat
-        if (concat->insData.size() != 2) {
-            THROW_GNA_EXCEPTION << "unsupported concat layer: " << concat->name;
+        if (concat->insData.size() < 2) {
+            THROW_GNA_EXCEPTION << "Concat layer has unsupported number of incoming layers: " << concat->name;
         }
         auto inputsToConcatFirst = CNNNetGetPrevLayersSkip(concat, [](CNNLayerPtr origin){
             return !LayerInfo(origin).isReshape();
@@ -712,7 +711,7 @@ void ReorderConcatInputsPass::run() {
 
         auto linkOutData = std::make_shared<Data>(linkName,
                                               TensorDesc(Precision::FP32,
-                                                         {1},
+                                                         SizeVector({1}),
                                                          Layout::C));
         linkOutData->getCreatorLayer() = link;
 
@@ -763,7 +762,6 @@ void InsertSplitAligningFilterPass::run() {
 
 
                 auto inputData = splitOutput;
-                auto newDims = splitOutput->dims;
 
                 size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
                 size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
@@ -772,12 +770,12 @@ void InsertSplitAligningFilterPass::run() {
                 // encodes offset to beginning of split layer input
                 filterLayer->params["offset"] = std::to_string(aligned64_offset);
 
-                auto dims = splitOutput->getDims();
+                auto dims = splitOutput->getTensorDesc().getDims();
                 if (dims.size() > 3) {
                     THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
                 }
-                auto num_rows_out = dims[1]  * (dims.size() != 2 ? dims[2] : 1);
 
+                auto num_rows_out = dims[1]  * (dims.size() != 2 ? dims[2] : 1);
                 std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
 
                 auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
@@ -787,19 +785,22 @@ void InsertSplitAligningFilterPass::run() {
                     offset += newOutputSize + 1;
                 }
 
-                filterLayer->_weights = make_shared_blob<float>(inputData->precision, Layout::C, filterWeights);
-
-                std::reverse(begin(newDims), end(newDims));
+                filterLayer->_weights = make_shared_blob<float>(TensorDesc(
+                        inputData->getTensorDesc().getPrecision(),
+                        SizeVector({filterWeights.size()}),
+                        Layout::C));
+                filterLayer->_weights->allocate();
+                CopyVectorToBlob(filterLayer->_weights, filterWeights);
 
                 auto outData = std::make_shared<Data>(filterName,
-                                                      TensorDesc(splitOutput->precision,
-                                                                 newDims,
-                                                                 inputData->layout));
+                                                      TensorDesc(splitOutput->getTensorDesc().getPrecision(),
+                                                                 splitOutput->getTensorDesc().getDims(),
+                                                                 inputData->getTensorDesc().getLayout()));
 
                 auto filterWithQuant = quantized ?
                                        InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
                                        filterLayer;
-                outData->creatorLayer = filterWithQuant;
+                outData->getCreatorLayer() = filterWithQuant;
                 filterWithQuant->outData.push_back(outData);
                 CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
             }
@@ -896,7 +897,6 @@ void SubstituteScaleShiftBroadCastPass::run() {
 }
 
 void UnrollLSTMCellPass::run() {
-    // TODO: iefode: refactor this code
     InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
         if (rnn.clip != 0.0f)
             return true;
@@ -919,6 +919,16 @@ void UnrollTIPass::run() {
     }
 }
 
+void RemoveConstPass::run() {
+    auto network = getPassManager()->getNetwork();
+    auto* implNetwork = dynamic_cast<details::CNNNetworkImpl*>(network.get());
+    if (!implNetwork) {
+        THROW_GNA_EXCEPTION << "Remove const layers pass can only work on CNNNetworkImpl type";
+    }
+    ConstTransformer transformer(implNetwork);
+    transformer.fullTrim();
+}
+
 void PassManager::run() {
     int index = 0;
 #ifdef PLOT
index 2ea35ac..77c0c78 100644 (file)
@@ -129,6 +129,12 @@ DECL_PASS(UnrollLSTMCell);
 */
 DECL_PASS(UnrollTI);
 
+/**
+* @brief removes Const layers before Reshape layers
+*/
+DECL_PASS(RemoveConst);
+
+
 class PassManager : public IPassManager, public std::enable_shared_from_this<PassManager> {
     Policy policy;
     InferenceEngine::CNNNetPtr network;
index c15343c..9967b92 100644 (file)
@@ -74,7 +74,7 @@ using namespace InferenceEngine::details;
 #define PAGE_SIZE_BYTES 4096
 
 #define FROM_IR_DIM(mem, idx)\
-((mem->dims.size() > idx - 1) ? mem->dims[idx - 1] : 1)
+((mem->getTensorDesc().getDims().size() > (idx) - 1) ? mem->getTensorDesc().getDims()[mem->getTensorDesc().getDims().size() - (idx)] : 1)
 
 inline int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
         float rounding_value = (src > 0) ? 0.5f : -0.5f;
@@ -248,7 +248,7 @@ void GNAPlugin::ExportScores(void *ptr_dst,
                             break;
                         }
                         case 4 : {
-                            *dst_ptr  = *reinterpret_cast<const int32_t*>(input_ptr);
+                            *dst_ptr = *reinterpret_cast<const int32_t *>(input_ptr);
                             break;
                         }
                         default:
@@ -370,7 +370,7 @@ void GNAPlugin::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
             THROW_GNA_EXCEPTION << "Input layer pointer for concat is unexpectedly absent";
         }
 
-        auto ptrConcatLayerInput = dataInput->creatorLayer.lock();
+        auto ptrConcatLayerInput = dataInput->getCreatorLayer().lock();
         if (!ptrConcatLayerInput) {
             THROW_GNA_EXCEPTION << "Input layer for concat is unexpectedly absent";
         }
@@ -378,8 +378,9 @@ void GNAPlugin::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
                 GNAPlugin::GNAConcatLayer::ConcatConnectedLayerInfo({ptrConcatLayerInput->name, concat_size}));
 
         size_t layer_size =
-                     InferenceEngine::details::product(begin(dataInput->dims),
-                                                      end(dataInput->dims)) * dataInput->precision.size();
+                    InferenceEngine::details::product(begin(
+                            dataInput->getTensorDesc().getDims()),
+                            end(dataInput->getTensorDesc().getDims())) * dataInput->getTensorDesc().getPrecision().size();
         concat_size += layer_size;
     }
     layerInfoItem.reserved_size = concat_size;
@@ -395,7 +396,7 @@ void GNAPlugin::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
     if (!dataInput) {
         THROW_GNA_EXCEPTION << "Input layer pointer for split/slice is unexpectedly absent";
     }
-    auto ptrSplitLayerInput = dataInput->creatorLayer.lock();
+    auto ptrSplitLayerInput = dataInput->getCreatorLayer().lock();
     if (!ptrSplitLayerInput) {
         THROW_GNA_EXCEPTION << "Input layer for split/slice is unexpectedly absent";
     }
@@ -417,10 +418,10 @@ void GNAPlugin::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
             }
 
             padding = std::max(padding, LayerInfo(ptrSplitLayerOutput).paddingSize())
-                                                        * dataOutput->precision.size();
+                                                        * dataOutput->getPrecision().size();
             output_layer_size =
-                    InferenceEngine::details::product(begin(dataOutput->dims),
-                                                     end(dataOutput->dims)) * dataOutput->precision.size();
+                    InferenceEngine::details::product(begin(dataOutput->getTensorDesc().getDims()),
+                                                     end(dataOutput->getTensorDesc().getDims())) * dataOutput->getTensorDesc().getPrecision().size();
 
             if (ptrSplitLayerOutput->type == "AffineFilter") {
                 size_t aligned64_offset = ptrSplitLayerOutput->GetParamAsInt("offset");
@@ -435,8 +436,10 @@ void GNAPlugin::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
     layerInfoItem.reserved_size = split_size;
     layerInfoItem.splitInputLayer =
                     GNAPlugin::GNASplitLayer::SplitConnectedLayerInfo({ptrSplitLayerInput->type, 0,
-                                                                    InferenceEngine::details::product(begin(dataInput->dims),
-                                                                    end(dataInput->dims)) * dataInput->precision.size()});
+                                                                    InferenceEngine::details::product(
+                                                                        begin(dataInput->getTensorDesc().getDims()),
+                                                                        end(dataInput->getTensorDesc().getDims()))
+                                                                        * dataInput->getTensorDesc().getPrecision().size()});
     split_connection.emplace(id, layerInfoItem);
 }
 
@@ -472,13 +475,29 @@ void GNAPlugin::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
 
-    uint32_t num_feature_map_rows = FROM_IR_DIM(inputs, 1) / convolution._stride_x;
-    uint32_t num_feature_map_columns = FROM_IR_DIM(inputs, 3) * convolution._stride_x / num_feature_maps;
+    uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
+    uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
+    uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
+    uint32_t n_dim_in = FROM_IR_DIM(inputs, 4);
+    uint32_t w_dim_out = FROM_IR_DIM(outputs, 1);
+    uint32_t h_dim_out = FROM_IR_DIM(outputs, 2);
 
-    uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
-    uint32_t num_columns_in = FROM_IR_DIM(inputs, 3);
-    uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
-    uint32_t num_padding = ALIGN(convolution._kernel_x * num_feature_map_columns * num_feature_maps, 8)
+    if (w_dim_in == 1) {  // swap dimensions if needed to support swapped 1D case
+        swap(h_dim_in, w_dim_in);
+        swap(h_dim_out, w_dim_out);
+        swap(convolution._kernel_x, convolution._kernel_y);
+        swap(convolution._stride_x, convolution._stride_y);
+    }
+
+    uint32_t num_feature_map_rows = w_dim_in / convolution._stride_x;
+    uint32_t num_feature_map_columns = c_dim_in * convolution._stride_x / num_feature_maps;
+
+    uint32_t num_rows_in = w_dim_in;
+    uint32_t num_columns_in = c_dim_in;
+    uint32_t num_rows_out = w_dim_out;
+
+    // pad the convolution kernel so that its size is a multiple of 8
+    uint32_t num_conv_kernel_padding = ALIGN(convolution._kernel_x * num_feature_map_columns * num_feature_maps, 8)
                                             - convolution._kernel_x * num_feature_map_columns * num_feature_maps;
     void *ptr_inputs;
     void *ptr_outputs;
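To make the renamed num_conv_kernel_padding concrete (numbers chosen only for illustration): with convolution._kernel_x == 3, num_feature_map_columns == 5 and num_feature_maps == 1, the kernel covers 3 * 5 * 1 = 15 elements and ALIGN(15, 8) == 16, so num_conv_kernel_padding == 1. The same ALIGN(x, 8) - x pattern is applied again just below to num_inputs to obtain num_input_padding.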
@@ -486,7 +505,7 @@ void GNAPlugin::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
     void *ptr_biases;
 
     // TODO: questionable why for biases that are not in IR we inventing precision
-    auto biasPrecision = convolution._biases ? convolution._biases->precision() : outputs->precision;
+    auto biasPrecision = convolution._biases ? convolution._biases->getTensorDesc().getPrecision() : outputs->getTensorDesc().getPrecision();
 
     dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
     auto &currentComponent = dnnComponentsForLayer.back().second;
@@ -494,21 +513,22 @@ void GNAPlugin::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
 #ifdef PLOT
     std::cout << "IR layer : " << std::left << std::setw(20) << layer->name << " convolution_" << dnnComponentsForLayer.size() - 1 << std::endl;
 #endif
-    auto num_input_padding = ALIGN(num_feature_maps * num_feature_map_columns * num_feature_map_rows, 8)
-                                                        -  num_feature_maps * num_feature_map_columns * num_feature_map_rows;
+    // have to pad the input so that the last kernel meets its corresponding input
+    auto num_inputs = num_feature_maps * num_feature_map_columns * num_feature_map_rows + num_conv_kernel_padding;
+    auto num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
     auto num_filter_rows = convolution._kernel_x / convolution._stride_x;
     dnn.InitConvolutional1DComponent(currentComponent,
                             1,
-                            num_feature_maps *  num_feature_map_columns * num_feature_map_rows + num_input_padding,
+                            num_inputs + num_input_padding,
                             1,
                             num_rows_out * convolution._out_depth,
-                            inputs->precision.size(),
-                            outputs->precision.size(),
-                            convolution._weights->precision().size(),
+                            inputs->getTensorDesc().getPrecision().size(),
+                            outputs->getTensorDesc().getPrecision().size(),
+                            convolution._weights->getTensorDesc().getPrecision().size(),
                             biasPrecision.size(),
                             convolution._out_depth,
                             num_filter_rows,
-                            num_feature_maps * num_feature_map_columns * num_filter_rows + num_padding,
+                            num_feature_maps * num_feature_map_columns * num_filter_rows + num_conv_kernel_padding,
 
                             num_feature_maps,  // interesting - why this is so in gna_example
                             num_feature_map_rows,
@@ -525,10 +545,10 @@ void GNAPlugin::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
     num_feature_maps = convolution._out_depth;  // = number of filters
 
     size_t num_data_bytes_out =
-                        InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
-                                                                                * outputs->precision.size();
+                        InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
+                                                                                * outputs->getPrecision().size();
 
-    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->precision.size();
+    size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
 
     auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
 
@@ -563,10 +583,10 @@ void GNAPlugin::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
         transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
     }
 
-    if (num_padding == 0) {
+    if (num_conv_kernel_padding == 0) {
         gnamem->readonly().push_local_ptr(ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64);
     } else {
-        auto elementsIn = convolution._kernel_x * num_feature_map_columns + num_padding;
+        auto elementsIn = convolution._kernel_x * num_feature_map_columns + num_conv_kernel_padding;
         auto paddedWeights = elementsIn * convolution._out_depth;
         auto paddedWeightsSize = paddedWeights * convolution.precision.size();
         auto elements_in_row = convolution._kernel_x * num_feature_map_columns;
@@ -619,11 +639,11 @@ void GNAPlugin::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
                             num_rows_in,
                             num_columns_in,
                             num_rows_out,
-                            input->precision.size(),
-                            outputs->precision.size(),
+                            input->getPrecision().size(),
+                            outputs->getPrecision().size(),
                             // TODO: only fp32 and Int16 tested
-                            quantized == nullptr ? input->precision.size() : 2,
-                            quantized == nullptr ? input->precision.size() : 4,
+                            quantized == nullptr ? input->getPrecision().size() : 2,
+                            quantized == nullptr ? input->getPrecision().size() : 4,
                             quantized == nullptr ? 1 : quantized->_weights_quant.scale,
                             quantized == nullptr ? 1 : quantized->_dst_quant.scale,
                             ptr_inputs,
@@ -636,11 +656,11 @@ void GNAPlugin::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     std::cout << "IR layer : " << std::left << std::setw(20) << layer->name << " diagonal_"<< dnnComponentsForLayer.size() - 1 << std::endl;
 #endif
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
-        * outputs->precision.size();
+    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
+        * outputs->getPrecision().size();
 
-    size_t num_data_bytes_in = InferenceEngine::details::product(begin(input->dims), end(input->dims))
-        * input->precision.size();
+    size_t num_data_bytes_in = InferenceEngine::details::product(begin(input->getDims()), end(input->getDims()))
+        * input->getPrecision().size();
 
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
@@ -678,10 +698,25 @@ void GNAPlugin::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
 
-    uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
-    uint32_t num_columns_in = FROM_IR_DIM(inputs, 3);
-    uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
-    uint32_t num_columns_out = FROM_IR_DIM(outputs, 3);
+    uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
+    uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
+    uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
+    uint32_t n_dim_in = FROM_IR_DIM(inputs, 4);
+    uint32_t w_dim_out = FROM_IR_DIM(outputs, 1);
+    uint32_t h_dim_out = FROM_IR_DIM(outputs, 2);
+    uint32_t c_dim_out = FROM_IR_DIM(outputs, 3);
+    uint32_t n_dim_out = FROM_IR_DIM(outputs, 4);
+
+    if (w_dim_in == 1) {  // swap dimensions if needed to support swapped 1D case
+        swap(h_dim_in, w_dim_in);
+        swap(h_dim_out, w_dim_out);
+        swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
+    }
+
+    uint32_t num_rows_in = w_dim_in;
+    uint32_t num_columns_in = c_dim_in;
+    uint32_t num_rows_out = w_dim_out;
+    uint32_t num_columns_out = c_dim_out;
     uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
     void *ptr_inputs;
@@ -707,8 +742,8 @@ void GNAPlugin::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
                             num_columns_in * num_rows_in ,
                             1,
                             num_columns_out * num_rows_out,
-                            inputs->precision.size(),
-                            outputs->precision.size(),
+                            inputs->getPrecision().size(),
+                            outputs->getPrecision().size(),
                             pooling._kernel[X_AXIS],
                             pooling._kernel[X_AXIS],
                             num_columns_in,
@@ -717,10 +752,10 @@ void GNAPlugin::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
                             ptr_inputs,
                             ptr_outputs);
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
-        * outputs->precision.size();
+    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
+        * outputs->getPrecision().size();
 
-    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->precision.size();
+    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->getPrecision().size();
 
     connectInput(layer, ptr_inputs, num_data_bytes_in);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -755,8 +790,8 @@ void GNAPlugin::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
                           num_columns_in,
                           ALIGN(num_rows_out, 8),
                           num_columns_out,
-                          inputs->precision.size(),
-                          outputs->precision.size(),
+                          inputs->getPrecision().size(),
+                          outputs->getPrecision().size(),
                           quantized == nullptr ? 1 : quantized->_dst_quant.scale,
                           num_rows_out + num_padding_out,
                           num_columns_out,
@@ -764,9 +799,9 @@ void GNAPlugin::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
                           ptr_outputs);
 
     size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
-                                                            begin(outputs->dims), end(outputs->dims)), 8)
-                                                                                * outputs->precision.size();
-    size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, 8) * inputs->precision.size();
+                                                            begin(outputs->getDims()), end(outputs->getDims())), 8)
+                                                                                * outputs->getPrecision().size();
+    size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
 
     connectInput(layer, ptr_inputs, num_data_bytes_in);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -778,17 +813,25 @@ void GNAPlugin::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
     if (concatLayer == nullptr) {
         return;
     }
-    if (concatLayer->insData.size() != 2) {
+    if (concatLayer->insData.size() < 2) {
         THROW_GNA_EXCEPTION << "Concat layer has unsupported number of incoming layers.";
     }
 
-    auto prevInput0 = concatLayer->insData[0].lock();
-    auto prevInput1 = concatLayer->insData[1].lock();
-    if (!prevInput0 || !prevInput1) {
-        THROW_GNA_EXCEPTION << "Input layer for concat is unexpectedly absent";
+    for (std::size_t layerIndex = 0; layerIndex < concatLayer->insData.size(); layerIndex++) {
+        auto input = concatLayer->insData[layerIndex].lock();
+        if (!input) {
+            THROW_GNA_EXCEPTION << "Input layer " << layerIndex << " for concat is unexpectedly absent";
+        }
     }
-    if (prevInput0->precision.size() != prevInput1->precision.size()) {
-        THROW_GNA_EXCEPTION << "Different precision for Concat input layers are not supported";
+
+    std::size_t layerPrecisionSize = concatLayer->insData[0].lock()->getPrecision().size();
+    for (std::size_t layerIndex = 0; layerIndex < concatLayer->insData.size(); layerIndex++) {
+        auto currentSize = concatLayer->insData[layerIndex].lock()->getPrecision().size();
+        if (layerPrecisionSize != currentSize) {
+            THROW_GNA_EXCEPTION << "Different precision for Concat Layer '" << concatLayer->name << "' input layers: " <<
+                "input 0 precision is '" << concatLayer->insData[0].lock()->getPrecision().name() << "' but input " << layerIndex <<
+                " precision is '" << concatLayer->insData[layerIndex].lock()->getPrecision().name() << "'";
+        }
     }
 
     auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
@@ -871,7 +914,7 @@ void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
         uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]);
         uint32_t num_columns_in = 1;
 
-        uint32_t num_rows_out = FROM_IR_DIM(outputs, outputs->getDims().size() - cropLayer->axis[0]);
+        uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]);
         uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
         void *ptr_inputs;
@@ -890,9 +933,9 @@ void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                 num_rows_in + num_padding,
                                 num_columns_in,
                                 num_rows_out,
-                                inputs->precision.size(),
+                                inputs->getPrecision().size(),
                                 4,
-                                quantized == nullptr ? inputs->precision.size() : 2,
+                                quantized == nullptr ? inputs->getPrecision().size() : 2,
                                 4,
                                 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
                                 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
@@ -904,10 +947,10 @@ void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         size_t num_data_bytes_out =
         InferenceEngine::details::product(
-                                          begin(outputs->dims), end(outputs->dims)) * 4;
+                                          begin(outputs->getDims()), end(outputs->getDims())) * 4;
 
         size_t num_data_bytes_in = num_columns_in *
-                ALIGN(num_rows_in, 8) * inputs->precision.size();
+                ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
 
         connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
         connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -940,16 +983,16 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     if (quantized) {
         if (eltwise._operation == EltwiseLayer::Sum) {
-            if (inputs4Bytes->precision.size() != 4) {
+            if (inputs4Bytes->getPrecision().size() != 4) {
                 std::swap(inputs4Bytes, inputs2Bytes);
                 biasesLayerIdx = 0;
             }
-            IE_ASSERT(inputs2Bytes->precision.size() == 2);
-            IE_ASSERT(inputs4Bytes->precision.size() == 4);
+            IE_ASSERT(inputs2Bytes->getPrecision().size() == 2);
+            IE_ASSERT(inputs4Bytes->getPrecision().size() == 4);
         } else {
             // for mul both inputs should be 2 bytes precision
-            IE_ASSERT(inputs2Bytes->precision.size() == 2);
-            IE_ASSERT(inputs4Bytes->precision.size() == 2);
+            IE_ASSERT(inputs2Bytes->getPrecision().size() == 2);
+            IE_ASSERT(inputs4Bytes->getPrecision().size() == 2);
         }
     }
 
@@ -971,11 +1014,11 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                             num_rows_in + num_padding,
                             num_columns_in,
                             num_rows_out + num_padding,
-                            inputs2Bytes->precision.size(),
-                            outputs->precision.size(),
+                            inputs2Bytes->getPrecision().size(),
+                            outputs->getPrecision().size(),
                             // TODO: only fp32 and Int16 tested
-                            quantized == nullptr ? inputs2Bytes->precision.size() : 2,
-                            quantized == nullptr ? inputs4Bytes->precision.size() : 4,
+                            quantized == nullptr ? inputs2Bytes->getPrecision().size() : 2,
+                            quantized == nullptr ? inputs4Bytes->getPrecision().size() : 4,
                             quantized == nullptr ? 1 : quantized->_weights_quant.scale,
                             quantized == nullptr ? 1 : quantized->_dst_quant.scale,
                             ptr_inputs,
@@ -989,10 +1032,10 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
 #endif
 
     size_t num_data_bytes_out =
-        InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims)) * outputs->precision.size();
+        InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size();
 
     size_t num_data_bytes_in =
-        num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->precision.size();
+        num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->getPrecision().size();
 
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
@@ -1033,12 +1076,13 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
 
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
-    auto inputPrecision = quantized ? Precision(Precision::I16) : inputs->precision;
+    auto inputPrecision = quantized ? Precision(Precision::I16) : inputs->getPrecision();
 
     uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
     uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
     uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
     uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
+    uint32_t num_padding_out = isDiag ? num_padding : 0;
 
     void *ptr_inputs;
     void *ptr_outputs;
@@ -1046,7 +1090,7 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
     void *ptr_biases;
 
     // TODO: questionable why we are inventing a precision for biases that are not in the Model
-    auto biasPrecision = weightable._biases ? weightable._biases->precision() : outputs->precision;
+    auto biasPrecision = weightable._biases ? weightable._biases->getTensorDesc().getPrecision() : outputs->getPrecision();
 
     // layer without biases might be connected to functional layer without activations
     auto prevLayer = CNNNetPrevLayer(layer);
@@ -1071,10 +1115,10 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
     dnn.InitAffineComponent(currentComponent,
                             num_rows_in + num_padding,
                             num_columns_in,
-                            num_rows_out,
+                            num_rows_out + num_padding_out,
                             inputPrecision.size(),
-                            outputs->precision.size(),
-                            weightable._weights->precision().size(),
+                            outputs->getPrecision().size(),
+                            weightable._weights->getTensorDesc().getPrecision().size(),
                             biasPrecision.size(),
                             quantized == nullptr ? 1 : quantized->_weights_quant.scale,
                             quantized == nullptr ? 1 : quantized->_dst_quant.scale,
@@ -1084,10 +1128,10 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
                             ptr_biases,
                             isDiag);
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
-        * outputs->precision.size();
+    size_t num_data_bytes_out =
+        num_columns_in * (num_rows_out + num_padding_out) * outputs->getPrecision().size();
 
-    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->precision.size();
+    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->getPrecision().size();
 
     auto connectionInfo = connectInput(layer, useBiasConnection ? ptr_biases : ptr_inputs, num_data_bytes_in);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -1107,7 +1151,7 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
         /**
          * TODO: weights transpose that happened after quantisation might result in poor quality for int8 - move this to passes
          */
-        if (weightable._weights->precision() == Precision::I8) {
+        if (weightable._weights->getTensorDesc().getPrecision() == Precision::I8) {
             THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute operation for 8 bit weights for layer: " << layer->name;
         }
 
@@ -1149,7 +1193,7 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
         }
     } else {
         if (transpose) {
-            THROW_GNA_EXCEPTION << "transpozed weights with non zero padding not yet supported";
+            THROW_GNA_EXCEPTION << "transposed weights with non zero padding not yet supported";
         }
         auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
         auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
@@ -1167,15 +1211,15 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
 
     if (weightable._biases) {
         gnamem->readonly().push_ptr(ptr_biases,
-                         weightable._biases->cbuffer().as<const void *>(),
-                         weightable._biases->byteSize(),
-                         64);
+            weightable._biases->cbuffer().as<const void *>(),
+            weightable._biases->byteSize(),
+            64);
     } else {
         // in that case the input from the previous layer goes into the biases, so we have to initialize the input pointer with zero
         if (useBiasConnection) {
-            gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in, 64);
+            gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
         } else {
-            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
         }
     }
 }
@@ -1232,7 +1276,7 @@ void GNAPlugin::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
     uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
     uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
-    auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->precision() : outputs->precision;
+    auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
     dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
     auto &currentComponent = dnnComponentsForLayer.back().second;
 #ifdef  PLOT
@@ -1243,9 +1287,9 @@ void GNAPlugin::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
                             num_rows_in + num_padding,
                             num_columns_in,
                             num_rows_out,
-                            inputs->precision.size(),
-                            outputs->precision.size(),
-                            filterLayer->_weights->precision().size(),
+                            inputs->getPrecision().size(),
+                            outputs->getPrecision().size(),
+                            filterLayer->_weights->getTensorDesc().getPrecision().size(),
                             biasPrecision.size(),
                             quantized == nullptr ? 1 : quantized->_weights_quant.scale,
                             quantized == nullptr ? 1 : quantized->_dst_quant.scale,
@@ -1257,10 +1301,10 @@ void GNAPlugin::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     size_t num_data_bytes_out =
         InferenceEngine::details::product(
-            begin(outputs->dims), end(outputs->dims)) * 4;
+            begin(outputs->getDims()), end(outputs->getDims())) * 4;
 
     size_t num_data_bytes_in = num_columns_in *
-        ALIGN(num_rows_in, 8) * inputs->precision.size();
+        ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
 
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -1327,16 +1371,16 @@ void GNAPlugin::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
 #ifdef  PLOT
     gnalog() << "IR layer : " << std::left << std::setw(20) << layer->name << (" affine_") << dnnComponentsForLayer.size() - 1 << std::endl;
 #endif
-    auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->precision() : outputs->precision;
+    auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
     dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
     auto &currentComponent = dnnComponentsForLayer.back().second;
     dnn.InitAffineComponent(currentComponent,
                             num_rows_in + num_padding,
                             num_columns_in,
                             num_rows_out,
-                            inputs->precision.size(),
-                            outputs->precision.size(),
-                            filterLayer->_weights->precision().size(),
+                            inputs->getPrecision().size(),
+                            outputs->getPrecision().size(),
+                            filterLayer->_weights->getTensorDesc().getPrecision().size(),
                             biasPrecision.size(),
                             quantized == nullptr ? 1 : quantized->_weights_quant.scale,
                             quantized == nullptr ? 1 : quantized->_dst_quant.scale,
@@ -1348,10 +1392,10 @@ void GNAPlugin::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     size_t num_data_bytes_out =
                 InferenceEngine::details::product(
-                                        begin(outputs->dims), end(outputs->dims)) * 4;
+                                        begin(outputs->getDims()), end(outputs->getDims())) * 4;
 
     size_t num_data_bytes_in = num_columns_in *
-                            ALIGN(num_rows_in, 8) * inputs->precision.size();
+                            ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
 
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -1418,19 +1462,24 @@ void GNAPlugin::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     auto orientation = (num_cnn_rows_out > 0) ? kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
 
-    if (inputs->dims.size() == 4) {
-        num_columns = FROM_IR_DIM(inputs, 3) * FROM_IR_DIM(inputs, 1);
+    if (inputs->getDims().size() == 4) {
+        uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
+        uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
+        uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
+        uint32_t n_dim_in = FROM_IR_DIM(inputs, 4);
+
+        num_columns = (w_dim_in == 1) ? h_dim_in * c_dim_in : w_dim_in * c_dim_in;
         num_rows = 1;
     } else {
         num_columns = FROM_IR_DIM(inputs, 2);
         num_rows = FROM_IR_DIM(inputs, 1);
     }
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
-        * outputs->precision.size();
+    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
+        * outputs->getPrecision().size();
 
-    size_t num_data_bytes_in = InferenceEngine::details::product(begin(inputs->dims), end(inputs->dims))
-        * inputs->precision.size();
+    size_t num_data_bytes_in = InferenceEngine::details::product(begin(inputs->getDims()), end(inputs->getDims()))
+        * inputs->getPrecision().size();
 
     static caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
         {"sigmoid", kActSigmoid},
@@ -1460,7 +1509,7 @@ void GNAPlugin::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     intel_pwl_segment_t *ptr_pwl_segments_target = nullptr;
 
-    if (!inputs->precision.is_float()) {
+    if (!inputs->getPrecision().is_float()) {
         // TODO: generalize activation function code
         // now that scale factors are known, create PWL approximations to activation functions
         float input_scale_factor = dnn.OutputScaleFactor(prevComponent);
@@ -1499,8 +1548,8 @@ void GNAPlugin::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                      orientation,
                                      num_rows,
                                      num_columns,
-                                     inputs->precision.size(),
-                                     outputs->precision.size(),
+                                     inputs->getPrecision().size(),
+                                     outputs->getPrecision().size(),
                                      ptr_pwl_segments.size(),
                                      output_scale_factor,
                                      ptr_inputs,
@@ -1632,6 +1681,7 @@ GNAPluginNS::GNAPlugin::LayerType GNAPlugin::LayerTypeFromStr(const std::string
 }
 
 bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage) {
+    IE_SUPPRESS_DEPRECATED_START
     CNNLayerSet inputLayers;
     InferenceEngine::InputsDataMap inputs;
     std::unordered_set<CNNLayer *> allLayers;
@@ -1640,9 +1690,12 @@ bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage
     network.getInputsInfo(inputs);
     auto network_input_precision = inputs.begin()->second->getPrecision();
     auto batch_size = network.getBatchSize();
-    if (network_precision != Precision::FP32 && network_precision != Precision::FP16) {
-        errMessage = "The plugin does not support networks with " + std::string(network_precision.name()) + " format. Supported network precisions are FP32, "
-                                                                                                            "FP16\n";
+
+    if (network_precision != Precision::FP32 &&
+        network_precision != Precision::FP16 &&
+        network_precision != Precision::MIXED) {
+        errMessage = "The plugin does not support networks with " +
+            std::string(network_precision.name()) + " format. Supported network precisions are FP32, FP16, MIXED\n";
         return false;
     }
     if (network_input_precision != Precision::FP32 &&
@@ -1684,7 +1737,7 @@ bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage
                                                     check_result =  false;
                                                 }
                                             }, false);
-
+    IE_SUPPRESS_DEPRECATED_END
     return check_result;
 }
 
@@ -1698,6 +1751,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     // network optimisation phases
     auto run_passes = [&] (CNNNetPtr network) {
         auto passes = make_shared<PassManager>(policy, network);
+        passes->registerPass<RemoveConstPass>();
         passes->registerPass<UnrollTIPass>();
         passes->registerPass<UnrollLSTMCellPass>();
         passes->registerPass<SubstitutePReluPass>();
@@ -1714,12 +1768,11 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
         passes->registerPass<InsertDiagonalLayerPass>();
         passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
         passes->registerPass<SubstituteScaleShiftBroadCastPass>();
-
         passes->run();
     };
 
     Config supported = Config({
-        {TargetDevice::eGNA, {Precision::FP32, Precision::FP16}, [&](InferenceEngine::ICNNNetwork &network) -> CNNNetworkPtr {
+        {TargetDevice::eGNA, {Precision::FP32, Precision::FP16, Precision::MIXED}, [&](InferenceEngine::ICNNNetwork &network) -> CNNNetworkPtr {
             if (gnaPrecision == Precision::I16) {
                 ModelQuantizer<QuantI16> q;
                 return q.quantize(network, run_passes, inputScaleFactors);
@@ -1734,7 +1787,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
         // TODO: need to have advanced precision matcher based on layers/biases
         {TargetDevice::eGNA, {Precision::MIXED}},
         {TargetDevice::eGNA, {Precision::I16}},
-        {TargetDevice::eCPU, {Precision::FP32}
+        {TargetDevice::eCPU, {Precision::FP32, Precision::MIXED}
 #define EMULATE_GNA_API_LAYERS
 #ifdef  EMULATE_GNA_API_LAYERS
             , [&](InferenceEngine::ICNNNetwork & network) {
@@ -1754,6 +1807,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     supported.setDefaultDevice(sw_fp32 ?  TargetDevice::eCPU : TargetDevice::eGNA);
 
     auto newNet = supported.find_configuration(network).convert(network);
+    auto inputLayers = CNNNetGetAllInputLayers(*newNet);
 
     auto sortedNet = CNNNetSortTopologicallyEx(*newNet, make_fuzed_order);
     std::vector<CNNLayerPtr> sortedNoMem;
@@ -1821,7 +1875,6 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
             THROW_GNA_EXCEPTION << "cannot infer topologies with more than one output";
         }
     }
-    outputDims = outputsDataMap.begin()->second->dims;
 
     for (auto && input : inputsDataMap) {
         get_ptr_inputs_global(input.first).resize(gna_lib_async_threads_num);
@@ -1834,6 +1887,12 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     for (auto layer = sortedNoMem.begin(); layer != sortedNoMem.end(); ++layer) {
         CreateLayerPrimitive(*layer);
     }
+    for (auto& inputLayer : inputLayers) {
+        auto layerInfo = LayerInfo(inputLayer);
+        if (layerInfo.isInput() && 0 == bytes_alllocated_for_input[inputLayer->name]) {
+            connectOutput(inputLayer, &get_ptr_inputs_global(inputLayer->name).front(), 0);
+        }
+    }
     if (dnnComponentsForLayer.empty()) {
         THROW_GNA_EXCEPTION << "No outputs found in dnn components structure";
     }
@@ -2055,16 +2114,17 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
     auto nnet = std::get<0>(*freeNnet).get();
     auto idx = static_cast<uint32_t>(std::distance(std::begin(nnets), freeNnet));
 
+    int inputNum = 0;
     for (auto &input : inputs) {
-        auto inputLayout = input.second->layout();
+        auto inputLayout = input.second->getTensorDesc().getLayout();
         if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
             THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: "
-                                << input.second->layout();
+                                << input.second->getTensorDesc().getLayout();
         }
         if (inputLayout == NCHW) {
             inputLayout = NC;
         }
-        auto is2D = input.second->layout() == Layout::NC || input.second->layout() == Layout::CN;
+        auto is2D = input.second->getTensorDesc().getLayout() == Layout::NC || input.second->getTensorDesc().getLayout() == Layout::CN;
 
         if (!ptr_inputs_global_id.count(input.first)) {
             // should not happen in user code, however it might happen if there is any non-executable-network-based integration of a GNAPlugin instance
@@ -2087,17 +2147,17 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
             THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
         }
 
-        auto dims = input.second->dims();
+        auto dims = input.second->getTensorDesc().getDims();
 
         ImportFrames(get_ptr_inputs_global(input.first)[idx],
                      input.second->cbuffer().as<float *>(),
-                     input.second->precision(),
-                     inputScaleFactors.size() <= idx ? 1.0 : inputScaleFactors[idx],
+                     input.second->getTensorDesc().getPrecision(),
+                     sw_fp32 ? 1.0f : inputScaleFactors[inputNum],
                      orientation_in[input.first],
-                     dims[dims.size() - 1],
-                     is2D ? dims[1] : dims[dims.size() - 1],
-                     is2D ? dims[0] : dims[0] * dims[1] * dims[2],
-                     is2D ? dims[0] : dims[0] * dims[1] * dims[2]);
+                     dims[0],
+                     is2D ? dims[dims.size() - 2] : dims[0],
+                     is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3],
+                     is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3]);
 
         bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
         if (((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
@@ -2106,11 +2166,12 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
             RotateFeatures(reinterpret_cast<uint8_t *>(get_ptr_inputs_global(input.first)[idx]),
                            gnadevice ? 2 : 4,
                            // TODO: only works for cnn4a and google command so far
-                           dims[dims.size() - 1],
-                           is2D ? dims[0] : dims[0] * dims[2],  // num_feature_vectors looks batch should be there
+                           dims[0],
+                           is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 3],  // num_feature_vectors - looks like the batch should be there
                            num_rotate_rows,
                            num_rotate_columns);
         }
+        ++inputNum;
     }
 
     if (!gnadevice) {
@@ -2139,14 +2200,14 @@ void GNAPlugin::Wait(uint32_t idx) {
         dnn.WriteDnnText("Net_.txt", kDnnFloat);
         dnn.WriteInputAndOutputText();
     }
-    dnn.WriteInputAndOutputTextGNA(&std::get<0>(nnets.front())->obj);
+    dnn.WriteInputAndOutputTextGNA(&std::get<0>(nnets[idx])->obj);
 #endif
     if (result.size() != 1) {
         THROW_GNA_EXCEPTION << "Invalid number of outputs for infer request: " << result.size() << ",  only 1 supported";
     }
     auto & output = *result.begin()->second;
 
-    if (output.layout() == Layout::NC) {
+    if (output.getTensorDesc().getLayout() == Layout::NC) {
         // TODO: rotate can be incorporated with exporting - used only in unit tests so far
         // TODO: restore:
 //        if (orientation_out != kDnnInterleavedOrientation) {
@@ -2174,19 +2235,20 @@ void GNAPlugin::Wait(uint32_t idx) {
             }
         }
 
+        auto exportOutputDims = output.getTensorDesc().getDims();
         ExportScores(output.buffer(),
                      ptr_outputs_global[idx],
                      orientation_out,
-                     output.dims()[output.dims().size() - 1],
-                     output.dims()[1],
-                     output.dims()[0],
-                     output.dims()[0],
-                     output.dims()[0],
+                     exportOutputDims[0],
+                     exportOutputDims[exportOutputDims.size() - 2],
+                     exportOutputDims[exportOutputDims.size() - 1],
+                     exportOutputDims[exportOutputDims.size() - 1],
+                     exportOutputDims[exportOutputDims.size() - 1],
                      // TODO: create a better getter considering the multiple outputs case
                      gnadevice ? std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].nBytesPerOutput : sizeof(float),
                      sizeof(float));
-    } else if (output.layout() != Layout::CN) {
-        THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC or Layout::CN. But was " << output.layout();
+    } else if (output.getTensorDesc().getLayout() != Layout::CN) {
+        THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC or Layout::CN. But was " << output.getTensorDesc().getLayout();
     }
 
     if (gnadevice) {
@@ -2198,9 +2260,10 @@ void GNAPlugin::Wait(uint32_t idx) {
         }
         num_infers++;
         if (f) {
-            for (int i = 0; i < output.dims()[1]; i++) {
-                for (int j = 0; j < output.dims()[0]; j++) {
-                    fprintf(f, "%d ", output.cbuffer().as<int32_t *>()[output.dims()[0] * i + j]);
+            auto dims = output.getTensorDesc().getDims();
+            for (int i = 0; i < dims[dims.size() - 2]; i++) {
+                for (int j = 0; j < dims[dims.size() - 1]; j++) {
+                    fprintf(f, "%d ", output.cbuffer().as<int32_t *>()[dims[dims.size() - 1] * i + j]);
                 }
                 fprintf(f, "\n");
             }
@@ -2209,14 +2272,15 @@ void GNAPlugin::Wait(uint32_t idx) {
 #endif
         ConvertToFloat(output.buffer(),
                        output.buffer(),
-                       output.dims()[0],
-                       output.dims()[1],
+                       output.getTensorDesc().getDims()[output.getTensorDesc().getDims().size() - 1],
+                       output.getTensorDesc().getDims()[output.getTensorDesc().getDims().size() - 2],
                        output_scale_factor);
 #ifdef PLOT
         if (f) {
-            for (int i = 0; i < output.dims()[1]; i++) {
-                for (int j = 0; j < output.dims()[0]; j++) {
-                    fprintf(f, "%.2f ", output.cbuffer().as<float *>()[output.dims()[0] * i + j]);
+            auto dims = output.getTensorDesc().getDims();
+            for (int i = 0; i < dims[dims.size() - 2]; i++) {
+                for (int j = 0; j < dims[dims.size() - 1]; j++) {
+                    fprintf(f, "%.2f ", output.cbuffer().as<float *>()[dims[dims.size() - 1] * i + j]);
                 }
                 fprintf(f, "\n");
             }
@@ -2257,7 +2321,8 @@ void GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::Bl
 Blob::Ptr GNAPlugin::GetOutputBlob(InferenceEngine::Precision precision) {
     // need to have intermediate blob for interleave conversion
     InferenceEngine::Blob::Ptr outputBlob;
-    outputBlob = make_blob_with_precision(precision, NC, outputDims);
+    auto outputDims = outputsDataMap.begin()->second->getTensorDesc().getDims();
+    outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, outputDims.size() == 2 ? NC : NCHW));
     outputBlob->allocate();
     return outputBlob;
 }
@@ -2266,8 +2331,8 @@ Blob::Ptr GNAPlugin::GetInputBlob(std::string name, InferenceEngine::Precision p
     InferenceEngine::Blob::Ptr inputBlob;
     // need to have intermediate blob for interleave conversion
     // TODO: NCHW format support is experimental - the C++ MO did insert a reshape, while the TF MO did not
-    auto inputDims = inputsDataMap[name]->getDims();
-    inputBlob = make_blob_with_precision(precision, inputDims.size() == 2 ? NC : NCHW, inputDims);
+    auto inputDims = inputsDataMap[name]->getTensorDesc().getDims();
+    inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, inputDims.size() == 2 ? NC : NCHW));
     inputBlob->allocate();
     return inputBlob;
 }
@@ -2326,19 +2391,20 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
 
     num_bytes_per_output = header.output.element_size;
 
-
-    outputDims = SizeVector({header.output.elements_count / header.nGroup, header.nGroup});
-    auto inputDims = SizeVector({header.input.elements_count / header.nGroup, header.nGroup});
+    auto outputDims = SizeVector({header.nGroup, header.output.elements_count / header.nGroup});
+    auto inputDims = SizeVector({header.nGroup, header.input.elements_count / header.nGroup});
 
     inputsDataMap["input"] = std::make_shared<InputInfo>();
     inputsDataMap["input"]->setInputData(make_shared<Data>("input",
-                                                           inputDims,
-                                                           Precision::FP32,
-                                                           Layout::NC));
+                                                           TensorDesc(
+                                                                   Precision::FP32,
+                                                                   inputDims,
+                                                                   Layout::NC)));
     outputsDataMap["output"] = make_shared<Data>("output",
-                                                 outputDims,
-                                                 Precision::FP32,
-                                                 Layout::NC);
+                                                 TensorDesc(
+                                                         Precision::FP32,
+                                                         outputDims,
+                                                         Layout::NC));
 
     output_scale_factor = header.output.scaleFactor;
     inputScaleFactors.push_back(header.input.scaleFactor);
@@ -2375,20 +2441,20 @@ void GNAPlugin::Export(const std::string &fileName) {
     std::fstream outStream(fileName, ios_base::out | ios_base::binary);
 
     // TODO: the nnet group parameter looks like it is only used in the application - so can we move this line into LoadNetwork?
-    auto inputDims = inputsDataMap.begin()->second->getDims();
+    auto inputDims = inputsDataMap.begin()->second->getTensorDesc().getDims();
     if (inputDims.size() == 2) {
-        std::get<0>(nnets.front())->obj.nGroup = inputDims[1];
+        std::get<0>(nnets.front())->obj.nGroup = inputDims[0];
     }
 
     auto serial = GNAModelSerial(&std::get<0>(nnets.front())->obj,
                    {inputScaleFactors.front(),
                     ptr_inputs_global_storage.front()[0],
                     2,
-                    static_cast<uint32_t>(InferenceEngine::details::product(inputsDataMap.begin()->second->getDims()))},
+                    static_cast<uint32_t>(InferenceEngine::details::product(inputsDataMap.begin()->second->getTensorDesc().getDims()))},
                    {output_scale_factor,
                     ptr_outputs_global[0],
                     num_bytes_per_output,
-                    static_cast<uint32_t>(InferenceEngine::details::product(outputsDataMap.begin()->second->getDims()))})
+                    static_cast<uint32_t>(InferenceEngine::details::product(outputsDataMap.begin()->second->getTensorDesc().getDims()))})
         .SetInputRotation(dnn.num_rotate_rows, dnn.num_rotate_columns);
 
     for (auto && memoryConnection : memory_connection) {
@@ -2460,12 +2526,16 @@ void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
         if (inputScaleFactors.size() <= scaleForInput) {
             inputScaleFactors.resize(scaleForInput + 1, 1.f);
         }
-        inputScaleFactors[scaleForInput] = std::stod(value);
+        inputScaleFactors[scaleForInput] = InferenceEngine::CNNLayer::ie_parse_float(value);
     });
 
     if (inputScaleFactors.empty()) {
         if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
-            inputScaleFactors.push_back(std::stod(value));
+            auto scaleFactor = InferenceEngine::CNNLayer::ie_parse_float(value);
+            if (fp32eq(scaleFactor, 0.0f)) {
+                THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported";
+            }
+            inputScaleFactors.push_back(scaleFactor);
         });
     }
 
@@ -2573,6 +2643,10 @@ void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
             THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
         }
     });
+
+    if (sw_fp32 && gna_lib_async_threads_num > 1) {
+        THROW_GNA_EXCEPTION << "GNA plugin not support async mode on GNA_SW_FP32!";
+    }
 }
 
 /**
@@ -2616,7 +2690,7 @@ void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
 intel_dnn_component_t * GNAPlugin::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
     if (current->insData.empty()) return nullptr;
 
-    auto prev_layer = current->insData.front().lock()->creatorLayer.lock();
+    auto prev_layer = current->insData.front().lock()->getCreatorLayer().lock();
 
     return findDnnLayer(prev_layer);
 }
index f0d9fc3..e4ae5cc 100644 (file)
@@ -475,19 +475,6 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
                      uint32_t num_bytes_per_element_input,
                      uint32_t num_bytes_per_element);
 
-    friend void GNAPluginNS::ConvertToInt16(int16_t *ptr_dst,
-                    const float *ptr_src,
-                    const uint32_t num_rows,
-                    const uint32_t num_columns,
-                    const float scale_factor);
-    friend void GNAPluginNS::ConvertToFloat(float *ptr_dst,
-                    int32_t *ptr_src,
-                    const uint32_t num_rows,
-                    const uint32_t num_columns,
-                    const float scale_factor);
-
-    friend int16_t GNAPluginNS::ConvertFloatToInt16(float src);
-
     template <typename T, typename U>
     void copyInputData(T *dst,
                     const U *src,
@@ -509,8 +496,6 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     intel_dnn_component_t * find_first_unused_input(InferenceEngine::CNNLayerPtr current);
     std::map<std::string, int> bytes_alllocated_for_input;
     InferenceEngine::InputsDataMap inputsDataMap;
-
-    InferenceEngine::SizeVector outputDims;
     InferenceEngine::OutputsDataMap outputsDataMap;
 };
 }  // namespace GNAPluginNS
index efe3490..528e789 100644 (file)
@@ -46,6 +46,7 @@ class Config {
     }
 
     inline Endpoint find_configuration(InferenceEngine::ICNNNetwork &network) {
+        IE_SUPPRESS_DEPRECATED_START
         auto device = network.getTargetDevice();
         auto targetDevice = device == InferenceEngine::TargetDevice::eDefault ? _defaultDevice : device;
         auto res = std::find_if(std::begin(supported), std::end(supported), [&](Endpoint &e) {
@@ -62,7 +63,7 @@ class Config {
                                << InferenceEngine::TargetDeviceInfo::name(network.getTargetDevice())
                                << ".\nSupported target device: " << InferenceEngine::TargetDeviceInfo::name(InferenceEngine::TargetDevice::eGNA);
         }
-
+        IE_SUPPRESS_DEPRECATED_END
         return *res;
     }
 };
index 6ea717e..82d2d8c 100644 (file)
@@ -13,7 +13,7 @@ using namespace GNAPluginNS;
 
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
     try {
-        plugin = make_ie_compatible_plugin({2, 0, "GNAPlugin", "GNAPlugin"}, make_shared<GNAPluginInternal>());
+        plugin = make_ie_compatible_plugin({2, 1, "GNAPlugin", "GNAPlugin"}, make_shared<GNAPluginInternal>());
         return OK;
     }
     catch (std::exception &ex) {
index 130a094..dab1e15 100644 (file)
@@ -35,6 +35,10 @@ class GNAPluginInternal  : public InferenceEngine::InferencePluginInternal {
         return plg->GetName();
     }
 
+    InferenceEngine::ICNNNetwork&  RemoveConstLayers(InferenceEngine::ICNNNetwork &network) override {
+        return network;
+    }
+
     /**
      * @deprecated Use the version with config parameter
      */
index 416b590..b68be9e 100644 (file)
@@ -42,7 +42,7 @@ inline GnaLog & gnawarn() {
 #ifdef __PRETTY_FUNCTION__
 #undef __PRETTY_FUNCTION__
 #endif
-#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+#ifdef _WIN32
 # define __PRETTY_FUNCTION__ __FUNCSIG__
 #else
 # define __PRETTY_FUNCTION__ __FUNCTION__
index 3f3fcf3..8637545 100644 (file)
@@ -376,7 +376,9 @@ class DataQuantizer<Desc, InferenceEngine::CNNLayer *> : public DataQuantizerBas
             }
         } else {
                 if (LayerInfo(*cnnLayer).isActivation() ||
-                        LayerInfo(*cnnLayer).isCopy()) {
+                    LayerInfo(*cnnLayer).isCopy() ||
+                    LayerInfo(*cnnLayer).isReshape() ||
+                    LayerInfo(*cnnLayer).isPermute()) {
                 // precision of activation layers is always equal input precision
                 for (auto &&outData : cnnLayer->outData) {
                     outData->setPrecision(Desc::mandatory().getInputPrecision());
index 8e704fd..6f46c31 100644 (file)
@@ -14,6 +14,7 @@
 #define MAX_VAL_1B_WEIGHT 127
 #define MAX_VAL_2B_WEIGHT 16384
 #define MAX_VAL_2B_FEAT 16384
+#define MAX_VAL_4B_BIAS 1073741824
 #ifdef DEBUG
 #define QUANTWARNING(...) (fprintf(stderr, __VA_ARGS__))
 #else
index b6d3b03..aa8e458 100644 (file)
@@ -287,7 +287,19 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
         }
 
         if (!sourceQuantParams) {
-            THROW_GNA_EXCEPTION << "Concat quantization for this case need to be implemented!!! \n";
+            auto in0LayerInfo = LayerInfo(in0);
+            auto in1LayerInfo = LayerInfo(in1);
+            if (in0LayerInfo.isActivation()) {
+                quantParams0->_weights_quant = quantParams1->_dst_quant;
+                quantParams0->_dst_quant = quantParams1->_dst_quant;
+                sourceQuantParams = quantParams1;
+            } else if (in1LayerInfo.isActivation()) {
+                quantParams1->_weights_quant = quantParams0->_dst_quant;
+                quantParams1->_dst_quant = quantParams0->_dst_quant;
+                sourceQuantParams = quantParams0;
+            } else {
+                THROW_GNA_EXCEPTION << "Concat quantization for this case need to be implemented!!! \n";
+            }
         }
 
         if (!fp32eq(quantParams0->_dst_quant.scale, quantParams1->_dst_quant.scale) && concatIdxToUpdate == -1) {
@@ -368,6 +380,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
             InferenceEngine::getInjectedData<QuantizedLayerParams>(*InferenceEngine::CNNNetPrevLayer(wl).get());
 
         auto quant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*wl);
+        quant->_src_quant.scale = quantDataForInputLayer->_dst_quant.scale;
         // TODO: pass 8 bits somehow
         if (quant->_weights_quant.scale == 1.0f) {
             size_t scaleRange = 0;
@@ -381,6 +394,14 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
             quant->_weights_quant.scale =
                 ScaleFactorForQuantization(wl->_weights->buffer().as<float *>(), scaleRange, wl->_weights->size());
 
+            if (wl->_biases) {
+                quant->_bias_quant.scale = ScaleFactorForQuantization(wl->_biases->buffer().as<float *>(),
+                                                                      MAX_VAL_4B_BIAS,
+                                                                      wl->_biases->size());
+                quant->_bias_quant.scale = std::min(quant->_weights_quant.scale * quant->_src_quant.scale, quant->_bias_quant.scale);
+                quant->_weights_quant.scale = quant->_bias_quant.scale / quant->_src_quant.scale;
+            }
+
             // TODO: find out why ???
             if (weightsSize == 1) {
                 quant->_weights_quant.scale *= MAX_OUT_MULTIPLIER;
@@ -398,8 +419,6 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
         }
 
 
-        quant->_src_quant.scale = quantDataForInputLayer->_dst_quant.scale;
-
         double tmp_dst_quant_scale = quant->_weights_quant.scale * quantDataForInputLayer->_dst_quant.scale;
 
         if (weightsSize == 1 &&
index 91eb189..fdb1b0f 100644 (file)
@@ -8,16 +8,16 @@ file(GLOB SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
 )
 
-add_library(${TARGET_NAME} SHARED ${SOURCES})
+file(GLOB_RECURSE HEADERS
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
+
+ie_add_plugin(NAME ${TARGET_NAME}
+              DEVICE_NAME "HETERO"
+              SOURCES ${SOURCES} ${HEADERS}
+              VERSION_DEFINES_FOR hetero_plugin.cpp)
 
 target_include_directories(${TARGET_NAME} PRIVATE
     "${IE_MAIN_SOURCE_DIR}/src/inference_engine"
 )
 
-target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_PLUGIN)
-
-target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
-
-set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
-
-add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ade)
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "ade_util.hpp"
+#include "hetero_ade_util.hpp"
 
 #include <unordered_map>
 #include <utility>
 
 using namespace InferenceEngine;
 
-IE_SUPPRESS_DEPRECATED_START
-
-IHeteroDeviceLoader::~IHeteroDeviceLoader() {
-}
-
-IE_SUPPRESS_DEPRECATED_START
-
 StatusCode HeteroDeviceLoader::LoadNetwork(
     const std::string &device,
     IExecutableNetwork::Ptr &ret,
@@ -7,6 +7,8 @@
 #include "hetero_async_infer_request.hpp"
 #include "ie_util_internal.hpp"
 #include "hetero_device_loader.hpp"
+#include "hetero_fallback_policy.hpp"
+#include "hetero_graph_splitter.hpp"
 
 #include <vector>
 #include <map>
 #include <array>
 
 #include <ie_plugin_dispatcher.hpp>
-#include <ie_graph_splitter.hpp>
-#include "fallback_policy.hpp"
 #include "details/caseless.hpp"
 #include "ie_plugin_config.hpp"
+#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "cpp_interfaces/base/ie_inference_plugin_api.hpp"
 #include "cpp_interfaces/impl/ie_plugin_internal.hpp"
 #include "hetero/hetero_plugin_config.hpp"
@@ -298,8 +299,22 @@ void HeteroExecutableNetwork::load(InferenceEngine::ICNNNetwork &network_,
     for (auto &&d : descs) {
         IExecutableNetwork::Ptr ret;
         ResponseDesc resp;
+
+        InputsDataMap subnetworkInputs;
+        d._clonedNetwork->getInputsInfo(subnetworkInputs);
+        bool isInputSubnetwork = (subnetworkInputs.end() != std::find_first_of(
+            subnetworkInputs.begin(), subnetworkInputs.end(),
+            externalInputsData.begin(), externalInputsData.end(),
+            [] (const InputsDataMap::value_type& lhs, const InputsDataMap::value_type& rhs) {
+                return lhs.first == rhs.first;
+            }));
+
+        auto cfg = config;
+        cfg[IE_INTERNAL_CONFIG_KEY(SUBNETWORK_WITH_NETWORK_INPUTS)] = isInputSubnetwork
+                                                                    ? CONFIG_VALUE(YES)
+                                                                    : CONFIG_VALUE(NO);
         IE_SUPPRESS_DEPRECATED_START
-        StatusCode status = d._deviceLoader->LoadNetwork(d._device, ret, *d._clonedNetwork, config, &resp);
+        StatusCode status = d._deviceLoader->LoadNetwork(d._device, ret, *d._clonedNetwork, cfg, &resp);
         IE_SUPPRESS_DEPRECATED_END
         if (status != OK) {
             THROW_IE_EXCEPTION << resp.msg;
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "fallback_policy.hpp"
+#include "hetero_fallback_policy.hpp"
 #include "hetero_device_loader.hpp"
 #include "details/ie_cnn_network_iterator.hpp"
 #include "ie_layers.h"
 
 using namespace InferenceEngine;
 
-IE_SUPPRESS_DEPRECATED_START
-
-QueryNetworkResult::QueryNetworkResult() : rc(OK) {
-}
-
-const QueryNetworkResult & QueryNetworkResult::operator= (const QueryNetworkResult & q) {
-    supportedLayers = q.supportedLayers;
-    supportedLayersMap = q.supportedLayersMap;
-    rc = q.rc;
-    resp = q.resp;
-
-    return *this;
-}
-
-QueryNetworkResult & QueryNetworkResult::operator= (QueryNetworkResult && q) {
-    supportedLayers = q.supportedLayers;
-    supportedLayersMap = q.supportedLayersMap;
-    rc = q.rc;
-    resp = q.resp;
-
-    return *this;
-}
-
-QueryNetworkResult::QueryNetworkResult(const QueryNetworkResult & instance) :
-    supportedLayers(instance.supportedLayers),
-    supportedLayersMap(instance.supportedLayersMap),
-    rc(instance.rc),
-    resp(instance.resp) {
-}
-
-QueryNetworkResult::~QueryNetworkResult() {
-}
-
-IE_SUPPRESS_DEPRECATED_END
-
 void dla_layer_colorer(const CNNLayerPtr layer,
                        ordered_properties &printed_properties,
                        ordered_properties &node_properties) {
@@ -2,7 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "ie_graph_splitter.hpp"
+#include "hetero_graph_splitter.hpp"
+#include "hetero_ade_util.hpp"
 
 #include <cassert>
 #include <unordered_map>
@@ -11,8 +12,6 @@
 #include <vector>
 #include <string>
 
-#include <ade_util.hpp>
-
 #include <ade/typed_graph.hpp>
 #include <ade/helpers/subgraphs.hpp>
 
@@ -24,7 +24,7 @@ using LayersSet = std::unordered_set<CNNLayerPtr>;
 /// @param checkers - list of supported plugins
 ///
 /// @return list of subgraphs
-INFERENCE_ENGINE_API_CPP(std::vector<LayersSet>)
+std::vector<LayersSet>
 splitGraph(ICNNNetwork& network,
            const std::vector<std::string>& plugins);
 
@@ -32,7 +32,7 @@ splitGraph(ICNNNetwork& network,
 /// references between subgraphs
 ///
 /// @param subgraphs - list of subgraphs
-INFERENCE_ENGINE_API_CPP(void)
+void
 sortSubgraphs(std::vector<LayersSet>& subgraphs);
 
 }  // namespace InferenceEngine
index e76f717..ecd2962 100644 (file)
@@ -2,7 +2,155 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <hetero/hetero_plugin.hpp>
+#include "ie_metric_helpers.hpp"
+#include "hetero_plugin.hpp"
+#include <memory>
+#include <vector>
+#include <map>
+#include <string>
+#include "ie_plugin_config.hpp"
+#include "hetero/hetero_plugin_config.hpp"
+#include <cpp_interfaces/base/ie_plugin_base.hpp>
+#include "hetero_plugin_base.hpp"
+#include "hetero_executable_network.hpp"
+#include "hetero_fallback_policy.hpp"
+
+using namespace InferenceEngine;
+using namespace InferenceEngine::PluginConfigParams;
+using namespace InferenceEngine::HeteroConfigParams;
+using namespace HeteroPlugin;
+using namespace std;
+
+static Version heteroPluginDescription = {
+        {2, 1},  // plugin API version
+        CI_BUILD_NUMBER,
+        "heteroPlugin"  // plugin description message
+};
+
+void Engine::GetVersion(const Version *&versionInfo)noexcept {
+    versionInfo = &heteroPluginDescription;
+}
+
+Engine::Engine() {
+    _pluginName = "HETERO";
+    _config[InferenceEngine::PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = "YES";
+    _config[KEY_HETERO_DUMP_GRAPH_DOT] = NO;
+}
+
+InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const ICore * core, InferenceEngine::ICNNNetwork &network,
+                                                                           const std::map<std::string, std::string> &config) {
+    // TODO(amalyshe) do we need verification of input precisions here?
+    std::map<std::string, std::string> tconfig;
+    tconfig = config;
+
+    // we must not override the parameter, but need to copy everything from plugin config
+    for (auto && c : _config) {
+        if (tconfig.find(c.first) == tconfig.end()) {
+            tconfig[c.first] = c.second;
+        }
+    }
+
+    return std::make_shared<HeteroExecutableNetwork>(network, core, tconfig, _extensions, _deviceLoaders, error_listener);
+}
+
+void Engine::SetConfig(const std::map<std::string, std::string> &config) {
+    if (_config.find("TARGET_FALLBACK") == _config.end()) {
+        _config["TARGET_FALLBACK"] = "";
+    }
+
+    for (auto &&i : config) {
+        _config[i.first] = i.second;
+    }
+}
+
+IE_SUPPRESS_DEPRECATED_START
+void Engine::SetDeviceLoader(const std::string &device,
+                             IHeteroDeviceLoader::Ptr pLoader) {
+    _deviceLoaders[device] = pLoader;
+}
+IE_SUPPRESS_DEPRECATED_END
+
+void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
+    _extensions.push_back(extension);
+}
+
+void Engine::SetAffinity(InferenceEngine::ICNNNetwork &network,
+                         const std::map<std::string, std::string> &config) {
+    FallbackPolicy fbPolicy(_deviceLoaders, _config[KEY_HETERO_DUMP_GRAPH_DOT] == YES, GetCore());
+    fbPolicy.init(_config["TARGET_FALLBACK"], config, _extensions);
+    fbPolicy.setAffinity(fbPolicy.getAffinities(config, network), network);
+}
+
+void Engine::SetLogCallback(IErrorListener &listener) {
+    error_listener = &listener;
+
+    IE_SUPPRESS_DEPRECATED_START
+    for (auto& device_loader : _deviceLoaders)
+        device_loader.second->SetLogCallback(*error_listener);
+    IE_SUPPRESS_DEPRECATED_END
+}
+
+void Engine::QueryNetwork(const ICNNNetwork &network, const std::map<std::string, std::string>& config, QueryNetworkResult &res) const {
+    auto _deviceLoaders_ = _deviceLoaders;
+
+    auto it = _config.find(KEY_HETERO_DUMP_GRAPH_DOT);
+    IE_ASSERT(it !=  _config.end());
+    FallbackPolicy fbPolicy(_deviceLoaders_, it->second == YES, GetCore());
+    it = config.find("TARGET_FALLBACK");
+    if (it == config.end()) {
+        it = _config.find("TARGET_FALLBACK");
+
+        if (it == _config.end()) {
+            THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
+        }
+    }
+    fbPolicy.init(it->second, config, _extensions);
+    res = fbPolicy.getAffinities(config, network);
+}
+
+Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter> & options) const {
+    if (METRIC_KEY(SUPPORTED_METRICS) == name) {
+        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, std::vector<std::string>{
+            METRIC_KEY(SUPPORTED_METRICS),
+            METRIC_KEY(SUPPORTED_CONFIG_KEYS)});
+    } else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
+        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, std::vector<std::string>{
+            HETERO_CONFIG_KEY(DUMP_GRAPH_DOT),
+            "TARGET_FALLBACK",
+            CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)});
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported Plugin metric: " << name;
+    }
+}
+
+Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter> & options) const {
+    if (name == HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)) {
+        auto it = _config.find(KEY_HETERO_DUMP_GRAPH_DOT);
+        IE_ASSERT(it != _config.end());
+        bool dump = it->second == YES;
+        return { dump };
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported config key: " << name;
+    }
+}
+
+namespace HeteroPlugin {
+
+InferenceEngine::StatusCode CreateHeteroPluginEngine(
+        InferenceEngine::IInferencePlugin *&plugin,
+        InferenceEngine::ResponseDesc *resp) noexcept {
+    try {
+        plugin = new HeteroPluginBase<Engine>(
+                {{2, 1}, "heteroPlugin", "heteroPlugin"},
+                std::make_shared<Engine>());
+        return OK;
+    }
+    catch (std::exception &ex) {
+        return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
+    }
+}
+
+}  // namespace HeteroPlugin
 
 INFERENCE_PLUGIN_API(InferenceEngine::StatusCode) CreatePluginEngine(
         InferenceEngine::IInferencePlugin *&plugin,
index f3514ed..f4b34ad 100644 (file)
@@ -6,6 +6,10 @@ set (TARGET_NAME "inference_engine")
 
 if (WIN32)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
+elseif(ENABLE_LTO)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto")
+  set(CMAKE_AR  "gcc-ar")
+  set(CMAKE_RANLIB "gcc-ranlib")
 endif()
 
 file (GLOB LIBRARY_SRC
@@ -18,7 +22,6 @@ file (GLOB LIBRARY_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/*.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/built-in/*.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/const_infer/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/hetero/*.cpp
       )
 
 file (GLOB LIBRARY_HEADERS
@@ -31,7 +34,6 @@ file (GLOB LIBRARY_HEADERS
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/base/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/impl/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/interface/*.hpp
-       ${CMAKE_CURRENT_SOURCE_DIR}/hetero/*.hpp
       )
 
 if( (NOT DEFINED ENABLE_SSE42) OR ENABLE_SSE42)
@@ -43,21 +45,39 @@ if( (NOT DEFINED ENABLE_SSE42) OR ENABLE_SSE42)
            ${LIBRARY_HEADERS}
            ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.hpp
           )
+
+    file (GLOB SSE_SRC
+           ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.cpp
+          )
+    file (GLOB SSE_HEADERS
+           ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.hpp
+          )
+
     include_directories(${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42)
+
     if (WIN32)
-        set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/blob_transform_sse42.cpp"
-                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_data_sse42.cpp"
-                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp" PROPERTIES COMPILE_FLAGS /arch:SSE2)
+        if("${CMAKE_CXX_COMPILER_ID}" STREQUAL MSVC)
+            set_source_files_properties(${SSE_SRC}
+                PROPERTIES COMPILE_FLAGS /arch:SSE4.2
+                )       
+        elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL Intel)
+            set_source_files_properties(${SSE_SRC}
+                PROPERTIES COMPILE_FLAGS /arch:SSE4.2 /QxSSE4.2 /Qvc14
+                )       
+        elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL Clang)
+            set_source_files_properties(${SSE_SRC}
+                PROPERTIES COMPILE_FLAGS -msse4.2
+                )       
+        endif()
     else()
-        set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/blob_transform_sse42.cpp"
-                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_data_sse42.cpp"
-                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp" PROPERTIES COMPILE_FLAGS -msse4.2)
+        set_source_files_properties(${SSE_SRC}
+                PROPERTIES COMPILE_FLAGS -msse4.2
+                )
     endif()
     add_definitions(-DHAVE_SSE=1)
 endif()
 
 addVersionDefines(ie_version.cpp CI_BUILD_NUMBER)
-addVersionDefines(hetero/hetero_plugin.cpp CI_BUILD_NUMBER)
 
 set (PUBLIC_HEADERS_DIR "${IE_MAIN_SOURCE_DIR}/include")
 
@@ -82,7 +102,7 @@ add_library(${TARGET_NAME} SHARED
             ${PUBLIC_HEADERS})
 set_ie_threading_interface_for(${TARGET_NAME})
 
-target_link_libraries(${TARGET_NAME} PRIVATE fluid ngraph ade ${INTEL_ITT_LIBS} pugixml PUBLIC ${CMAKE_DL_LIBS})
+target_link_libraries(${TARGET_NAME} PRIVATE fluid ngraph ${INTEL_ITT_LIBS} pugixml PUBLIC ${CMAKE_DL_LIBS})
 
 if(WIN32)
     #To disable min/max macro in windows.h
@@ -91,8 +111,7 @@ endif()
 
 # Properties->C/C++->General->Additional Include Directories
 target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR}
-                                          PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
-                                                  "${IE_MAIN_SOURCE_DIR}/src/dumper")
+                                          PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
 
 target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src")
 target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${IE_MAIN_SOURCE_DIR}/thirdparty/ngraph/src")
@@ -102,6 +121,10 @@ if(ENABLE_MKL_DNN)
     target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/cpu/xbyak")
 endif()
 
+if(ENABLE_UNICODE_PATH_SUPPORT)
+    target_compile_definitions(${TARGET_NAME} PUBLIC ENABLE_UNICODE_PATH_SUPPORT)
+endif()
+
 set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
 
 # Static library used for unit tests which are always built
@@ -132,6 +155,10 @@ if(WIN32)
     target_compile_definitions(${TARGET_NAME}_s PRIVATE -DNOMINMAX)
 endif()
 
+if(ENABLE_UNICODE_PATH_SUPPORT)
+    target_compile_definitions(${TARGET_NAME}_s PUBLIC ENABLE_UNICODE_PATH_SUPPORT)
+endif()
+
 set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s)
 
 target_link_libraries(${TARGET_NAME}_s PRIVATE fluid
@@ -145,7 +172,7 @@ target_link_libraries(${TARGET_NAME}_s PRIVATE fluid
 add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
 
 ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
-                    POSSIBLE_PLUGINS clDNNPlugin dliaPlugin GNAPlugin MKLDNNPlugin myriadPlugin)
+                    POSSIBLE_PLUGINS MultiDevicePlugin HeteroPlugin clDNNPlugin dliaPlugin GNAPlugin MKLDNNPlugin myriadPlugin HDDLPlugin)
 
 # export targets
 export(TARGETS ${TARGET_NAME} NAMESPACE IE:: FILE "${CMAKE_BINARY_DIR}/targets.cmake")
index 08a356d..28f4832 100644 (file)
@@ -6,7 +6,9 @@
 
 #include <utility>
 #include <memory>
+#include <vector>
 #include "inference_engine.hpp"
+#include "ie_memcpy.h"
 
 template <InferenceEngine::Precision::ePrecision precision>
 class BlobFactory {
@@ -82,8 +84,39 @@ InferenceEngine::Blob::Ptr make_custom_blob(Args &&... args) {
 }
 
 /**
+ * Creates a blob with the given layout and dimensions, deducing the precision from T
+ * @tparam T type of the underlying elements
+ * @param layout layout of the blob to create
+ * @param size dimensions of the blob to create
+ * @return smart pointer to the created Blob
+ */
+template <class T>
+InferenceEngine::Blob::Ptr make_custom_blob(InferenceEngine::Layout layout, InferenceEngine::SizeVector size) {
+    return InferenceEngine::make_shared_blob<T>(InferenceEngine::TensorDesc(
+            InferenceEngine::Precision::fromType<T>(),
+            size,
+            layout));
+}
+
+/**
  * @brief Creates a TBlob<> object from a Data node
  * @param Data reference to a smart pointer of the Data node
  * @return Smart pointer to TBlob<> with the relevant C type to the precision of the data node
  */
 INFERENCE_ENGINE_API_CPP(InferenceEngine::Blob::Ptr) CreateBlobFromData(const InferenceEngine::DataPtr &data);
+
+/**
+ * Copies data from a vector into a pre-allocated Blob
+ * @tparam T type of the vector elements; must match the blob element size
+ * @param outputBlob destination blob (must already be allocated)
+ * @param inputVector source vector; its size must equal the blob size
+ */
+template <typename T> void CopyVectorToBlob(const InferenceEngine::Blob::Ptr outputBlob, const std::vector<T>& inputVector) {
+    if (outputBlob->size() != inputVector.size())
+        THROW_IE_EXCEPTION << "Size mismatch between dims and vector";
+    if (outputBlob->element_size() != sizeof(T))
+        THROW_IE_EXCEPTION << "Element size mismatch between blob and vector";
+    ie_memcpy(
+            outputBlob->buffer().as<T *>(),
+            outputBlob->byteSize(),
+            &inputVector[0],
+            inputVector.size() * sizeof(T));
+}
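
A minimal usage sketch for the two helpers added above (the header name "blob_factory.hpp" and the 1x3x2x2 shape are assumptions made for illustration):

    #include <vector>
    #include "blob_factory.hpp"  // assumed name of the header edited above

    int main() {
        // FP32 NCHW blob of shape 1x3x2x2 (12 elements); precision is deduced from float.
        InferenceEngine::Blob::Ptr blob =
                make_custom_blob<float>(InferenceEngine::Layout::NCHW, {1, 3, 2, 2});
        blob->allocate();

        // CopyVectorToBlob verifies that both the element count and the element size match.
        std::vector<float> values(12, 1.0f);
        CopyVectorToBlob<float>(blob, values);
        return 0;
    }
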
index 2e4fb74..0873ff1 100644 (file)
@@ -158,6 +158,92 @@ static inline void blob_copy_4d(Blob::Ptr src, Blob::Ptr dst) {
     }
 }
 
+template <InferenceEngine::Precision::ePrecision PRC>
+static void blob_copy_5d_t(Blob::Ptr src, Blob::Ptr dst) {
+    using data_t = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
+
+    const auto &src_blk_desc = src->getTensorDesc().getBlockingDesc();
+    const auto &dst_blk_desc = dst->getTensorDesc().getBlockingDesc();
+
+    data_t *src_ptr = src->buffer().as<data_t*>() + src_blk_desc.getOffsetPadding();
+    data_t *dst_ptr = dst->buffer().as<data_t*>() + dst_blk_desc.getOffsetPadding();
+
+    SizeVector dims = src->getTensorDesc().getDims();  // == dst's dims
+
+    const size_t N = dims[0];
+    const size_t C = dims[1];
+    const size_t D = dims[2];
+    const size_t H = dims[3];
+    const size_t W = dims[4];
+
+    const Layout src_l = src->getTensorDesc().getLayout();
+    const auto &src_strides = src_blk_desc.getStrides();
+    const auto N_src_stride = src_strides[0];
+    const auto C_src_stride = src_l == NDHWC ? src_strides[4] : src_strides[1];
+    const auto D_src_stride = src_l == NDHWC ? src_strides[1] : src_strides[2];
+    const auto H_src_stride = src_l == NDHWC ? src_strides[2] : src_strides[3];
+    const auto W_src_stride = src_l == NDHWC ? src_strides[3] : src_strides[4];
+
+    const Layout dst_l = dst->getTensorDesc().getLayout();
+    const auto &dst_strides = dst_blk_desc.getStrides();
+    const auto N_dst_stride = dst_strides[0];
+    const auto C_dst_stride = dst_l == NDHWC ? dst_strides[4] : dst_strides[1];
+    const auto D_dst_stride = dst_l == NDHWC ? dst_strides[1] : dst_strides[2];
+    const auto H_dst_stride = dst_l == NDHWC ? dst_strides[2] : dst_strides[3];
+    const auto W_dst_stride = dst_l == NDHWC ? dst_strides[3] : dst_strides[4];
+
+    if (src_l != dst_l) {
+        for (int n = 0; n < N; n++) {
+            for (int c = 0; c < C; c++) {
+                for (int d = 0; d < D; d++) {
+                    for (int h = 0; h < H; h++) {
+                        for (int w = 0; w < W; w++) {
+                            dst_ptr[n * N_dst_stride +
+                                    c * C_dst_stride +
+                                    d * D_dst_stride +
+                                    h * H_dst_stride +
+                                    w * W_dst_stride]
+                                =
+                            src_ptr[n * N_src_stride +
+                                    c * C_src_stride +
+                                    d * D_src_stride +
+                                    h * H_src_stride +
+                                    w * W_src_stride];
+                        }
+                    }
+                }
+            }
+        }
+    } else {
+        for (int i = 0; i < N*C*D*H*W; i++) {
+            dst_ptr[i] = src_ptr[i];
+        }
+    }
+}
+
+static inline void blob_copy_5d(Blob::Ptr src, Blob::Ptr dst) {
+    switch (src->getTensorDesc().getPrecision()) {
+        case Precision::FP32:
+        case Precision::I32:
+            blob_copy_5d_t<Precision::FP32>(src, dst);
+            break;
+
+        case Precision::FP16:
+        case Precision::U16:
+        case Precision::I16:
+            blob_copy_5d_t<Precision::U16>(src, dst);
+            break;
+
+        case Precision::U8:
+        case Precision::I8:
+            blob_copy_5d_t<Precision::U8>(src, dst);
+            break;
+
+        default:
+            THROW_IE_EXCEPTION << "Unsupported blob transformation for precision " << src->getTensorDesc().getPrecision();
+    }
+}
+
 void blob_copy(Blob::Ptr src, Blob::Ptr dst) {
     if (src->buffer() == nullptr)
         THROW_IE_EXCEPTION << "Cannot copy blob data. Source is not allocated.";
@@ -174,8 +260,10 @@ void blob_copy(Blob::Ptr src, Blob::Ptr dst) {
 
     if (src->getTensorDesc().getDims().size() == 4)
         blob_copy_4d(src, dst);
+    else if (src->getTensorDesc().getDims().size() == 5)
+        blob_copy_5d(src, dst);
     else
-        THROW_IE_EXCEPTION << "Unimplemented blob transformation. Only 4d supported.";
+        THROW_IE_EXCEPTION << "Unimplemented blob transformation. Only 4d or 5d supported.";
 }
 
 }  // namespace InferenceEngine
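
A sketch of exercising the new 5-D path in blob_copy (the header name "blob_transform.hpp" and the 1x2x3x4x5 shape are assumptions; both blobs must be allocated before the call):

    #include <ie_blob.h>
    #include "blob_transform.hpp"  // assumed header that declares InferenceEngine::blob_copy

    int main() {
        using namespace InferenceEngine;

        SizeVector dims = {1, 2, 3, 4, 5};  // N, C, D, H, W

        // Source in NCDHW, destination in NDHWC: blob_copy_5d_t() reorders via strides.
        Blob::Ptr src = make_shared_blob<float>(TensorDesc(Precision::FP32, dims, Layout::NCDHW));
        Blob::Ptr dst = make_shared_blob<float>(TensorDesc(Precision::FP32, dims, Layout::NDHWC));
        src->allocate();
        dst->allocate();

        blob_copy(src, dst);  // dispatches to blob_copy_5d() for 5-D tensors
        return 0;
    }
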
index 99af91c..d7c0dca 100644 (file)
@@ -14,7 +14,7 @@
 using namespace InferenceEngine;
 
 Builder::Layer::Layer(const std::string& type, const std::string& name):
-        name(name), type(type), id((std::numeric_limits<idx_t>::max)()) {}
+    id((std::numeric_limits<idx_t>::max)()), type(type), name(name) {}
 
 Builder::Layer::Layer(const ILayer::CPtr& layer) {
     id = layer->getId();
index 9691ee3..d2b179c 100644 (file)
@@ -13,6 +13,7 @@
 #include "debug.h"
 #include "graph_tools.hpp"
 #include <vector>
+#include <math.h>
 #include "network_serializer.h"
 
 using namespace std;
@@ -248,12 +249,16 @@ StatusCode CNNNetworkImpl::setBatchSize(size_t size, ResponseDesc* responseDesc)
             return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) << "Cannot set batch for 1D/3D input";
         }
 
+        std::string constType = "Const";
         for (auto layer : _data) {
             SizeVector dims = layer.second->getDims();
             // Calculates original size for batch = 1
-            size_t diff = dims.at(0) / originalBatchSize;
-            dims.at(0) = size * diff;
-            layer.second->setDims(dims);
+            CNNLayerPtr layerT = layer.second->getCreatorLayer().lock();
+            if (!layerT || !equal(layerT->type, constType)) {
+                float diff = static_cast<float>(dims.at(0)) / static_cast<float>(originalBatchSize);
+                dims.at(0) = static_cast<size_t>(std::ceil(size * diff));
+                layer.second->setDims(dims);
+            }
         }
         return OK;
     } catch (const InferenceEngineException& e) {
index 5633cb2..7561cb8 100644 (file)
@@ -955,6 +955,11 @@ void CNNNetworkInt8Normalizer::QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr
 
             // debug scales. Need to compare with actual values in FP32 scoring
             target_layer->blobs["ext-scale"] = target_layer->blobs["o-scale"];
+        } else {
+            // we do not have statistics here, so we cannot calculate requantization scales
+            // and the next layer will be executed in FP32;
+            // force the edge back to FP32 as well
+            target_layer->outData[0]->setPrecision(Precision::FP32);
         }
 
         // Normalizing the weights
@@ -1619,9 +1624,7 @@ void precisionColoring(const CNNLayerPtr layer,
 }
 
 void CNNNetworkInt8Normalizer::NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats) {
-    IE_SUPPRESS_DEPRECATED_START
-    CNNNetwork cnnn(&network);
-    IE_SUPPRESS_DEPRECATED_END
+    CNNNetwork cnnn(ICNNNetwork::Ptr(&network, [](void *) {}));
 
     int maxSign = 0x7F;
     int maxUnsign = 0xFF;
index b6138ed..3cc1cc2 100644 (file)
@@ -13,6 +13,7 @@
 #include <map>
 #include <vector>
 #include <ie_parameter.hpp>
+#include <ie_api.h>
 
 namespace InferenceEngine {
 
@@ -70,7 +71,7 @@ public:
     virtual ~IInferencePluginAPI() = default;
 };
 
-class DeviceIDParser {
+class INFERENCE_ENGINE_API_CLASS(DeviceIDParser) {
     std::string deviceName;
     std::string deviceID;
 
@@ -81,6 +82,7 @@ public:
     std::string getDeviceName() const;
 
     static std::vector<std::string> getHeteroDevices(std::string fallbackDevice);
+    static std::vector<std::string> getMultiDevices(std::string devicesList);
 };
 
 }  // namespace InferenceEngine
index aba3c13..eb8fd8d 100644 (file)
@@ -42,6 +42,7 @@ public:
     Task();
 
     explicit Task(const std::function<void()> &function);
+    virtual ~Task() = default;
 
     /**
      * @brief Executes the task with catching all exceptions. It doesn't check that task is running
index 1608293..d5f2018 100644 (file)
@@ -21,6 +21,7 @@ public:
     typedef std::shared_ptr<TaskSynchronizer> Ptr;
 
     TaskSynchronizer() : _taskCount(0) {}
+    virtual ~TaskSynchronizer() = default;
 
     virtual void lock() {
         auto taskID = _addTaskToQueue();
index 18ed1c1..414f65d 100644 (file)
@@ -101,13 +101,13 @@ public:
     void waitAllAsyncTasks() {
         try {
             while (!_listAsyncTasks.empty()) {
-                _listAsyncTasks.remove_if([this](StagedTask::Ptr task) -> bool {
+                _listAsyncTasks.remove_if([](StagedTask::Ptr task) -> bool {
                     auto sts = task->getStatus();
                     return !task->isOnWait() && (Task::Status::TS_DONE == sts || Task::Status::TS_ERROR == sts ||
                                                  Task::Status::TS_INITIAL == sts);
                 });
                 auto findIter = std::find_if(_listAsyncTasks.begin(), _listAsyncTasks.end(),
-                                             [this](StagedTask::Ptr task) { return !task->isOnWait(); });
+                                             [](StagedTask::Ptr task) { return !task->isOnWait(); });
                 if (findIter != _listAsyncTasks.end()) {
                     try {
                         (*findIter)->wait(-1);
index 7d5a9fd..10f11e7 100644 (file)
@@ -3,6 +3,7 @@
 //
 
 #pragma once
+
 #include <string>
 #include <cpp_interfaces/interface/ie_imemory_state_internal.hpp>
 
@@ -16,7 +17,7 @@ class MemoryStateInternal : public IMemoryStateInternal {
     std::string name;
     Blob::Ptr state;
 
- public:
+public:
     explicit MemoryStateInternal(std::string name) : name(name) {
     }
     std::string GetName() const override {
index 387c19b..cebc688 100644 (file)
@@ -3,6 +3,9 @@
 //
 
 #pragma once
+
+#include <ie_blob.h>
+
 #include <string>
 #include <memory>
 
@@ -11,7 +14,7 @@ namespace InferenceEngine {
  * @brief minimal interface for memory state implementation
  */
 class IMemoryStateInternal {
- public:
+public:
     using Ptr = std::shared_ptr<IMemoryStateInternal>;
 
     virtual ~IMemoryStateInternal() = default;
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_internal_plugin_config.hpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_internal_plugin_config.hpp
new file mode 100644 (file)
index 0000000..8be8cae
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief A header for properties that are passed from IE to plugins
+ *        or from one plugin to another
+ * @file ie_internal_plugin_config.hpp
+ */
+#pragma once
+
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace InferenceEngine {
+
+namespace InternalPluginConfigParams {
+
+/**
+* @brief shortcut for defining internal configuration keys
+*/
+#define IE_INTERNAL_CONFIG_KEY(name) InferenceEngine::InternalPluginConfigParams::_IE_INTERNAL_CONFIG_KEY(name)
+#define _IE_INTERNAL_CONFIG_KEY(name) KEY_##name
+#define DECLARE_IE_INTERNAL_CONFIG_KEY(name) static constexpr auto _IE_INTERNAL_CONFIG_KEY(name) = #name
+
+/**
+ * @brief This key is used to mark executable subnetworks that consume the original network inputs
+ */
+DECLARE_IE_INTERNAL_CONFIG_KEY(SUBNETWORK_WITH_NETWORK_INPUTS);
+
+}  // namespace InternalPluginConfigParams
+}  // namespace InferenceEngine
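
A short sketch of how a plugin-side component could consume the key declared above (the hasNetworkInputs helper and the "YES" value are illustrative, not taken from an actual plugin):

    #include <map>
    #include <string>
    #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

    // IE_INTERNAL_CONFIG_KEY(SUBNETWORK_WITH_NETWORK_INPUTS) names the constant
    // KEY_SUBNETWORK_WITH_NETWORK_INPUTS, whose value is "SUBNETWORK_WITH_NETWORK_INPUTS".
    static bool hasNetworkInputs(const std::map<std::string, std::string>& config) {
        auto it = config.find(IE_INTERNAL_CONFIG_KEY(SUBNETWORK_WITH_NETWORK_INPUTS));
        return it != config.end() && it->second == "YES";
    }

    int main() {
        std::map<std::string, std::string> config = {
            {IE_INTERNAL_CONFIG_KEY(SUBNETWORK_WITH_NETWORK_INPUTS), "YES"}};
        return hasNetworkInputs(config) ? 0 : 1;
    }
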
index 2e9200d..6f90705 100644 (file)
@@ -84,7 +84,9 @@ inline std::ostream & operator << (std::ostream &out, const std::vector<T> &vec)
  * @param s - string to trim
  */
 inline void ltrim(std::string &s) {
-    s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));
+    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c){
+        return !std::isspace(c);
+    }));
 }
 
 /**
@@ -92,7 +94,9 @@ inline void ltrim(std::string &s) {
  * @param s - string to trim
  */
 inline void rtrim(std::string &s) {
-    s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());
+    s.erase(std::find_if(s.rbegin(), s.rend(), [](int c) {
+        return !std::isspace(c);
+    }).base(), s.end());
 }
 
 /**
index eab19b7..ab7dbc1 100644 (file)
@@ -702,9 +702,6 @@ inline CNNNetPtr CNNNetCopy(const ICNNNetwork &input, const Copier &cp) {
     auto net = std::make_shared<details::CNNNetworkImpl>();
 
     // setting base args
-    IE_SUPPRESS_DEPRECATED_START
-    net->setTargetDevice(input.getTargetDevice());
-    IE_SUPPRESS_DEPRECATED_END
     net->setPrecision(input.getPrecision());
 
     char name[1024];
index 609a133..3c8e540 100644 (file)
@@ -24,6 +24,7 @@ namespace InferenceEngine {
 class INFERENCE_ENGINE_API_CLASS(ConstTransformer) {
 public:
     explicit ConstTransformer(details::CNNNetworkImpl* _network);
+    virtual ~ConstTransformer() = default;
 
     /**
      * @brief calculates const layers, combines const subgraph into a single const layers
diff --git a/inference-engine/src/inference_engine/hetero/hetero_plugin.cpp b/inference-engine/src/inference_engine/hetero/hetero_plugin.cpp
deleted file mode 100644 (file)
index c8528f8..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "ie_metric_helpers.hpp"
-#include "hetero_plugin.hpp"
-#include <memory>
-#include <vector>
-#include <map>
-#include <string>
-#include "ie_plugin_config.hpp"
-#include "hetero/hetero_plugin_config.hpp"
-#include <cpp_interfaces/base/ie_plugin_base.hpp>
-#include "hetero_plugin_base.hpp"
-#include "inference_engine.hpp"
-#include "hetero_executable_network.hpp"
-#include "fallback_policy.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::PluginConfigParams;
-using namespace InferenceEngine::HeteroConfigParams;
-using namespace HeteroPlugin;
-using namespace std;
-
-IE_SUPPRESS_DEPRECATED_START
-
-IHeteroInferencePlugin::~IHeteroInferencePlugin() {
-}
-
-IE_SUPPRESS_DEPRECATED_START
-
-static Version heteroPluginDescription = {
-        {2, 0},  // plugin API version
-        CI_BUILD_NUMBER,
-        "heteroPlugin"  // plugin description message
-};
-
-void Engine::GetVersion(const Version *&versionInfo)noexcept {
-    versionInfo = &heteroPluginDescription;
-}
-
-Engine::Engine() {
-    _pluginName = "HETERO";
-    _config[InferenceEngine::PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = "YES";
-    _config[KEY_HETERO_DUMP_GRAPH_DOT] = NO;
-}
-
-InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const ICore * core, InferenceEngine::ICNNNetwork &network,
-                                                                           const std::map<std::string, std::string> &config) {
-    // TODO(amalyshe) do we need here verification of input precisions?
-    std::map<std::string, std::string> tconfig;
-    tconfig = config;
-
-    // we must not override the parameter, but need to copy everything from plugin config
-    for (auto && c : _config) {
-        if (tconfig.find(c.first) == tconfig.end()) {
-            tconfig[c.first] = c.second;
-        }
-    }
-
-    return std::make_shared<HeteroExecutableNetwork>(network, core, tconfig, _extensions, _deviceLoaders, error_listener);
-}
-
-void Engine::SetConfig(const std::map<std::string, std::string> &config) {
-    if (_config.find("TARGET_FALLBACK") == _config.end()) {
-        _config["TARGET_FALLBACK"] = "";
-    }
-
-    for (auto &&i : config) {
-        _config[i.first] = i.second;
-    }
-}
-
-IE_SUPPRESS_DEPRECATED_START
-void Engine::SetDeviceLoader(const std::string &device,
-                             IHeteroDeviceLoader::Ptr pLoader) {
-    _deviceLoaders[device] = pLoader;
-}
-IE_SUPPRESS_DEPRECATED_END
-
-void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
-    _extensions.push_back(extension);
-}
-
-void Engine::SetAffinity(InferenceEngine::ICNNNetwork &network,
-                         const std::map<std::string, std::string> &config) {
-    FallbackPolicy fbPolicy(_deviceLoaders, _config[KEY_HETERO_DUMP_GRAPH_DOT] == YES, GetCore());
-    fbPolicy.init(_config["TARGET_FALLBACK"], config, _extensions);
-    fbPolicy.setAffinity(fbPolicy.getAffinities(config, network), network);
-}
-
-void Engine::SetLogCallback(IErrorListener &listener) {
-    error_listener = &listener;
-
-    IE_SUPPRESS_DEPRECATED_START
-    for (auto& device_loader : _deviceLoaders)
-        device_loader.second->SetLogCallback(*error_listener);
-    IE_SUPPRESS_DEPRECATED_END
-}
-
-void Engine::QueryNetwork(const ICNNNetwork &network, const std::map<std::string, std::string>& config, QueryNetworkResult &res) const {
-    auto _deviceLoaders_ = _deviceLoaders;
-
-    auto it = _config.find(KEY_HETERO_DUMP_GRAPH_DOT);
-    IE_ASSERT(it !=  _config.end());
-    FallbackPolicy fbPolicy(_deviceLoaders_, it->second == YES, GetCore());
-    it = _config.find("TARGET_FALLBACK");
-    if (it == _config.end()) {
-        it = config.find("TARGET_FALLBACK");
-
-        if (it == config.end()) {
-            THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
-        }
-    }
-    fbPolicy.init(it->second, config, _extensions);
-    res = fbPolicy.getAffinities(config, network);
-}
-
-Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter> & options) const {
-    if (METRIC_KEY(SUPPORTED_METRICS) == name) {
-        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, std::vector<std::string>{
-            METRIC_KEY(SUPPORTED_METRICS),
-            METRIC_KEY(SUPPORTED_CONFIG_KEYS)});
-    } else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
-        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, std::vector<std::string>{
-            HETERO_CONFIG_KEY(DUMP_GRAPH_DOT),
-            "TARGET_FALLBACK",
-            CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)});
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported Plugin metric: " << name;
-    }
-}
-
-Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter> & options) const {
-    if (name == HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)) {
-        auto it = _config.find(KEY_HETERO_DUMP_GRAPH_DOT);
-        IE_ASSERT(it != _config.end());
-        bool dump = it->second == YES;
-        return { dump };
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported config key: " << name;
-    }
-}
-
-namespace HeteroPlugin {
-
-InferenceEngine::StatusCode CreateHeteroPluginEngine(
-        InferenceEngine::IInferencePlugin *&plugin,
-        InferenceEngine::ResponseDesc *resp) noexcept {
-    try {
-        plugin = new HeteroPluginBase<Engine>(
-                {{2, 0}, "heteroPlugin", "heteroPlugin"},
-                std::make_shared<Engine>());
-        return OK;
-    }
-    catch (std::exception &ex) {
-        return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
-    }
-}
-
-}  // namespace HeteroPlugin
index 6283695..9fbc310 100644 (file)
@@ -72,6 +72,7 @@ static InferenceEngine::Builder::ConverterRegister _reg_converter_##__type(#__ty
 
 class INodeConverter {
 public:
+    virtual ~INodeConverter() = default;
     virtual CNNLayer::Ptr createLayer(const std::shared_ptr<ngraph::Node>& layer, const Precision &precision) const = 0;
     virtual bool canCreate(const std::shared_ptr<ngraph::Node>& node) const = 0;
 
@@ -97,6 +98,7 @@ public:
 class BaseConverter {
 public:
     explicit BaseConverter(const std::string& type): type(type) {}
+    virtual ~BaseConverter() = default;
 
     virtual CNNLayer::Ptr createLayer(const std::shared_ptr<const ILayer>& layer, Precision precision) = 0;
     virtual bool canCreate(const std::string& nodeType) const = 0;
index 83351a1..8a02da4 100644 (file)
@@ -134,7 +134,7 @@ StatusCode CNNNetReaderImpl::ReadNetwork(pugi::xml_document& xmlDoc) {
 
         _version = GetFileVersion(root);
         if (_version < 2) THROW_IE_EXCEPTION << "deprecated IR version: " << _version;
-        if (_version > 6) THROW_IE_EXCEPTION << "cannot parse future versions: " << _version;
+        if (_version > 7) THROW_IE_EXCEPTION << "cannot parse future versions: " << _version;
         _parser = parserCreator->create(_version);
         network = _parser->Parse(root);
         name = network->getName();
index 7c01e37..94c882d 100644 (file)
@@ -25,6 +25,7 @@ namespace details {
 struct FormatParserCreator {
     using Ptr = std::shared_ptr<FormatParserCreator>;
     virtual std::shared_ptr<IFormatParser> create(size_t version) = 0;
+    virtual ~FormatParserCreator() = default;
 };
 
 struct V2FormatParserCreator : public FormatParserCreator {
index 3146ec7..06dce03 100644 (file)
@@ -8,12 +8,11 @@
 #include "details/ie_exception_conversion.hpp"
 #include "cpp_interfaces/base/ie_plugin_base.hpp"
 #include "details/ie_so_pointer.hpp"
+#include "multi-device/multi_device_config.hpp"
 
-#include "hetero/hetero_plugin.hpp"
 #include "ie_util_internal.hpp"
 #include "file_utils.h"
 #include "ie_icore.hpp"
-#include "cpp_interfaces/ie_itask_executor.hpp"
 
 #include <fstream>
 #include <sstream>
@@ -84,25 +83,31 @@ std::vector<std::string> DeviceIDParser::getHeteroDevices(std::string fallbackDe
     return deviceNames;
 }
 
-class Core::Impl : public ICore {
-    void RegisterHeteroPlugin() {
-        IInferencePlugin * plugin = nullptr;
-        ResponseDesc resp;
-        HeteroPlugin::CreateHeteroPluginEngine(plugin, &resp);
+std::vector<std::string> DeviceIDParser::getMultiDevices(std::string devicesList) {
+    std::vector<std::string> deviceNames;
+    auto trim_request_info = [] (std::string device_with_requests){
+        auto opening_bracket = device_with_requests.find_first_of('(');
+        return device_with_requests.substr(0, opening_bracket);
+    };
+    std::string device;
+    char delimiter = ',';
+    size_t pos = 0;
+    // in addition to the device name, every entry may carry a number of requests in brackets, e.g. "CPU(100)";
+    // that request info is not needed here, so it is stripped
+    while ((pos = devicesList.find(delimiter)) != std::string::npos) {
+        auto d = devicesList.substr(0, pos);
+        deviceNames.push_back(trim_request_info(d));
+        devicesList.erase(0, pos + 1);
+    }
 
-        IInferencePluginAPI * iplugin_api_ptr = getInferencePluginAPIInterface(plugin);
-        IE_ASSERT(iplugin_api_ptr != nullptr);
+    if (!devicesList.empty())
+        deviceNames.push_back(trim_request_info(devicesList));
 
-        // set reference to ICore interface
-        iplugin_api_ptr->SetCore(this);
+    return deviceNames;
+}
 
-        std::string name = iplugin_api_ptr->GetName();
-        plugins[name] = InferencePlugin(InferenceEnginePluginPtr(plugin));
-
-        // put info about HETERO plugin to registry as well
-        pluginRegistry[name] = { "", { }, { } };
-    }
 
+class Core::Impl : public ICore {
     ITaskExecutor::Ptr          _taskExecutor = nullptr;
     mutable std::map<std::string, InferencePlugin, details::CaselessLess<std::string> > plugins;
 
@@ -115,13 +120,6 @@ class Core::Impl : public ICore {
     IErrorListener * listener = nullptr;
 
 public:
-    /**
-     * @brief Constructs Impl with HETERO plugin only
-     */
-    Impl() {
-        RegisterHeteroPlugin();
-    }
-
     ~Impl() override;
 
     /**
@@ -385,6 +383,9 @@ std::map<std::string, Version> Core::GetVersions(const std::string & deviceName)
         if (deviceName.find("HETERO:") == 0) {
             deviceNames = DeviceIDParser::getHeteroDevices(deviceName.substr(7));
             deviceNames.push_back("HETERO");
+        } else if (deviceName.find("MULTI:") == 0) {
+            deviceNames = DeviceIDParser::getMultiDevices(deviceName.substr(6));
+            deviceNames.push_back("MULTI");
         } else {
             deviceNames.push_back(deviceName);
         }
@@ -413,6 +414,9 @@ ExecutableNetwork Core::LoadNetwork(CNNNetwork network, const std::string & devi
     if (deviceName_.find("HETERO:") == 0) {
         deviceName_ = "HETERO";
         config_["TARGET_FALLBACK"] = deviceName.substr(7);
+    } else if (deviceName_.find("MULTI:") == 0) {
+        deviceName_ = "MULTI";
+        config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
     } else {
         DeviceIDParser parser(deviceName_);
         deviceName_ = parser.getDeviceName();
@@ -430,6 +434,9 @@ void Core::AddExtension(IExtensionPtr extension, const std::string & deviceName_
     if (deviceName_.find("HETERO") == 0) {
         THROW_IE_EXCEPTION << "HETERO device does not support extensions. Please, set extensions directly to fallback devices";
     }
+    if (deviceName_.find("MULTI") == 0) {
+        THROW_IE_EXCEPTION << "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
+    }
 
     DeviceIDParser parser(deviceName_);
     std::string deviceName = parser.getDeviceName();
@@ -442,6 +449,9 @@ ExecutableNetwork Core::ImportNetwork(const std::string &modelFileName, const st
     if (deviceName_.find("HETERO") == 0) {
         THROW_IE_EXCEPTION << "HETERO device does not support ImportNetwork";
     }
+    if (deviceName_.find("MULTI") == 0) {
+        THROW_IE_EXCEPTION << "MULTI device does not support ImportNetwork";
+    }
 
     DeviceIDParser parser(deviceName_);
     std::string deviceName = parser.getDeviceName();
@@ -462,6 +472,9 @@ QueryNetworkResult Core::QueryNetwork(const ICNNNetwork &network, const std::str
     auto config_ = config;
     std::string deviceName_ = deviceName;
 
+    if (deviceName_.find("MULTI") == 0) {
+        THROW_IE_EXCEPTION << "MULTI device does not support QueryNetwork";
+    }
 
     if (deviceName_.find("HETERO:") == 0) {
         deviceName_ = "HETERO";
@@ -494,6 +507,18 @@ void Core::SetConfig(const std::map<std::string, std::string> & config_, const s
         }
     }
 
+    // MULTI case
+    {
+        if (deviceName_.find("MULTI:") == 0) {
+            THROW_IE_EXCEPTION << "SetConfig is supported only for MULTI itself (without devices). "
+                                  "You can configure the devices with SetConfig before creating the MULTI on top.";
+        }
+
+        if (config_.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES) != config_.end()) {
+            THROW_IE_EXCEPTION << "Please specify DEVICE_PRIORITIES directly in the LoadNetwork call, "
+                                  "as the same DEVICE_PRIORITIES would have to be passed there anyway.";
+        }
+    }
 
     if (deviceName_.empty()) {
         _impl->SetConfigForPlugins(config_, std::string());
@@ -521,6 +546,13 @@ Parameter Core::GetConfig(const std::string & deviceName_, const std::string & n
                                   "GetConfig is also possible for the individual devices before creating the HETERO on top.";
         }
     }
+    // MULTI case
+    {
+        if (deviceName_.find("MULTI:") == 0) {
+            THROW_IE_EXCEPTION << "You can only GetConfig of the MULTI itself (without devices). "
+                                  "GetConfig is also possible for the individual devices before creating the MULTI on top.";
+        }
+    }
 
     DeviceIDParser device(deviceName_);
     std::string deviceName = device.getDeviceName();
@@ -550,6 +582,14 @@ Parameter Core::GetMetric(const std::string & deviceName_, const std::string & n
         }
     }
 
+    // MULTI case
+    {
+        if (deviceName_.find("MULTI:") == 0) {
+            THROW_IE_EXCEPTION
+                    << "You can get specific metrics with GetMetric only for the MULTI itself (without devices). "
+                       "To get an individual device's metrics, call GetMetric for each device separately";
+        }
+    }
 
     DeviceIDParser device(deviceName_);
     std::string deviceName = device.getDeviceName();
@@ -612,10 +652,6 @@ void Core::RegisterPlugins(const std::string & xmlConfigFile) {
 }
 
 void Core::UnregisterPlugin(const std::string & deviceName_) {
-    if (deviceName_.find("HETERO") == 0) {
-        THROW_IE_EXCEPTION << "HETERO device cannot be unregistered from Inference Engine";
-    }
-
     DeviceIDParser parser(deviceName_);
     std::string deviceName = parser.getDeviceName();
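
A sketch of the user-visible effect of the MULTI handling added above (it assumes the public ie_core.hpp API of this release; the CPU/GPU device list is only an example and requires those plugins to be present):

    #include <iostream>
    #include <ie_core.hpp>

    int main() {
        InferenceEngine::Core ie;

        // "MULTI:CPU(4),GPU" is split by DeviceIDParser::getMultiDevices() into
        // {"CPU", "GPU"}; the "(4)" request hint is stripped by trim_request_info.
        for (const auto& version : ie.GetVersions("MULTI:CPU(4),GPU")) {
            std::cout << version.first << " : " << version.second.description << std::endl;
        }
        return 0;
    }
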
 
index 9c4c319..678b0ba 100644 (file)
@@ -73,7 +73,7 @@ const Precision& Data::getPrecision() const {
 }
 
 const TensorDesc& Data::getTensorDesc() const {
-    if ((tensorDesc.getDims().size() == 0 && tensorDesc.getDims() != dims) ||
+    if ((tensorDesc.getDims().size() == 0 && tensorDesc.getDims() != dims && dims[0] != 1) ||
             (tensorDesc.getLayout() == Layout::ANY && layout != Layout::ANY) ||
             (!tensorDesc.getPrecision() && precision)) {
         THROW_IE_EXCEPTION << "Tensor descriptor is empty!";
index fb06cf3..e078e0c 100644 (file)
@@ -33,11 +33,17 @@ FindPluginResponse InferenceEngine::findPlugin(const FindPluginRequest& req) {
             pluginVec.push_back("myriadPlugin");
 #endif
             break;
+        case TargetDevice::eHDDL:
+            pluginVec.push_back("HDDLPlugin");
+            break;
         case TargetDevice::eGNA:
 #ifdef ENABLE_GNA
             pluginVec.push_back("GNAPlugin");
 #endif
             break;
+        case TargetDevice::eMULTI:
+            pluginVec.push_back("MultiDevicePlugin");
+            break;
         case TargetDevice::eHETERO:
             pluginVec.push_back("HeteroPlugin");
             break;
index 62d9924..51db65c 100644 (file)
@@ -8,7 +8,6 @@
 #include "ie_layer_parsers.h"
 #include "xml_parse_utils.h"
 #include "ie_blob_proxy.hpp"
-#include "range_iterator.hpp"
 #include <fstream>
 #include <sstream>
 #include "ie_icnn_network_stats.hpp"
@@ -82,6 +81,8 @@ void FormatParser::ParseGenericParams(pugi::xml_node& node, LayerParseParameters
             LayerParseParameters::LayerPortData port;
             port.precision = prms.precision;
             ParsePort(port, _cn);
+            if (prms.type == "Const")
+                prms.precision = port.precision;
             layerParsePrms.addOutputPort(port);
         }
     }
@@ -192,6 +193,7 @@ FormatParser::FormatParser(size_t version): _version(version) {
         std::make_shared<LayerCreator<ShuffleChannelsLayer>>("ShuffleChannels"),
         std::make_shared<LayerCreator<DepthToSpaceLayer>>("DepthToSpace"),
         std::make_shared<LayerCreator<SpaceToDepthLayer>>("SpaceToDepth"),
+        std::make_shared<LayerCreator<SparseFillEmptyRowsLayer>>("SparseFillEmptyRows"),
         std::make_shared<LayerCreator<ReverseSequenceLayer>>("ReverseSequence"),
         std::make_shared<LayerCreator<CNNLayer>>("Squeeze"),
         std::make_shared<LayerCreator<CNNLayer>>("Unsqueeze"),
@@ -251,7 +253,10 @@ FormatParser::FormatParser(size_t version): _version(version) {
         std::make_shared<LayerCreator<ReduceLayer>>("ReduceSum"),
         std::make_shared<LayerCreator<ReduceLayer>>("ReduceSumSquare"),
         std::make_shared<LayerCreator<CNNLayer>>("GatherTree"),
-        std::make_shared<LayerCreator<TopKLayer>>("TopK")
+        std::make_shared<LayerCreator<TopKLayer>>("TopK"),
+        std::make_shared<LayerCreator<UniqueLayer>>("Unique"),
+        std::make_shared<LayerCreator<NonMaxSuppressionLayer>>("NonMaxSuppression"),
+        std::make_shared<LayerCreator<ScatterLayer>>("ScatterUpdate")
     };
     creators.emplace_back(_version < 6 ? std::make_shared<LayerCreator<QuantizeLayer>>("Quantize") :
             std::make_shared<LayerCreator<QuantizeLayer>>("FakeQuantize"));
index a6fcf44..67b6b07 100644 (file)
@@ -10,7 +10,8 @@
 
 #include <memory>
 #include <string>
-#include <ie_plugin_ptr.hpp>
+#include "ie_plugin_ptr.hpp"
+#include "cpp_interfaces/ie_itask_executor.hpp"
 
 namespace InferenceEngine {
 
index 3fc177b..0c77a67 100644 (file)
@@ -27,7 +27,7 @@ namespace InferenceEngine {
 class IParser {
 public:
     using Ptr = std::shared_ptr<IParser>;
-
+    virtual ~IParser() = default;
     virtual std::shared_ptr<ngraph::Function> parse(const pugi::xml_node &root, const Blob::CPtr& weights) = 0;
 };
 
@@ -35,6 +35,7 @@ class IRParser {
 public:
     explicit IRParser(size_t version);
     std::shared_ptr<ngraph::Function> parse(const pugi::xml_node &root, const Blob::CPtr& weights);
+    virtual ~IRParser() = default;
 
 private:
     IParser::Ptr parser;
index 2867cd1..e4ef8a6 100644 (file)
@@ -107,10 +107,11 @@ using WBlob = TBlob<uint8_t>::Ptr;
 
 class BodyParser {
 public:
-    BodyParser(pugi::xml_node &net_node, size_t ir_version) :
-        body(net_node), parser(FormatParser(ir_version)) {}
+    BodyParser(pugi::xml_node &net_node, size_t ir_version, Precision prec) :
+            body(net_node), parser(FormatParser(ir_version)), default_precision(prec) {}
 
     void parse(PortSet in_request, PortSet out_request) {
+        body.append_attribute("precision").set_value(default_precision.name());
         auto net = parser.Parse(body);
 
         for (const auto &pi : in_request)
@@ -148,6 +149,7 @@ public:
 private:
     pugi::xml_node &body;
     FormatParser parser;
+    Precision default_precision;
 
     PortMap inputs;
     PortMap outputs;
@@ -163,7 +165,7 @@ CNNLayer::Ptr TILayerCreator::CreateLayer(pugi::xml_node& node, LayerParseParame
     auto all_inputs = allRequiredInputs(node);
     auto all_outputs = allRequiredOutputs(node);
 
-    auto parser = std::make_shared<BodyParser>(body, layerParsePrms.underIRVersion);
+    auto parser = std::make_shared<BodyParser>(body, layerParsePrms.underIRVersion, layerParsePrms.prms.precision);
     parser->parse(all_inputs, all_outputs);
 
     auto ins = parser->getInsMap();
index f5d6e10..85d7454 100644 (file)
@@ -8,7 +8,6 @@
 #include <memory>
 #include "ie_format_parser.h"
 #include "xml_parse_utils.h"
-#include "range_iterator.hpp"
 #include "details/caseless.hpp"
 #include <vector>
 #include <string>
index b7f5ff5..f37889f 100644 (file)
@@ -189,10 +189,6 @@ LayerValidator::Ptr LayerValidators::getValidator(const std::string& type) {
     return _validators[type];
 }
 
-void LayerValidators::addImpl(const std::string& type, const LayerValidator::Ptr& validator) {
-    _validators[type] = validator;
-}
-
 LayerValidators* LayerValidators::_instance = nullptr;
 
 GeneralValidator::GeneralValidator(const std::string& _type) : LayerValidator(_type) {}
@@ -224,7 +220,7 @@ void FullyConnectedValidator::checkCorrespondence(const CNNLayer* layer,
 FullyConnectedValidator::FullyConnectedValidator(const std::string& _type) : LayerValidator(_type) {}
 
 void FullyConnectedValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
-    checkNumOfInput(inShapes, {1});
+    checkNumOfInput(inShapes, {1, 2, 3});
 }
 
 void CropValidator::parseParams(CNNLayer* layer) {
@@ -437,7 +433,7 @@ void ConvolutionValidator::checkCorrespondence(const CNNLayer* layer,
 }
 
 void ConvolutionValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
-    checkNumOfInput(inShapes, {1});
+    checkNumOfInput(inShapes, {1, 2, 3});
 }
 
 void DeconvolutionValidator::parseParams(CNNLayer* layer) {
@@ -498,7 +494,7 @@ void DeconvolutionValidator::checkCorrespondence(const CNNLayer* layer,
 }
 
 void DeconvolutionValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
-    checkNumOfInput(inShapes, {1});
+    checkNumOfInput(inShapes, {1, 2, 3});
 }
 
 void DeformableConvolutionValidator::parseParams(CNNLayer* layer) {
@@ -543,7 +539,7 @@ void DeformableConvolutionValidator::checkCorrespondence(const CNNLayer* layer,
 }
 
 void DeformableConvolutionValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
-    checkNumOfInput(inShapes, {2});
+    checkNumOfInput(inShapes, {2, 3, 4});
 }
 
 PoolingValidator::PoolingValidator(const std::string& _type) : LayerValidator(_type) {}
@@ -858,8 +854,6 @@ void EltwiseValidator::parseParams(CNNLayer* layer) {
         casted->_operation = EltwiseLayer::Pow;
     } else if (op == "mean") {
         casted->_operation = EltwiseLayer::Mean;
-    } else if (op == "select") {
-        casted->_operation = EltwiseLayer::Select;
     } else {
         THROW_IE_EXCEPTION << "Unsupported element wise operation: " << op;
     }
@@ -1447,6 +1441,52 @@ void SpaceToDepthValidator::checkShapes(const CNNLayer* layer, const vector<Size
 }
 
 
+SparseFillEmptyRowsValidator::SparseFillEmptyRowsValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void SparseFillEmptyRowsValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<SparseFillEmptyRowsLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of SparseFillEmptyRows class";
+    }
+}
+
+void SparseFillEmptyRowsValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void SparseFillEmptyRowsValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const SparseFillEmptyRowsLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of SparseFillEmptyRows class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 4)
+        THROW_IE_EXCEPTION << layer->name << " SparseFillEmptyRows must have 4 inputs, but actually it has: " << numInputs;
+
+    // Check dimensions of a tensor with input indices
+    if (inShapes[0].size() != 2)
+        THROW_IE_EXCEPTION << layer->name << " Input indices of SparseFillEmptyRows must be 2-D tensor";
+    if (inShapes[0][1] != 2)
+        THROW_IE_EXCEPTION << layer->name << " The second dimension of input indices of SparseFillEmptyRows must be 2";
+
+    // Check dimensions of a tensor with input values
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Input values of SparseFillEmptyRows must be 1-D tensor";
+    if (inShapes[1][0] != inShapes[0][0])
+        THROW_IE_EXCEPTION << layer->name << " Number of input indices and values must match";
+
+    // Check dimensions of a tensor with a dense shape
+    if (inShapes[2].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Dense shape of SparseFillEmptyRows must be 1-D tensor";
+    // TODO: check that dense shape value is set
+
+    // Check dimensions of a tensor with default value
+    if (inShapes[3].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Default value of SparseFillEmptyRows must be 1-D tensor";
+}
+
+
 ReverseSequenceValidator::ReverseSequenceValidator(const std::string& _type) : LayerValidator(_type) {}
 
 void ReverseSequenceValidator::parseParams(CNNLayer* layer) {
@@ -1665,59 +1705,31 @@ static RNNSequenceLayer::Direction direction_from(string direction_name) {
            RNNSequenceLayer::FWD;
 }
 
-template<>
-std::vector<std::string>
-RNNBaseValidator<RNNSequenceLayer::LSTM>::def_acts = {"sigmoid", "tanh", "tanh"};
-template<>
-std::vector<float>
-RNNBaseValidator<RNNSequenceLayer::LSTM>::def_alpha = {0, 0, 0};
-template<>
-std::vector<float>
-RNNBaseValidator<RNNSequenceLayer::LSTM>::def_beta = {0, 0, 0};
-template<>
-size_t
-RNNBaseValidator<RNNSequenceLayer::LSTM>::G = 4;
-template<>
-size_t
-RNNBaseValidator<RNNSequenceLayer::LSTM>::NS = 2;
-
-template<>
-std::vector<std::string>
-RNNBaseValidator<RNNSequenceLayer::GRU>::def_acts = {"sigmoid", "tanh"};
-template<>
-std::vector<float>
-RNNBaseValidator<RNNSequenceLayer::GRU>::def_alpha = {0, 0};
-template<>
-std::vector<float>
-RNNBaseValidator<RNNSequenceLayer::GRU>::def_beta = {0, 0};
-template<>
-size_t
-RNNBaseValidator<RNNSequenceLayer::GRU>::G = 3;
-template<>
-size_t
-RNNBaseValidator<RNNSequenceLayer::GRU>::NS = 1;
-
-template<>
-std::vector<std::string>
-RNNBaseValidator<RNNSequenceLayer::RNN>::def_acts = {"tanh"};
-template<>
-std::vector<float>
-RNNBaseValidator<RNNSequenceLayer::RNN>::def_alpha = {0};
-template<>
-std::vector<float>
-RNNBaseValidator<RNNSequenceLayer::RNN>::def_beta = {0};
-template<>
-size_t
-RNNBaseValidator<RNNSequenceLayer::RNN>::G = 1;
-template<>
-size_t
-RNNBaseValidator<RNNSequenceLayer::RNN>::NS = 1;
-
-template<RNNSequenceLayer::CellType CELL>
-RNNBaseValidator<CELL>::RNNBaseValidator(const std::string& _type) : LayerValidator(_type) {}
+RNNBaseValidator::RNNBaseValidator(const std::string& _type, RNNSequenceLayer::CellType CELL) : LayerValidator(_type) {
+    if (RNNSequenceLayer::LSTM == CELL) {
+        def_acts = {"sigmoid", "tanh", "tanh"};
+        def_alpha = {0, 0, 0};
+        def_beta = {0, 0, 0};
+        G = 4;
+        NS = 2;
+    } else if (RNNSequenceLayer::GRU == CELL) {
+        def_acts = {"sigmoid", "tanh"};
+        def_alpha = {0, 0};
+        def_beta = {0, 0};
+        G = 3;
+        NS = 1;
+    } else if (RNNSequenceLayer::RNN == CELL) {
+        def_acts = {"tanh"};
+        def_alpha = {0};
+        def_beta = {0};
+        G = 1;
+        NS = 1;
+    } else {
+        IE_ASSERT(false);
+    }
+}
 
-template<RNNSequenceLayer::CellType CELL>
-void RNNBaseValidator<CELL>::parseParams(CNNLayer* layer) {
+void RNNBaseValidator::parseParams(CNNLayer* layer) {
     auto rnn = dynamic_cast<RNNCellBase*>(layer);
     if (!rnn)
         THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
@@ -1735,8 +1747,7 @@ void RNNBaseValidator<CELL>::parseParams(CNNLayer* layer) {
     }
 }
 
-template<RNNSequenceLayer::CellType CELL>
-void RNNBaseValidator<CELL>::checkParams(const InferenceEngine::CNNLayer *layer) {
+void RNNBaseValidator::checkParams(const InferenceEngine::CNNLayer *layer) {
     auto rnn = dynamic_cast<const RNNCellBase*>(layer);
     if (!rnn)
         THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
@@ -1761,8 +1772,7 @@ void RNNBaseValidator<CELL>::checkParams(const InferenceEngine::CNNLayer *layer)
                            << "but provided " << rnn->activation_beta.size();
 }
 
-template<RNNSequenceLayer::CellType CELL>
-void RNNBaseValidator<CELL>::checkCorrespondence(const CNNLayer* layer,
+void RNNBaseValidator::checkCorrespondence(const CNNLayer* layer,
          const map<string, Blob::Ptr>& blobs,
          const vector<SizeVector>& inShapes) const {
     auto rnn = dynamic_cast<const RNNCellBase*>(layer);
@@ -1799,11 +1809,11 @@ void RNNBaseValidator<CELL>::checkCorrespondence(const CNNLayer* layer,
 }
 
 template<RNNSequenceLayer::CellType CELL>
-RNNSequenceValidator<CELL>::RNNSequenceValidator(const std::string& _type) : RNNBaseValidator<CELL>(_type) {}
+RNNSequenceValidator<CELL>::RNNSequenceValidator(const std::string& _type) : RNNBaseValidator(_type, CELL) {}
 
 template<RNNSequenceLayer::CellType CELL>
 void RNNSequenceValidator<CELL>::parseParams(CNNLayer* layer) {
-    RNNBaseValidator<CELL>::parseParams(layer);
+    RNNBaseValidator::parseParams(layer);
 
     auto casted = dynamic_cast<RNNSequenceLayer*>(layer);
     if (!casted)
@@ -1817,7 +1827,7 @@ void RNNSequenceValidator<CELL>::parseParams(CNNLayer* layer) {
 
 template<RNNSequenceLayer::CellType CELL>
 void RNNSequenceValidator<CELL>::checkParams(const InferenceEngine::CNNLayer *layer) {
-    RNNBaseValidator<CELL>::checkParams(layer);
+    RNNBaseValidator::checkParams(layer);
 
     auto casted = dynamic_cast<const RNNSequenceLayer*>(layer);
     if (!casted)
@@ -1873,7 +1883,7 @@ template class details::RNNSequenceValidator<RNNSequenceLayer::GRU>;
 template class details::RNNSequenceValidator<RNNSequenceLayer::LSTM>;
 
 template<RNNSequenceLayer::CellType CELL>
-RNNCellValidator<CELL>::RNNCellValidator(const std::string& _type) : RNNBaseValidator<CELL>(_type) {}
+RNNCellValidator<CELL>::RNNCellValidator(const std::string& _type) : RNNBaseValidator(_type, CELL) {}
 
 template<RNNSequenceLayer::CellType CELL>
 void RNNCellValidator<CELL>::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
@@ -2739,4 +2749,242 @@ void TopKValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>&
         THROW_IE_EXCEPTION << layer->name << " TopK can take only 2 inputs, but actually it has: " << numInputs;
 }
 
+
+UniqueValidator::UniqueValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void UniqueValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<UniqueLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Unique class";
+    }
+
+    casted->sorted = layer->GetParamAsBool("sorted");
+    casted->return_inverse = layer->GetParamAsBool("return_inverse");
+    casted->return_counts = layer->GetParamAsBool("return_counts");
+}
+
+void UniqueValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    size_t numInputs = inShapes.size();
+    if (numInputs != 1)
+        THROW_IE_EXCEPTION << layer->name << " Unique can take only 1 input, but actually it has: " << numInputs;
+}
+
+
+NMSValidator::NMSValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void NMSValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<NonMaxSuppressionLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of NonMaxSuppression class";
+    }
+
+    casted->center_point_box = layer->GetParamAsBool("center_point_box", false);
+}
+
+void NMSValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void NMSValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    size_t numInputs = inShapes.size();
+    if (numInputs < 2 || numInputs > 5)
+        THROW_IE_EXCEPTION << layer->name << " NonMaxSuppression can take 2 - 5 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[0].size() != 3 || inShapes[0][2] != 4)
+        THROW_IE_EXCEPTION << layer->name << " 'boxes' should be with shape [num_batches, spatial_dimension, 4]";
+
+    if (inShapes[1].size() != 3)
+        THROW_IE_EXCEPTION << layer->name << " 'scores' should be with shape [num_batches, num_classes, spatial_dimension]";
+
+    if (inShapes[0][0] != inShapes[1][0])
+        THROW_IE_EXCEPTION << layer->name << " num_batches is different in 'boxes' and 'scores' tensors";
+
+    if (inShapes[0][1] != inShapes[1][2])
+        THROW_IE_EXCEPTION << layer->name << " spatial_dimension is different in 'boxes' and 'scores' tensors";
+
+    if (numInputs > 2 && !(inShapes[2].size() == 1 && inShapes[2][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'max_output_boxes_per_class' should be scalar";
+
+    if (numInputs > 3 && !(inShapes[3].size() == 1 && inShapes[3][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'iou_threshold' should be scalar";
+
+    if (numInputs > 4 && !(inShapes[4].size() == 1 && inShapes[4][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'score_threshold' should be scalar";
+}
+
+
+ScatterValidator::ScatterValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void ScatterValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<ScatterLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ScatterLayer class";
+    }
+
+    casted->axis = casted->GetParamAsInt("axis", 0);
+}
+
+void ScatterValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const ScatterLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ScatterLayer class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 3)
+        THROW_IE_EXCEPTION << layer->name << " Scatter can take only 3 inputs, but actually it has: " << numInputs;
+
+    if (!(-static_cast<int>(inShapes[0].size()) <= casted->axis && casted->axis < static_cast<int>(inShapes[0].size())))
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
+
+    if (inShapes[0].size() == 0 || (inShapes[0].size() == 1 && inShapes[0][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'Data' tensor rank should be >= 1";
+
+    if (inShapes[1].size() == 0 || (inShapes[1].size() == 1 && inShapes[1][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'Indexes' tensor rank should be >= 1";
+
+    if (inShapes[2].size() == 0 || (inShapes[2].size() == 1 && inShapes[2][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'Updates' tensor rank should be >= 1";
+
+    if (inShapes[1] != inShapes[2])
+        THROW_IE_EXCEPTION << layer->name << " 'Indexes' and 'Updates' tensors must have the same shape";
+
+    const size_t SCATTER_DATA = 0;
+    const size_t SCATTER_INDEXES = 1;
+    const size_t SCATTER_UPDATES = 2;
+
+    Precision inIdxPrecision = layer->insData[SCATTER_INDEXES].lock()->getTensorDesc().getPrecision();
+    if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indexes' precision. Only FP32 or I32 are supported!";
+
+    if (layer->insData[SCATTER_DATA].lock()->getTensorDesc().getPrecision() !=
+        layer->insData[SCATTER_UPDATES].lock()->getTensorDesc().getPrecision())
+        THROW_IE_EXCEPTION << layer->name << " Precision should be equal for input tensors 'Data' and 'Updates'";
+}
+
+
+#define REG_LAYER_VALIDATOR_FOR_TYPE(__validator, __type) \
+_validators[#__type] = std::make_shared<__validator>(#__type)
+
+LayerValidators::LayerValidators() {
+    REG_LAYER_VALIDATOR_FOR_TYPE(ActivationValidator, Activation);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ArgMaxValidator, ArgMax);
+    REG_LAYER_VALIDATOR_FOR_TYPE(BatchNormalizationValidator, BatchNormalization);
+    REG_LAYER_VALIDATOR_FOR_TYPE(CTCGreedyDecoderValidator, CTCGreedyDecoder);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ClampValidator, Clamp);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ConcatValidator, Concat);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ConstValidator, Const);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ConvolutionValidator, Convolution);
+    REG_LAYER_VALIDATOR_FOR_TYPE(CopyValidator, Copy);
+    REG_LAYER_VALIDATOR_FOR_TYPE(CropValidator, Crop);
+    REG_LAYER_VALIDATOR_FOR_TYPE(DeconvolutionValidator, Deconvolution);
+    REG_LAYER_VALIDATOR_FOR_TYPE(DeformableConvolutionValidator, DeformableConvolution);
+    REG_LAYER_VALIDATOR_FOR_TYPE(DetectionOutputValidator, DetectionOutput);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ELUValidator, ELU);
+    REG_LAYER_VALIDATOR_FOR_TYPE(EltwiseValidator, Eltwise);
+    REG_LAYER_VALIDATOR_FOR_TYPE(FullyConnectedValidator, InnerProduct);
+    REG_LAYER_VALIDATOR_FOR_TYPE(FullyConnectedValidator, FullyConnected);
+    REG_LAYER_VALIDATOR_FOR_TYPE(GRNValidator, GRN);
+    REG_LAYER_VALIDATOR_FOR_TYPE(InputValidator, Input);
+    REG_LAYER_VALIDATOR_FOR_TYPE(InterpValidator, Interp);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MVNValidator, MVN);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MemoryValidator, Memory);
+    REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, Norm);
+    REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, LRN);
+    REG_LAYER_VALIDATOR_FOR_TYPE(NormalizeValidator, Normalize);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PReLUValidator, PReLU);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PSROIPoolingValidator, PSROIPooling);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PermuteValidator, Permute);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PoolingValidator, Pooling);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PowerValidator, Power);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PowerFileValidator, PowerFile);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PriorBoxClusteredValidator, PriorBoxClustered);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PriorBoxValidator, PriorBox);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ProposalValidator, Proposal);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ROIPoolingValidator, ROIPooling);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReLUValidator, ReLU);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReLU6Validator, ReLU6);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RegionYoloValidator, RegionYolo);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReorgYoloValidator, ReorgYolo);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ResampleValidator, Resample);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReshapeValidator, Reshape);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReshapeValidator, Flatten);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ScaleShiftValidator, ScaleShift);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SigmoidValidator, Sigmoid);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SigmoidValidator, Logistic);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SimplerNMSValidator, SimplerNMS);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SoftMaxValidator, SoftMax);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SpatialTransformerValidator, SpatialTransformer);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SplitValidator, Split);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SplitValidator, Slice);
+    REG_LAYER_VALIDATOR_FOR_TYPE(GemmValidator, Gemm);
+    REG_LAYER_VALIDATOR_FOR_TYPE(PadValidator, Pad);
+    REG_LAYER_VALIDATOR_FOR_TYPE(GatherValidator, Gather);
+    REG_LAYER_VALIDATOR_FOR_TYPE(StridedSliceValidator, StridedSlice);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ShuffleChannelsValidator, ShuffleChannels);
+    REG_LAYER_VALIDATOR_FOR_TYPE(DepthToSpaceValidator, DepthToSpace);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SpaceToDepthValidator, SpaceToDepth);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SparseFillEmptyRowsValidator, SparseFillEmptyRows);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReverseSequenceValidator, ReverseSequence);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::RNN>, RNNCell);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::GRU>, GRUCell);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::LSTM>, LSTMCell);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::RNN>, RNNSequence);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::GRU>, GRUSequence);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::LSTM>, LSTMSequence);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SelectValidator, Select);
+    REG_LAYER_VALIDATOR_FOR_TYPE(SqueezeValidator, Squeeze);
+    REG_LAYER_VALIDATOR_FOR_TYPE(UnsqueezeValidator, Unsqueeze);
+    REG_LAYER_VALIDATOR_FOR_TYPE(RangeValidator, Range);
+    REG_LAYER_VALIDATOR_FOR_TYPE(FillValidator, Fill);
+    REG_LAYER_VALIDATOR_FOR_TYPE(BroadcastValidator, Broadcast);
+    REG_LAYER_VALIDATOR_FOR_TYPE(TanHValidator, TanH);
+    REG_LAYER_VALIDATOR_FOR_TYPE(TileValidator, Tile);
+    REG_LAYER_VALIDATOR_FOR_TYPE(UnpoolingValidator, Unpooling);
+    REG_LAYER_VALIDATOR_FOR_TYPE(UpsamplingValidator, Upsampling);
+    REG_LAYER_VALIDATOR_FOR_TYPE(OneHotValidator, OneHot);
+    REG_LAYER_VALIDATOR_FOR_TYPE(QuantizeValidator, Quantize);
+    REG_LAYER_VALIDATOR_FOR_TYPE(BinaryConvolutionValidator, BinaryConvolution);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Abs);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Acos);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Acosh);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Asin);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Asinh);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Atan);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Atanh);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Ceil);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Cos);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Cosh);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Erf);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Floor);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, HardSigmoid);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Log);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Neg);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Reciprocal);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Selu);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Sign);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Sin);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Sinh);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Softplus);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Softsign);
+    REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Tan);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceAnd);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceL1);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceL2);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceLogSum);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceLogSumExp);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceMax);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceMean);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceMin);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceOr);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceProd);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceSum);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceSumSquare);
+    REG_LAYER_VALIDATOR_FOR_TYPE(GatherTreeValidator, GatherTree);
+    REG_LAYER_VALIDATOR_FOR_TYPE(TopKValidator, TopK);
+    REG_LAYER_VALIDATOR_FOR_TYPE(UniqueValidator, Unique);
+    REG_LAYER_VALIDATOR_FOR_TYPE(NMSValidator, NonMaxSuppression);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ScatterValidator, ScatterUpdate);
+}
+
 }  // namespace InferenceEngine
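A note on the registration change above: validators are now created inside the LayerValidators constructor through the REG_LAYER_VALIDATOR_FOR_TYPE macro, which stringizes the layer type and stores one shared validator instance per type, replacing the static registrar objects removed from the header below. A minimal standalone sketch of the same pattern, with simplified names that are not the actual Inference Engine classes:

    // Simplified model of constructor-time validator registration; names are
    // illustrative, not the real Inference Engine types.
    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    struct Validator {
        explicit Validator(std::string type) : type(std::move(type)) {}
        virtual ~Validator() = default;
        std::string type;
    };

    struct ConvValidator : Validator { using Validator::Validator; };

    class Registry {
    public:
        static Registry& instance() { static Registry r; return r; }
        std::shared_ptr<Validator> get(const std::string& type) const {
            auto it = _validators.find(type);
            return it == _validators.end() ? nullptr : it->second;
        }
    private:
        Registry() {
            // The macro stringizes the type name and stores one shared instance, e.g.
            // _validators["Convolution"] = std::make_shared<ConvValidator>("Convolution");
    #define REG(V, T) _validators[#T] = std::make_shared<V>(#T)
            REG(ConvValidator, Convolution);
    #undef REG
        }
        std::map<std::string, std::shared_ptr<Validator>> _validators;
    };

    int main() {
        auto v = Registry::instance().get("Convolution");
        std::cout << (v ? v->type : "not found") << "\n";  // prints: Convolution
    }
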
index 0072f01..2a295a8 100644 (file)
@@ -22,11 +22,12 @@ struct InOutDims {
 /**
  * @brief Contains methods to validate layer of specific type
  */
-class INFERENCE_ENGINE_API_CLASS(LayerValidator) {
+class LayerValidator {
 public:
     using Ptr = std::shared_ptr<LayerValidator>;
 
     explicit LayerValidator(const std::string& _type) : _type(_type) {}
+    virtual ~LayerValidator() = default;
 
     /**
      * @brief It parses map of params <string,string> and applies to the layer's fields.
@@ -65,7 +66,7 @@ protected:
 /**
  * @brief Contains all validators, registered for specific layer type
  */
-class INFERENCE_ENGINE_API_CLASS(LayerValidators) {
+class LayerValidators {
 public:
     static LayerValidators* getInstance();
 
@@ -75,17 +76,15 @@ public:
 
     LayerValidator::Ptr getValidator(const std::string& type);
 
-    void addImpl(const std::string& type, const LayerValidator::Ptr& validator);
-
 private:
-    LayerValidators() = default;
+    LayerValidators();
 
 private:
     static LayerValidators* _instance;
     InferenceEngine::details::caseless_unordered_map<std::string, LayerValidator::Ptr> _validators;
 };
 
-static void getInOutShapes(const CNNLayer* layer, InOutDims& inOutShapes) {
+inline static void getInOutShapes(const CNNLayer* layer, InOutDims& inOutShapes) {
     inOutShapes.inDims.clear();
     inOutShapes.outDims.clear();
     if (layer) {
@@ -108,7 +107,7 @@ public:
     explicit GeneralValidator(const std::string& _type);
 };
 
-class INFERENCE_ENGINE_API_CLASS(ConvolutionValidator) : public LayerValidator {
+class ConvolutionValidator : public LayerValidator {
 public:
     void parseParams(CNNLayer* layer) override;
 
@@ -123,7 +122,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(DeconvolutionValidator) : public ConvolutionValidator {
+class DeconvolutionValidator : public ConvolutionValidator {
 public:
     void parseParams(CNNLayer* layer) override;
 
@@ -138,7 +137,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(DeformableConvolutionValidator) : public ConvolutionValidator {
+class DeformableConvolutionValidator : public ConvolutionValidator {
 public:
     void parseParams(CNNLayer* layer) override;
 
@@ -153,7 +152,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PoolingValidator) : public LayerValidator {
+class PoolingValidator : public LayerValidator {
 public:
     void parseParams(CNNLayer* layer) override;
 
@@ -164,7 +163,7 @@ public:
     explicit PoolingValidator(const std::string& _type);
 };
 
-class INFERENCE_ENGINE_API_CLASS(FullyConnectedValidator) : public LayerValidator {
+class FullyConnectedValidator : public LayerValidator {
 public:
     explicit FullyConnectedValidator(const std::string& _type);
 
@@ -179,7 +178,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(CropValidator) : public LayerValidator {
+class CropValidator : public LayerValidator {
 public:
     explicit CropValidator(const std::string& _type);
 
@@ -190,7 +189,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(TileValidator) : public LayerValidator {
+class TileValidator : public LayerValidator {
 public:
     explicit TileValidator(const std::string& _type);
 
@@ -201,7 +200,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(BatchNormalizationValidator) : public LayerValidator {
+class BatchNormalizationValidator : public LayerValidator {
 public:
     explicit BatchNormalizationValidator(const std::string& _type);
 
@@ -212,7 +211,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PowerValidator) : public LayerValidator {
+class PowerValidator : public LayerValidator {
 public:
     explicit PowerValidator(const std::string& _type);
 
@@ -223,7 +222,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PReLUValidator) : public LayerValidator {
+class PReLUValidator : public LayerValidator {
 public:
     explicit PReLUValidator(const std::string& _type);
 
@@ -234,7 +233,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ScaleShiftValidator) : public LayerValidator {
+class ScaleShiftValidator : public LayerValidator {
 public:
     explicit ScaleShiftValidator(const std::string& _type);
 
@@ -245,7 +244,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ReshapeValidator) : public LayerValidator {
+class ReshapeValidator : public LayerValidator {
 public:
     explicit ReshapeValidator(const std::string& _type);
 
@@ -254,7 +253,7 @@ public:
     void checkParams(const CNNLayer* layer) override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(EltwiseValidator) : public LayerValidator {
+class EltwiseValidator : public LayerValidator {
 public:
     explicit EltwiseValidator(const std::string& _type);
 
@@ -265,7 +264,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ClampValidator) : public LayerValidator {
+class ClampValidator : public LayerValidator {
 public:
     explicit ClampValidator(const std::string& _type);
 
@@ -274,7 +273,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ReLUValidator) : public LayerValidator {
+class ReLUValidator : public LayerValidator {
 public:
     explicit ReLUValidator(const std::string& _type);
 
@@ -285,7 +284,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(MVNValidator) : public LayerValidator {
+class MVNValidator : public LayerValidator {
 public:
     explicit MVNValidator(const std::string& _type);
 
@@ -296,7 +295,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(GRNValidator) : public LayerValidator {
+class GRNValidator : public LayerValidator {
 public:
     explicit GRNValidator(const std::string& _type);
 
@@ -307,7 +306,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SoftMaxValidator) : public LayerValidator {
+class SoftMaxValidator : public LayerValidator {
 public:
     explicit SoftMaxValidator(const std::string& _type);
 
@@ -318,7 +317,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(NormValidator) : public LayerValidator {
+class NormValidator : public LayerValidator {
 public:
     explicit NormValidator(const std::string& _type);
 
@@ -329,7 +328,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SplitValidator) : public LayerValidator {
+class SplitValidator : public LayerValidator {
 public:
     explicit SplitValidator(const std::string& _type);
 
@@ -340,7 +339,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ConcatValidator) : public LayerValidator {
+class ConcatValidator : public LayerValidator {
 public:
     explicit ConcatValidator(const std::string& _type);
 
@@ -351,7 +350,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(GemmValidator) : public LayerValidator {
+class GemmValidator : public LayerValidator {
 public:
     explicit GemmValidator(const std::string& _type);
 
@@ -362,7 +361,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PadValidator) : public LayerValidator {
+class PadValidator : public LayerValidator {
 public:
     explicit PadValidator(const std::string& _type);
 
@@ -373,7 +372,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(GatherValidator) : public LayerValidator {
+class GatherValidator : public LayerValidator {
 public:
     explicit GatherValidator(const std::string& _type);
 
@@ -384,7 +383,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(StridedSliceValidator) : public LayerValidator {
+class StridedSliceValidator : public LayerValidator {
 public:
     explicit StridedSliceValidator(const std::string& _type);
 
@@ -395,7 +394,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ShuffleChannelsValidator) : public LayerValidator {
+class ShuffleChannelsValidator : public LayerValidator {
 public:
     explicit ShuffleChannelsValidator(const std::string& _type);
 
@@ -406,7 +405,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(DepthToSpaceValidator) : public LayerValidator {
+class DepthToSpaceValidator : public LayerValidator {
 public:
     explicit DepthToSpaceValidator(const std::string& _type);
 
@@ -417,7 +416,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SpaceToDepthValidator) : public LayerValidator {
+class SpaceToDepthValidator : public LayerValidator {
 public:
     explicit SpaceToDepthValidator(const std::string& _type);
 
@@ -428,7 +427,18 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ReverseSequenceValidator) : public LayerValidator {
+class SparseFillEmptyRowsValidator : public LayerValidator {
+public:
+    explicit SparseFillEmptyRowsValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class ReverseSequenceValidator : public LayerValidator {
 public:
     explicit ReverseSequenceValidator(const std::string& _type);
 
@@ -439,7 +449,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SqueezeValidator) : public LayerValidator {
+class SqueezeValidator : public LayerValidator {
 public:
     explicit SqueezeValidator(const std::string& _type);
 
@@ -450,7 +460,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(UnsqueezeValidator) : public LayerValidator {
+class UnsqueezeValidator : public LayerValidator {
 public:
     explicit UnsqueezeValidator(const std::string& _type);
 
@@ -461,7 +471,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(RangeValidator) : public LayerValidator {
+class RangeValidator : public LayerValidator {
 public:
     explicit RangeValidator(const std::string& _type);
 
@@ -472,7 +482,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(FillValidator) : public LayerValidator {
+class FillValidator : public LayerValidator {
 public:
     explicit FillValidator(const std::string& _type);
 
@@ -483,7 +493,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(BroadcastValidator) : public LayerValidator {
+class BroadcastValidator : public LayerValidator {
 public:
     explicit BroadcastValidator(const std::string& _type);
 
@@ -494,10 +504,9 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-template<RNNSequenceLayer::CellType CELL>
-class INFERENCE_ENGINE_API_CLASS(RNNBaseValidator) : public LayerValidator {
+class RNNBaseValidator : public LayerValidator {
 public:
-    explicit RNNBaseValidator(const std::string& _type);
+    RNNBaseValidator(const std::string& _type, RNNSequenceLayer::CellType CELL);
 
     void parseParams(CNNLayer* layer) override;
 
@@ -508,27 +517,27 @@ public:
                              const std::vector<SizeVector>& inShapes) const override;
 
 protected:
-    static std::vector<std::string> def_acts;  // Default values for cell gate activations
-    static std::vector<float> def_alpha;  // Default activation alpha parameter
-    static std::vector<float> def_beta;   // Default activation beta parameter
-    static size_t G;   // gate number
-    static size_t NS;  // state number
+    std::vector<std::string> def_acts;  // Default values for cell gate activations
+    std::vector<float> def_alpha;  // Default activation alpha parameter
+    std::vector<float> def_beta;   // Default activation beta parameter
+    size_t G;   // gate number
+    size_t NS;  // state number
 };
 
 template<RNNSequenceLayer::CellType CELL>
-class INFERENCE_ENGINE_API_CLASS(RNNCellValidator) : public RNNBaseValidator<CELL> {
+class RNNCellValidator : public RNNBaseValidator {
 public:
     explicit RNNCellValidator(const std::string& _type);
 
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-extern template class INFERENCE_ENGINE_API_CLASS(RNNCellValidator)<RNNSequenceLayer::LSTM>;
-extern template class INFERENCE_ENGINE_API_CLASS(RNNCellValidator)<RNNSequenceLayer::GRU>;
-extern template class INFERENCE_ENGINE_API_CLASS(RNNCellValidator)<RNNSequenceLayer::RNN>;
+extern template class RNNCellValidator<RNNSequenceLayer::LSTM>;
+extern template class RNNCellValidator<RNNSequenceLayer::GRU>;
+extern template class RNNCellValidator<RNNSequenceLayer::RNN>;
 
 template<RNNSequenceLayer::CellType CELL>
-class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator) : public RNNBaseValidator<CELL> {
+class RNNSequenceValidator : public RNNBaseValidator {
 public:
     explicit RNNSequenceValidator(const std::string& _type);
 
@@ -539,11 +548,11 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-extern template class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator)<RNNSequenceLayer::LSTM>;
-extern template class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator)<RNNSequenceLayer::GRU>;
-extern template class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator)<RNNSequenceLayer::RNN>;
+extern template class RNNSequenceValidator<RNNSequenceLayer::LSTM>;
+extern template class RNNSequenceValidator<RNNSequenceLayer::GRU>;
+extern template class RNNSequenceValidator<RNNSequenceLayer::RNN>;
 
-class INFERENCE_ENGINE_API_CLASS(ArgMaxValidator) : public LayerValidator {
+class ArgMaxValidator : public LayerValidator {
 public:
     explicit ArgMaxValidator(const std::string& _type);
 
@@ -552,7 +561,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(CTCGreedyDecoderValidator) : public LayerValidator {
+class CTCGreedyDecoderValidator : public LayerValidator {
 public:
     explicit CTCGreedyDecoderValidator(const std::string& _type);
 
@@ -561,7 +570,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(DetectionOutputValidator) : public LayerValidator {
+class DetectionOutputValidator : public LayerValidator {
 public:
     explicit DetectionOutputValidator(const std::string& _type);
 
@@ -572,7 +581,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(InterpValidator) : public LayerValidator {
+class InterpValidator : public LayerValidator {
 public:
     explicit InterpValidator(const std::string& _type);
 
@@ -583,7 +592,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PermuteValidator) : public LayerValidator {
+class PermuteValidator : public LayerValidator {
 public:
     explicit PermuteValidator(const std::string& _type);
 
@@ -592,7 +601,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PriorBoxValidator) : public LayerValidator {
+class PriorBoxValidator : public LayerValidator {
 public:
     explicit PriorBoxValidator(const std::string& _type);
 
@@ -601,7 +610,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PriorBoxClusteredValidator) : public LayerValidator {
+class PriorBoxClusteredValidator : public LayerValidator {
 public:
     explicit PriorBoxClusteredValidator(const std::string& _type);
 
@@ -610,7 +619,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ProposalValidator) : public LayerValidator {
+class ProposalValidator : public LayerValidator {
 public:
     explicit ProposalValidator(const std::string& _type);
 
@@ -619,7 +628,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PSROIPoolingValidator) : public LayerValidator {
+class PSROIPoolingValidator : public LayerValidator {
 public:
     explicit PSROIPoolingValidator(const std::string& _type);
 
@@ -628,7 +637,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(RegionYoloValidator) : public LayerValidator {
+class RegionYoloValidator : public LayerValidator {
 public:
     explicit RegionYoloValidator(const std::string& _type);
 
@@ -637,7 +646,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ReorgYoloValidator) : public LayerValidator {
+class ReorgYoloValidator : public LayerValidator {
 public:
     explicit ReorgYoloValidator(const std::string& _type);
 
@@ -646,7 +655,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ResampleValidator) : public LayerValidator {
+class ResampleValidator : public LayerValidator {
 public:
     explicit ResampleValidator(const std::string& _type);
 
@@ -655,7 +664,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ROIPoolingValidator) : public LayerValidator {
+class ROIPoolingValidator : public LayerValidator {
 public:
     explicit ROIPoolingValidator(const std::string& _type);
 
@@ -664,7 +673,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SimplerNMSValidator) : public LayerValidator {
+class SimplerNMSValidator : public LayerValidator {
 public:
     explicit SimplerNMSValidator(const std::string& _type);
 
@@ -673,7 +682,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SpatialTransformerValidator) : public LayerValidator {
+class SpatialTransformerValidator : public LayerValidator {
 public:
     explicit SpatialTransformerValidator(const std::string& _type);
 
@@ -682,7 +691,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(OneHotValidator) : public LayerValidator {
+class OneHotValidator : public LayerValidator {
 public:
     explicit OneHotValidator(const std::string& _type);
 
@@ -693,7 +702,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(UpsamplingValidator) : public LayerValidator {
+class UpsamplingValidator : public LayerValidator {
 public:
     explicit UpsamplingValidator(const std::string& _type);
 
@@ -702,7 +711,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ActivationValidator) : public LayerValidator {
+class ActivationValidator : public LayerValidator {
 public:
     explicit ActivationValidator(const std::string& _type);
 
@@ -711,7 +720,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ConstValidator) : public LayerValidator {
+class ConstValidator : public LayerValidator {
 public:
     explicit ConstValidator(const std::string& _type);
 
@@ -720,7 +729,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ELUValidator) : public LayerValidator {
+class ELUValidator : public LayerValidator {
 public:
     explicit ELUValidator(const std::string& _type);
 
@@ -729,7 +738,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(InputValidator) : public LayerValidator {
+class InputValidator : public LayerValidator {
 public:
     explicit InputValidator(const std::string& _type);
 
@@ -738,7 +747,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(MemoryValidator) : public LayerValidator {
+class MemoryValidator : public LayerValidator {
 public:
     explicit MemoryValidator(const std::string& _type);
 
@@ -747,7 +756,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(NormalizeValidator) : public LayerValidator {
+class NormalizeValidator : public LayerValidator {
 public:
     explicit NormalizeValidator(const std::string& _type);
 
@@ -756,7 +765,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(CopyValidator) : public LayerValidator {
+class CopyValidator : public LayerValidator {
 public:
     explicit CopyValidator(const std::string& _type);
 
@@ -765,7 +774,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(PowerFileValidator) : public LayerValidator {
+class PowerFileValidator : public LayerValidator {
 public:
     explicit PowerFileValidator(const std::string& _type);
 
@@ -774,7 +783,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ReLU6Validator) : public LayerValidator {
+class ReLU6Validator : public LayerValidator {
 public:
     explicit ReLU6Validator(const std::string& _type);
 
@@ -783,7 +792,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SigmoidValidator) : public LayerValidator {
+class SigmoidValidator : public LayerValidator {
 public:
     explicit SigmoidValidator(const std::string& _type);
 
@@ -792,14 +801,14 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(TanHValidator) : public LayerValidator {
+class TanHValidator : public LayerValidator {
 public:
     explicit TanHValidator(const std::string& _type);
 
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(UnpoolingValidator) : public LayerValidator {
+class UnpoolingValidator : public LayerValidator {
 public:
     explicit UnpoolingValidator(const std::string& _type);
 
@@ -808,7 +817,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(QuantizeValidator) : public LayerValidator {
+class QuantizeValidator : public LayerValidator {
 public:
     explicit QuantizeValidator(const std::string& _type);
 
@@ -819,7 +828,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(BinaryConvolutionValidator) : public LayerValidator {
+class BinaryConvolutionValidator : public LayerValidator {
 public:
     void parseParams(CNNLayer* layer) override;
 
@@ -834,21 +843,21 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(SelectValidator) : public LayerValidator {
+class SelectValidator : public LayerValidator {
 public:
     explicit SelectValidator(const std::string& _type);
 
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(MathValidator) : public LayerValidator {
+class MathValidator : public LayerValidator {
 public:
     explicit MathValidator(const std::string& _type);
 
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(ReduceValidator) : public LayerValidator {
+class ReduceValidator : public LayerValidator {
 public:
     explicit ReduceValidator(const std::string& _type);
 
@@ -859,7 +868,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(GatherTreeValidator) : public LayerValidator {
+class GatherTreeValidator : public LayerValidator {
 public:
     explicit GatherTreeValidator(const std::string& _type);
 
@@ -870,7 +879,7 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(TopKValidator) : public LayerValidator {
+class TopKValidator : public LayerValidator {
 public:
     explicit TopKValidator(const std::string& _type);
 
@@ -879,130 +888,34 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-template<typename Validator>
-class ValidatorRegisterBase {
+class UniqueValidator : public LayerValidator {
 public:
-    explicit ValidatorRegisterBase(const std::string& type) {
-        LayerValidators::getInstance()->addImpl(type, std::make_shared<Validator>(type));
-    }
+    explicit UniqueValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class NMSValidator : public LayerValidator {
+public:
+    explicit NMSValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class ScatterValidator : public LayerValidator {
+public:
+    explicit ScatterValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-#define REG_LAYER_VALIDATOR_FOR_TYPE(__validator, __type) \
-static ValidatorRegisterBase<__validator> __reg__##__type(#__type)
-
-REG_LAYER_VALIDATOR_FOR_TYPE(ActivationValidator, Activation);
-REG_LAYER_VALIDATOR_FOR_TYPE(ArgMaxValidator, ArgMax);
-REG_LAYER_VALIDATOR_FOR_TYPE(BatchNormalizationValidator, BatchNormalization);
-REG_LAYER_VALIDATOR_FOR_TYPE(CTCGreedyDecoderValidator, CTCGreedyDecoder);
-REG_LAYER_VALIDATOR_FOR_TYPE(ClampValidator, Clamp);
-REG_LAYER_VALIDATOR_FOR_TYPE(ConcatValidator, Concat);
-REG_LAYER_VALIDATOR_FOR_TYPE(ConstValidator, Const);
-REG_LAYER_VALIDATOR_FOR_TYPE(ConvolutionValidator, Convolution);
-REG_LAYER_VALIDATOR_FOR_TYPE(CopyValidator, Copy);
-REG_LAYER_VALIDATOR_FOR_TYPE(CropValidator, Crop);
-REG_LAYER_VALIDATOR_FOR_TYPE(DeconvolutionValidator, Deconvolution);
-REG_LAYER_VALIDATOR_FOR_TYPE(DeformableConvolutionValidator, DeformableConvolution);
-REG_LAYER_VALIDATOR_FOR_TYPE(DetectionOutputValidator, DetectionOutput);
-REG_LAYER_VALIDATOR_FOR_TYPE(ELUValidator, ELU);
-REG_LAYER_VALIDATOR_FOR_TYPE(EltwiseValidator, Eltwise);
-REG_LAYER_VALIDATOR_FOR_TYPE(FullyConnectedValidator, InnerProduct);
-REG_LAYER_VALIDATOR_FOR_TYPE(FullyConnectedValidator, FullyConnected);
-REG_LAYER_VALIDATOR_FOR_TYPE(GRNValidator, GRN);
-REG_LAYER_VALIDATOR_FOR_TYPE(InputValidator, Input);
-REG_LAYER_VALIDATOR_FOR_TYPE(InterpValidator, Interp);
-REG_LAYER_VALIDATOR_FOR_TYPE(MVNValidator, MVN);
-REG_LAYER_VALIDATOR_FOR_TYPE(MemoryValidator, Memory);
-REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, Norm);
-REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, LRN);
-REG_LAYER_VALIDATOR_FOR_TYPE(NormalizeValidator, Normalize);
-REG_LAYER_VALIDATOR_FOR_TYPE(PReLUValidator, PReLU);
-REG_LAYER_VALIDATOR_FOR_TYPE(PSROIPoolingValidator, PSROIPooling);
-REG_LAYER_VALIDATOR_FOR_TYPE(PermuteValidator, Permute);
-REG_LAYER_VALIDATOR_FOR_TYPE(PoolingValidator, Pooling);
-REG_LAYER_VALIDATOR_FOR_TYPE(PowerValidator, Power);
-REG_LAYER_VALIDATOR_FOR_TYPE(PowerFileValidator, PowerFile);
-REG_LAYER_VALIDATOR_FOR_TYPE(PriorBoxClusteredValidator, PriorBoxClustered);
-REG_LAYER_VALIDATOR_FOR_TYPE(PriorBoxValidator, PriorBox);
-REG_LAYER_VALIDATOR_FOR_TYPE(ProposalValidator, Proposal);
-REG_LAYER_VALIDATOR_FOR_TYPE(ROIPoolingValidator, ROIPooling);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReLUValidator, ReLU);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReLU6Validator, ReLU6);
-REG_LAYER_VALIDATOR_FOR_TYPE(RegionYoloValidator, RegionYolo);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReorgYoloValidator, ReorgYolo);
-REG_LAYER_VALIDATOR_FOR_TYPE(ResampleValidator, Resample);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReshapeValidator, Reshape);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReshapeValidator, Flatten);
-REG_LAYER_VALIDATOR_FOR_TYPE(ScaleShiftValidator, ScaleShift);
-REG_LAYER_VALIDATOR_FOR_TYPE(SigmoidValidator, Sigmoid);
-REG_LAYER_VALIDATOR_FOR_TYPE(SigmoidValidator, Logistic);
-REG_LAYER_VALIDATOR_FOR_TYPE(SimplerNMSValidator, SimplerNMS);
-REG_LAYER_VALIDATOR_FOR_TYPE(SoftMaxValidator, SoftMax);
-REG_LAYER_VALIDATOR_FOR_TYPE(SpatialTransformerValidator, SpatialTransformer);
-REG_LAYER_VALIDATOR_FOR_TYPE(SplitValidator, Split);
-REG_LAYER_VALIDATOR_FOR_TYPE(SplitValidator, Slice);
-REG_LAYER_VALIDATOR_FOR_TYPE(GemmValidator, Gemm);
-REG_LAYER_VALIDATOR_FOR_TYPE(PadValidator, Pad);
-REG_LAYER_VALIDATOR_FOR_TYPE(GatherValidator, Gather);
-REG_LAYER_VALIDATOR_FOR_TYPE(StridedSliceValidator, StridedSlice);
-REG_LAYER_VALIDATOR_FOR_TYPE(ShuffleChannelsValidator, ShuffleChannels);
-REG_LAYER_VALIDATOR_FOR_TYPE(DepthToSpaceValidator, DepthToSpace);
-REG_LAYER_VALIDATOR_FOR_TYPE(SpaceToDepthValidator, SpaceToDepth);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReverseSequenceValidator, ReverseSequence);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::RNN>, RNNCell);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::GRU>, GRUCell);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::LSTM>, LSTMCell);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::RNN>, RNNSequence);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::GRU>, GRUSequence);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::LSTM>, LSTMSequence);
-REG_LAYER_VALIDATOR_FOR_TYPE(SelectValidator, Select);
-REG_LAYER_VALIDATOR_FOR_TYPE(SqueezeValidator, Squeeze);
-REG_LAYER_VALIDATOR_FOR_TYPE(UnsqueezeValidator, Unsqueeze);
-REG_LAYER_VALIDATOR_FOR_TYPE(RangeValidator, Range);
-REG_LAYER_VALIDATOR_FOR_TYPE(FillValidator, Fill);
-REG_LAYER_VALIDATOR_FOR_TYPE(BroadcastValidator, Broadcast);
-REG_LAYER_VALIDATOR_FOR_TYPE(TanHValidator, TanH);
-REG_LAYER_VALIDATOR_FOR_TYPE(TileValidator, Tile);
-REG_LAYER_VALIDATOR_FOR_TYPE(UnpoolingValidator, Unpooling);
-REG_LAYER_VALIDATOR_FOR_TYPE(UpsamplingValidator, Upsampling);
-REG_LAYER_VALIDATOR_FOR_TYPE(OneHotValidator, OneHot);
-REG_LAYER_VALIDATOR_FOR_TYPE(QuantizeValidator, Quantize);
-REG_LAYER_VALIDATOR_FOR_TYPE(BinaryConvolutionValidator, BinaryConvolution);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Abs);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Acos);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Acosh);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Asin);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Asinh);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Atan);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Atanh);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Ceil);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Cos);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Cosh);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Erf);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Floor);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, HardSigmoid);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Log);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Neg);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Reciprocal);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Selu);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Sign);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Sin);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Sinh);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Softplus);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Softsign);
-REG_LAYER_VALIDATOR_FOR_TYPE(MathValidator, Tan);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceAnd);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceL1);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceL2);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceLogSum);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceLogSumExp);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceMax);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceMean);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceMin);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceOr);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceProd);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceSum);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReduceValidator, ReduceSumSquare);
-REG_LAYER_VALIDATOR_FOR_TYPE(GatherTreeValidator, GatherTree);
-REG_LAYER_VALIDATOR_FOR_TYPE(TopKValidator, TopK);
 }  // namespace details
 }  // namespace InferenceEngine
index fd6b7df..771c52c 100644 (file)
@@ -11,6 +11,7 @@
 #include <math.h>
 #include "ie_layers_internal.hpp"
 #include "layer_transform.hpp"
+#include <cmath>
 
 namespace InferenceEngine {
 
@@ -38,10 +39,10 @@ Paddings getPaddingsInternal(const Layer &layer) {
                 return {PropertyVector<unsigned>(layer._kernel.size(), 0u),
                         PropertyVector<unsigned>(layer._kernel.size(), 0u)};
             } else {
-                if (insData.size() != 1 && layer.type != "DeformableConvolution")
-                    THROW_IE_EXCEPTION << "number of inputs should be equal 1";
-                if (insData.size() != 2 && layer.type == "DeformableConvolution")
-                    THROW_IE_EXCEPTION << "number of inputs should be equal 2";
+                if ((insData.size() > 3 || insData.empty()) && layer.type != "DeformableConvolution")
+                    THROW_IE_EXCEPTION << "number of inputs should be in range [1, 3]";
+                if ((insData.size() > 4 || insData.empty()) && layer.type == "DeformableConvolution")
+                    THROW_IE_EXCEPTION << "number of inputs should be in range [2, 4]";
                 auto firstInput = insData[0].lock();
                 if (!firstInput)
                     THROW_IE_EXCEPTION << "input is empty";
index d61bdd3..0588df9 100644 (file)
@@ -67,15 +67,16 @@ TensorDesc::TensorDesc(const Precision& precision, Layout layout): blockingDesc(
 
 TensorDesc::TensorDesc(const Precision &precision, SizeVector dims, const BlockingDesc &blockDesc)
         : dims(dims), blockingDesc(blockDesc), precision(precision)  {
+    if (dims.size() == 0 || blockingDesc.getBlockDims().size() == 0) {
+        layout = Layout::SCALAR;
+        return;
+    }
     if (dims.size() != *std::max_element(blockDesc.getOrder().begin(), blockDesc.getOrder().end()) + 1)
         THROW_IE_EXCEPTION << "Cannot create TensorDesc! Blocked dims are inconsistent with original dims.";
 
     layout = Layout::BLOCKED;
     if (dims.size() == blockingDesc.getBlockDims().size()) {
         switch (dims.size()) {
-            case 0:
-                layout = Layout::SCALAR;
-                break;
             case 1:
                 layout = Layout::C;
                 break;
@@ -123,7 +124,6 @@ TensorDesc::TensorDesc() {
 }
 
 void TensorDesc::setDims(const SizeVector &dims) {
-    this->dims = dims;
     if (layout == Layout::BLOCKED) {
         auto newDims = blockingDesc.getBlockDims();
         auto newOrder = blockingDesc.getOrder();
@@ -135,8 +135,12 @@ void TensorDesc::setDims(const SizeVector &dims) {
         }
         blockingDesc = BlockingDesc(newDims, newOrder);
     } else {
+        if (layout == Layout::SCALAR && (dims.size() > 1 || (dims.size() == 1 && dims[0] != 1)))
+            THROW_IE_EXCEPTION << "Cannot set dimensions for SCALAR layout!";
         blockingDesc = BlockingDesc(dims, layout);
     }
+    if (layout != Layout::SCALAR)
+        this->dims = dims;
 }
 
 bool TensorDesc::operator==(const TensorDesc &rhs) const {
@@ -173,6 +177,9 @@ size_t TensorDesc::offset(const SizeVector& v) const {
     if (layout == Layout::ANY)
         THROW_IE_EXCEPTION << "Cannot calculate offset for any format!";
 
+    if (layout == Layout::SCALAR)
+        return blockingDesc.getOffsetPadding();
+
     SizeVector off_v = v;
     const SizeVector& blockedDims = blockingDesc.getBlockDims();
     const SizeVector& strides = blockingDesc.getStrides();
@@ -298,7 +305,7 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding
         case Layout::NDHWC:
             checkDims(dims.size(), 5);
             l_order = {0, 2, 3, 4, 1};
-            l_dims = dims;
+            l_dims = {dims[0], dims[2], dims[3], dims[4], dims[1]};
             break;
         case Layout::CHW:
             checkDims(dims.size(), 3);
@@ -308,7 +315,7 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding
         case Layout::CN:
             checkDims(dims.size(), 2);
             l_order = {1, 0};
-            l_dims = {dims[1], dims[2]};
+            l_dims = {dims[1], dims[0]};
             break;
         case Layout::NC:
         case Layout::HW:
@@ -358,3 +365,4 @@ bool BlockingDesc::operator==(const BlockingDesc &rhs) const {
 bool BlockingDesc::operator!=(const BlockingDesc &rhs) const {
     return !(*this == rhs);
 }
+
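Two things change in the TensorDesc/BlockingDesc code above: empty dims now short-circuit to Layout::SCALAR (with setDims rejecting anything other than an empty or {1} shape for a scalar), and the corrected NDHWC and CN branches build the blocked dims as the original dims permuted by the layout order. The standalone sketch below, using assumed example shapes, demonstrates only that permutation:

    // Sketch of the dims permutation used by the corrected NDHWC and CN branches;
    // the concrete shapes here are examples only.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    static std::vector<size_t> permute(const std::vector<size_t>& dims,
                                       const std::vector<size_t>& order) {
        std::vector<size_t> out;
        out.reserve(order.size());
        for (size_t idx : order) out.push_back(dims[idx]);  // blocked dim i comes from dims[order[i]]
        return out;
    }

    int main() {
        const std::vector<size_t> ndhwc = {1, 3, 8, 16, 16};                     // N, C, D, H, W
        for (size_t d : permute(ndhwc, {0, 2, 3, 4, 1})) std::cout << d << ' ';  // 1 8 16 16 3
        std::cout << '\n';

        const std::vector<size_t> cn = {4, 2};
        for (size_t d : permute(cn, {1, 0})) std::cout << d << ' ';              // 2 4 (the two dims swapped)
        std::cout << '\n';
    }
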
index ea85d68..52d371e 100644 (file)
@@ -10,7 +10,7 @@ namespace InferenceEngine {
 namespace Metrics {
 
 template <typename T>
-class MetricType;
+struct MetricType;
 
 #define DECLARE_METRIC_KEY_IMPL(name, ...)          \
     struct name { };                                \
index 6dfdfc4..061bc41 100644 (file)
@@ -35,6 +35,16 @@ InferencePlugin PluginDispatcher::getPluginByDevice(const std::string& deviceNam
             InferenceEngine::ResponseDesc response;
             ptr->SetConfig({{"TARGET_FALLBACK", deviceName.substr(7, deviceName.length() - 7)}}, &response);
         }
+    } else if (deviceName.find("MULTI:") == 0) {
+        // MULTI found: everything after ':' is passed to the multi-device plugin as its device priorities option
+        ptr = getSuitablePlugin(InferenceEngine::TargetDeviceInfo::fromStr("MULTI"));
+        if (ptr) {
+            InferenceEngine::ResponseDesc response;
+            if (deviceName.length() < 6)
+                THROW_IE_EXCEPTION << "Missing devices priorities for the multi-device case";
+            ptr->SetConfig({{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
+                                    deviceName.substr(6, deviceName.length() - 6)}}, &response);
+        }
     } else {
         ptr = getSuitablePlugin(InferenceEngine::TargetDeviceInfo::fromStr(deviceName));
     }
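For clarity, the new MULTI branch above treats everything after the "MULTI:" prefix as the device priorities string handed to the multi-device plugin. A standalone sketch of that parsing, where the device names are examples only:

    // Illustrative parsing of a "MULTI:<priorities>" device string, mirroring the
    // substr(6) logic above; not the actual plugin dispatcher code.
    #include <iostream>
    #include <stdexcept>
    #include <string>

    static std::string multiPriorities(const std::string& deviceName) {
        const std::string prefix = "MULTI:";
        if (deviceName.compare(0, prefix.size(), prefix) != 0)
            throw std::runtime_error("not a MULTI device string");
        const std::string priorities = deviceName.substr(prefix.size());
        if (priorities.empty())
            throw std::runtime_error("Missing devices priorities for the multi-device case");
        return priorities;  // e.g. "CPU,GPU" becomes the KEY_MULTI_DEVICE_PRIORITIES value
    }

    int main() {
        std::cout << multiPriorities("MULTI:CPU,GPU") << "\n";  // prints: CPU,GPU
    }
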
index 6588d4c..b8abf79 100644 (file)
@@ -462,7 +462,6 @@ static void initScratchLinear(const cv::GMatDesc& in,
                          cv::gapi::fluid::Buffer& scratch,
                                              int  lpi) {
     using alpha_type = typename Mapper::alpha_type;
-    using index_type = typename Mapper::index_type;
     static const auto unity = Mapper::unity;
 
     auto inSz = in.size;
index 489ab24..1a2f078 100644 (file)
@@ -149,6 +149,7 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
     static const fptr cloners[] = {
         &layerCloneImpl<SelectLayer               >,
         &layerCloneImpl<BatchNormalizationLayer   >,
+        &layerCloneImpl<TopKLayer                 >,
         &layerCloneImpl<PowerLayer                >,
         &layerCloneImpl<ScaleShiftLayer           >,
         &layerCloneImpl<PReLULayer                >,
@@ -163,6 +164,7 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
         &layerCloneImpl<ShuffleChannelsLayer      >,
         &layerCloneImpl<DepthToSpaceLayer         >,
         &layerCloneImpl<SpaceToDepthLayer         >,
+        &layerCloneImpl<SparseFillEmptyRowsLayer  >,
         &layerCloneImpl<ReverseSequenceLayer      >,
         &layerCloneImpl<RangeLayer                >,
         &layerCloneImpl<FillLayer                 >,
@@ -184,12 +186,17 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
         &layerCloneImpl<ConvolutionLayer          >,
         &layerCloneImpl<TensorIterator            >,
         &layerCloneImpl<RNNSequenceLayer          >,
-        &layerCloneImpl<RNNCellBase               >,
+        &layerCloneImpl<LSTMCell                  >,
+        &layerCloneImpl<GRUCell                   >,
+        &layerCloneImpl<RNNCell                   >,
         &layerCloneImpl<QuantizeLayer             >,
         &layerCloneImpl<BinaryConvolutionLayer    >,
         &layerCloneImpl<WeightableLayer           >,
         &layerCloneImpl<OneHotLayer               >,
-        &layerCloneImpl<CNNLayer                  >
+        &layerCloneImpl<CNNLayer                  >,
+        &layerCloneImpl<UniqueLayer               >,
+        &layerCloneImpl<NonMaxSuppressionLayer    >,
+        &layerCloneImpl<ScatterLayer              >
     };
     for (auto cloner : cloners) {
         auto cloned = cloner(&source);
@@ -611,8 +618,6 @@ struct NodePrinter {
                     operation = "Pow";
                 else if (eltwise->_operation == EltwiseLayer::Mean)
                     operation = "Mean";
-                else if (eltwise->_operation == EltwiseLayer::Select)
-                    operation = "Select";
 
                 printed_properties.emplace_back("operation", operation);
             }
index b5f8b32..bbdce7b 100644 (file)
@@ -6,6 +6,8 @@
 #include "graph_tools.hpp"
 #include "details/caseless.hpp"
 #include "ie_utils.hpp"
+#include "ie_plugin.hpp"
+#include "ie_ihetero_plugin.hpp"
 
 #include <ie_layers.h>
 
 using namespace InferenceEngine;
 using namespace details;
 
+IE_SUPPRESS_DEPRECATED_START
+
+IHeteroInferencePlugin::~IHeteroInferencePlugin() {
+}
+
+IHeteroDeviceLoader::~IHeteroDeviceLoader() {
+}
+
+QueryNetworkResult::QueryNetworkResult() : rc(OK) {
+}
+
+const QueryNetworkResult & QueryNetworkResult::operator= (const QueryNetworkResult & q) {
+    supportedLayers = q.supportedLayers;
+    supportedLayersMap = q.supportedLayersMap;
+    rc = q.rc;
+    resp = q.resp;
+
+    return *this;
+}
+
+QueryNetworkResult & QueryNetworkResult::operator= (QueryNetworkResult && q) {
+    supportedLayers = q.supportedLayers;
+    supportedLayersMap = q.supportedLayersMap;
+    rc = q.rc;
+    resp = q.resp;
+
+    return *this;
+}
+
+QueryNetworkResult::QueryNetworkResult(const QueryNetworkResult & instance) :
+    supportedLayers(instance.supportedLayers),
+    supportedLayersMap(instance.supportedLayersMap),
+    rc(instance.rc),
+    resp(instance.resp) {
+}
+
+QueryNetworkResult::~QueryNetworkResult() {
+}
+
+IE_SUPPRESS_DEPRECATED_END
+
 namespace {
 
 InferenceEngine::LayerComplexity getComplexity(const InferenceEngine::CNNLayerPtr &layer) {
index 8b84be6..3712ba7 100644 (file)
@@ -10,7 +10,7 @@ INFERENCE_ENGINE_API(const Version*) GetInferenceEngineVersion() noexcept {
     // Use local static variable to make sure it is always properly initialized
     // even if called from global constructor
     static Version inferenceEngineVersion = {
-        {2, 0},  // inference engine API version
+        {2, 1},  // inference engine API version
         CI_BUILD_NUMBER,
         "API"
     };
index e386575..b2fb318 100644 (file)
@@ -27,6 +27,7 @@ using AllLayers = std::tuple <
     DeformableConvolutionLayer*,
     DeconvolutionLayer*,
     ConvolutionLayer *,
+    TopKLayer*,
     PoolingLayer*,
     FullyConnectedLayer*,
     GemmLayer*,
@@ -36,6 +37,7 @@ using AllLayers = std::tuple <
     ShuffleChannelsLayer*,
     DepthToSpaceLayer*,
     SpaceToDepthLayer*,
+    SparseFillEmptyRowsLayer*,
     ReverseSequenceLayer*,
     RangeLayer*,
     FillLayer*,
@@ -65,13 +67,68 @@ using AllLayers = std::tuple <
     BinaryConvolutionLayer*,
     WeightableLayer*,
     OneHotLayer*,
-    CNNLayer*,
     MathLayer*,
-    ReduceLayer*
+    ReduceLayer*,
+    UniqueLayer*,
+    NonMaxSuppressionLayer*,
+    ScatterLayer*,
+    CNNLayer*
 >;
 
+
+/**
+ * @brief Checks whether the type indexed as P has a parent among the elements in range I..N
+ * can be used only for P < I
+ * */
+template <size_t P, size_t I, class Tuple, class Enable = void >
+struct is_base_of_any;
+
+template <size_t IBase,
+          size_t IDerived,
+          class Tuple>
+struct is_base_of_any<
+    IBase, IDerived, Tuple,
+    typename std::enable_if<IBase < std::tuple_size<Tuple>::value, void>::type > : public std::true_type {
+    using base = typename std::remove_pointer<typename std::tuple_element<IBase, Tuple>::type>::type;
+    using derived = typename std::remove_pointer<typename std::tuple_element<IDerived, Tuple>::type>::type;
+
+    static_assert(IDerived < IBase, "cannot match parent using incorrect indices");
+    static_assert(!std::is_base_of<derived, base>::value, "probing type is a parent of a type that follows it");
+
+    // check that the incoming type has a parent in the range I..N, and that no type in that range is a child of the derived type
+    static_assert((std::is_base_of<base, derived>::value || is_base_of_any<IBase + 1, IDerived, Tuple>::value), "parent matching failed");
+};
+
+// matched once the search for a base runs past the last element of the tuple
+template <
+    size_t IBase,
+    size_t IDerived,
+    class Tuple>
+struct is_base_of_any<
+    IBase, IDerived, Tuple, typename std::enable_if<IBase >= std::tuple_size<Tuple>::value, void>::type> : public std::false_type {
+};
+
+/**
+* @brief Checks whether the types are ordered from child to base within the given list
+*/
+template <size_t P, class Tuple, class Enable = void>
+struct is_types_ordered_from_child_to_base {};
+
+template <size_t P, class Tuple>
+struct is_types_ordered_from_child_to_base<P, Tuple, typename std::enable_if<P != std::tuple_size<Tuple>::value - 2, void>::type> {
+    static constexpr bool value = is_base_of_any<P + 1, P, Tuple>::value && is_types_ordered_from_child_to_base<P + 1, Tuple>::value;
+};
+
+template <size_t P, class Tuple>
+struct is_types_ordered_from_child_to_base<P, Tuple, typename std::enable_if<P == std::tuple_size<Tuple>::value - 2, void>::type> {
+    static constexpr bool value = is_base_of_any<P + 1, P, Tuple>::value;
+};
+
+static_assert(is_types_ordered_from_child_to_base<0, AllLayers>::value,
+    "All layers must be topologically sorted as so for any layer, it's father appeared later in a types list");
+
 template<typename InjectedType, typename T>
-void dynamic_cast_layer(const CNNLayer &source, CNNLayerPtr &target, T & /*, InjectedType value*/) {
+inline void dynamic_cast_layer(const CNNLayer &source, CNNLayerPtr &target, T & /*, InjectedType value*/) {
     if (target) {
         return;
     }
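The static_assert above enforces that AllLayers lists every layer type before any of its base classes, so tuple-driven casts try the most derived type first. A minimal sketch of the same invariant on a toy hierarchy (simplified types, not the real layer classes):

    // Toy version of the child-to-base ordering check; Leaf/Mid/Base stand in for
    // the real layer hierarchy.
    #include <tuple>
    #include <type_traits>

    struct Base {};
    struct Mid : Base {};
    struct Leaf : Mid {};

    using Ordered = std::tuple<Leaf*, Mid*, Base*>;  // most derived first, as AllLayers requires

    // Element 0 (Leaf) must have one of the later elements (here Mid) as a base.
    static_assert(std::is_base_of<
                      std::remove_pointer<std::tuple_element<1, Ordered>::type>::type,
                      std::remove_pointer<std::tuple_element<0, Ordered>::type>::type>::value,
                  "each element must derive from some element listed after it");

    int main() { return 0; }
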
index 9721c5e..6a25411 100644 (file)
@@ -635,9 +635,9 @@ static CNNLayerPtr _pwr(std::string name, Precision prc, SizeVector dims, float
     res->power = 1.0;
     res->scale = scale;
     res->offset = shift;
-    res->params["power"] = std::to_string(res->power);
-    res->params["scale"] = std::to_string(res->scale);
-    res->params["shift"] = std::to_string(res->offset);
+    res->params["power"] = CNNLayer::ie_serialize_float(res->power);
+    res->params["scale"] = CNNLayer::ie_serialize_float(res->scale);
+    res->params["shift"] = CNNLayer::ie_serialize_float(res->offset);
 
     res->insData.resize(1);
     res->outData.resize(1);
@@ -747,8 +747,8 @@ static void _link_with_clip(CNNLayerPtr src, CNNLayerPtr dst, const float clip_v
         auto clip_prc = dst->precision;
         auto clip_shape = src->outData[src_port]->getTensorDesc().getDims();
         auto clip = _act(clip_name, clip_prc, clip_shape, "clamp");
-        clip->params["min"] = std::to_string(-clip_val);
-        clip->params["max"] = std::to_string(clip_val);
+        clip->params["min"] = CNNLayer::ie_serialize_float(-clip_val);
+        clip->params["max"] = CNNLayer::ie_serialize_float(clip_val);
 
         _link(src, clip, src_port, 0);
         _link(clip, dst, 0, dst_port);
@@ -1197,6 +1197,11 @@ std::vector<CNNLayerPtr> TopolSort(const TensorIterator::Body &net) {
     return TIBodySortTopologically(net);
 }
 
+void restore_net_consistency(ICNNNetwork &net) {
+    // First of all, every layer should be available via the findByName() API.
+    // In other words, all layers should be present in the internal map<name, layer>.
+    for (auto &l : TopolSort(net)) net.addLayer(l);
+}
 
 template <typename N, typename T>
 bool ApplyForAll(N &net, T action) {
@@ -1210,7 +1215,6 @@ bool ApplyForAll(N &net, T action) {
 }
 
 
-
 template <typename N, typename T, typename P>
 bool ApplyForAll_if(N &net, T action, P pred) {
     auto all_layers = TopolSort(net);
@@ -1224,14 +1228,19 @@ bool ApplyForAll_if(N &net, T action, P pred) {
 }
 
 bool CombineRNNSeq(ICNNNetwork &net) {
-    return ApplyForAll(net, convertToRNNSeq<ICNNNetwork>);
+    auto res = ApplyForAll(net, convertToRNNSeq<ICNNNetwork>);
+    restore_net_consistency(net);
+    return res;
 }
+
 bool CombineRNNSeq(TensorIterator::Body &net) {
     return ApplyForAll(net, convertToRNNSeq<TensorIterator::Body>);
 }
 
 bool UnrollTI(ICNNNetwork &net) {
-    return ApplyForAll(net, unrollTI);
+    auto res = ApplyForAll(net, unrollTI);
+    restore_net_consistency(net);
+    return res;
 }
 
 
@@ -1256,7 +1265,9 @@ bool UnrollRNN_if_impl(NET &net, const std::function<bool(const RNNCellBase&)> p
 }
 
 bool UnrollRNN_if(ICNNNetwork &net, const std::function<bool(const RNNCellBase&)> pred) {
-    return UnrollRNN_if_impl(net, pred);
+    auto res = UnrollRNN_if_impl(net, pred);
+    restore_net_consistency(net);
+    return res;
 }
 
 bool UnrollRNN_if(TensorIterator::Body &net, const std::function<bool(const RNNCellBase&)> pred) {
index db740be..bf15718 100644 (file)
@@ -121,11 +121,12 @@ void NetworkSerializer::serialize(
             }
         }
         if (!node->outData.empty()) {
-            pugi::xml_node input = layer.append_child("output");
+            pugi::xml_node output = layer.append_child("output");
             for (size_t oport = 0; oport < node->outData.size(); oport++) {
-                pugi::xml_node port = input.append_child("port");
+                pugi::xml_node port = output.append_child("port");
 
                 port.append_attribute("id").set_value(node->insData.size() + oport);
+                port.append_attribute("precision").set_value(node->outData[oport]->getPrecision().name());
 
                 for (const auto dim : node->outData[oport]->getDims()) {
                     port.append_child("dim").text().set(dim);
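// Illustrative example (editor's note, not part of the patch): with the precision attribute
// added above, a layer with one input and one FP32 output of shape 1x3x224x224 now serializes
// roughly as
//
//   <output>
//       <port id="1" precision="FP32">
//           <dim>1</dim><dim>3</dim><dim>224</dim><dim>224</dim>
//       </port>
//   </output>
//
// where the port id is offset by the number of input ports, as before.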
diff --git a/inference-engine/src/inference_engine/range_iterator.hpp b/inference-engine/src/inference_engine/range_iterator.hpp
deleted file mode 100644 (file)
index cf4578f..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-
-namespace InferenceEngine {
-
-/**
- * @Brief iterator for accesing standard c-style null terminated strings withing c++ algorithms
- * @tparam Char
- */
-template<typename Char>
-struct null_terminated_range_iterator : public std::iterator<std::forward_iterator_tag, Char> {
- public:
-    null_terminated_range_iterator() = delete;
-
-    // make a non-end iterator (well, unless you pass nullptr ;)
-    explicit null_terminated_range_iterator(Char *ptr) : ptr(ptr) {}
-
-    bool operator != (null_terminated_range_iterator const &that) const {
-        // iterators are equal if they point to the same location
-        return !(operator==(that));
-    }
-
-    bool operator == (null_terminated_range_iterator const &that) const {
-        // iterators are equal if they point to the same location
-        return ptr == that.ptr
-            // or if they are both end iterators
-            || (is_end() && that.is_end());
-    }
-
-    null_terminated_range_iterator<Char> &operator++() {
-        get_accessor()++;
-        return *this;
-    }
-
-    null_terminated_range_iterator<Char> &operator++(int) {
-        return this->operator++();
-    }
-
-    Char &operator*() {
-        return *get_accessor();
-    }
-
- protected:
-    Char *& get_accessor()  {
-        if (ptr == nullptr) {
-            throw std::logic_error("null_terminated_range_iterator dereference: pointer is zero");
-        }
-        return ptr;
-    }
-    bool is_end() const {
-        // end iterators can be created by the default ctor
-        return !ptr
-            // or by advancing until a null character
-            || !*ptr;
-    }
-
-    Char *ptr;
-};
-
-template<typename Char>
-struct null_terminated_range_iterator_end : public null_terminated_range_iterator<Char> {
- public:
-    // make an end iterator
-    null_terminated_range_iterator_end() :  null_terminated_range_iterator<Char>(nullptr) {
-        null_terminated_range_iterator<Char>::ptr = nullptr;
-    }
-};
-
-
-inline null_terminated_range_iterator<const char> null_terminated_string(const char *a) {
-    return null_terminated_range_iterator<const char>(a);
-}
-
-inline null_terminated_range_iterator<const char> null_terminated_string_end() {
-    return null_terminated_range_iterator_end<const char>();
-}
-
-}  // namespace InferenceEngine
index 2e9c33d..663bb80 100644 (file)
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "ie_built_in_impl.hpp"
+#include "precision_utils.h"
 #include <map>
 #include <memory>
 #include <string>
@@ -38,8 +39,31 @@ public:
             } else {
                 THROW_IE_EXCEPTION << "Second input must have allocated data";
             }
+        } else if (inBlobs[1]->getTensorDesc().getPrecision() == Precision::FP32) {
+            auto* buffer = inBlobs[1]->cbuffer().as<float*>();
+            if (buffer != nullptr) {
+                for (int i = 0; i < inBlobs[1]->size(); i++) {
+                    shapes.push_back(static_cast<int>(buffer[i]));
+                }
+            } else {
+                THROW_IE_EXCEPTION << "Second input must have allocated data";
+            }
+        } else if (inBlobs[1]->getTensorDesc().getPrecision() == Precision::FP16) {
+            auto* buffer = inBlobs[1]->cbuffer().as<uint16_t*>();
+            if (buffer != nullptr) {
+                for (int i = 0; i < inBlobs[1]->size(); i++) {
+                    shapes.push_back(static_cast<int>(PrecisionUtils::f16tof32(buffer[i])));
+                }
+            } else {
+                THROW_IE_EXCEPTION << "Second input must have allocated data";
+            }
+        } else if (inBlobs[1]->getTensorDesc().getPrecision() == Precision::I64) {
+            auto *buffer = inBlobs[1]->cbuffer().as<int64_t *>();
+            if (buffer != nullptr) {
+                shapes.assign(buffer, buffer + inBlobs[1]->size());
+            } else {
+                THROW_IE_EXCEPTION << "Second input must have allocated data";
+            }
         } else {
-            THROW_IE_EXCEPTION << "Second input must have I32 precision";
+            THROW_IE_EXCEPTION << "Second input must have I32 or FP32 or FP16 precision";
         }
 
         outShapes = {shapes};
index 744093a..1b731e3 100644 (file)
@@ -39,6 +39,7 @@
 #include "ie_shuffle_channels_shape_infer.hpp"
 #include "ie_depth_to_space_shape_infer.hpp"
 #include "ie_space_to_depth_shape_infer.hpp"
+#include "ie_sparse_fill_empty_rows_shape_infer.hpp"
 #include "ie_reverse_sequence_shape_infer.hpp"
 #include "ie_one_hot_shape_infer.hpp"
 #include "ie_shape_shape_infer.hpp"
@@ -57,6 +58,9 @@
 #include "ie_reduce_shape_infer.hpp"
 #include "ie_gather_tree_shape_infer.hpp"
 #include "ie_topk_shape_infer.hpp"
+#include "ie_unique_shape_infer.hpp"
+#include "ie_scatter_shape_infer.hpp"
+#include "ie_non_max_suppression_shape_infer.hpp"
 #include <algorithm>
 #include <memory>
 #include <string>
@@ -121,6 +125,7 @@ REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, LogSoftMax);
 REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, LRN);
 REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Norm);
 REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Normalize);
+REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Convert);
 // FIXME: Really Copy??? New MO doesn't generate this layer
 REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Copy);
 REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Power);
@@ -178,6 +183,7 @@ REG_SHAPE_INFER_FOR_TYPE(StridedSliceShapeProp, StridedSlice);
 REG_SHAPE_INFER_FOR_TYPE(ShuffleChannelsShapeProp, ShuffleChannels);
 REG_SHAPE_INFER_FOR_TYPE(DepthToSpaceShapeProp, DepthToSpace);
 REG_SHAPE_INFER_FOR_TYPE(SpaceToDepthShapeProp, SpaceToDepth);
+REG_SHAPE_INFER_FOR_TYPE(SparseFillEmptyRowsShapeProp, SparseFillEmptyRows);
 REG_SHAPE_INFER_FOR_TYPE(ReverseSequenceShapeProp, ReverseSequence);
 REG_SHAPE_INFER_FOR_TYPE(SelectShapeProp, Select);
 REG_SHAPE_INFER_FOR_TYPE(SqueezeShapeProp, Squeeze);
@@ -203,6 +209,7 @@ REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Erf);
 REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Floor);
 REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, HardSigmoid);
 REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Log);
+REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Exp);
 REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Neg);
 REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Reciprocal);
 REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Selu);
@@ -226,6 +233,9 @@ REG_SHAPE_INFER_FOR_TYPE(ReduceShapeProp, ReduceSum);
 REG_SHAPE_INFER_FOR_TYPE(ReduceShapeProp, ReduceSumSquare);
 REG_SHAPE_INFER_FOR_TYPE(GatherTreeShapeProp, GatherTree);
 REG_SHAPE_INFER_FOR_TYPE(TopKShapeProp, TopK);
+REG_SHAPE_INFER_FOR_TYPE(UniqueShapeProp, Unique);
+REG_SHAPE_INFER_FOR_TYPE(NMSShapeProp, NonMaxSuppression);
+REG_SHAPE_INFER_FOR_TYPE(ScatterShapeProp, Scatter);
 
 }  // namespace ShapeInfer
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_non_max_suppression_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_non_max_suppression_shape_infer.hpp
new file mode 100644 (file)
index 0000000..3b62e2b
--- /dev/null
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for NonMaxSuppression layer
+ */
+class NMSShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit NMSShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        NonMaxSuppressionLayer nmsLayer(lp);
+        nmsLayer.params = params;
+        nmsLayer.type = _type;
+        validate(&nmsLayer, inBlobs, params, blobs);
+
+        outShapes.push_back({inShapes[1][0] * inShapes[1][1] * inShapes[1][2], 3});
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
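// Illustrative note (editor's note, not part of the patch): inShapes[1] is expected to be the
// "scores" input, laid out as [num_batches, num_classes, num_boxes], so the inferred shape is
// the worst-case list of selected (batch, class, box) triplets. For example:
//
//   scores shape {2, 3, 100}  ->  outShapes == {{2 * 3 * 100, 3}} == {{600, 3}}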
index ee40144..a2662cd 100644 (file)
@@ -33,13 +33,33 @@ public:
         validate(&cnnLayer, inBlobs, params, blobs);
         SizeVector outShape;
         if (inBlobs.size() == 2) {
-            auto* buffer = inBlobs[1]->cbuffer().as<float*>();
-            if (buffer != nullptr) {
-                for (int i = 0; i < inBlobs[1]->size(); i++) {
-                    outShape.push_back(static_cast<unsigned long>(buffer[i]));
+            switch (inBlobs[1]->getTensorDesc().getPrecision()) {
+                case Precision::FP32: {
+                    auto *buffer = inBlobs[1]->cbuffer().as<float *>();
+
+                    if (buffer != nullptr) {
+                        for (int i = 0; i < inBlobs[1]->size(); i++) {
+                            outShape.push_back(static_cast<unsigned long>(buffer[i]));
+                        }
+                    } else {
+                        THROW_IE_EXCEPTION << "Second input must have allocated data";
+                    }
+                    break;
+                }
+                case Precision::I32: {
+                    auto *buffer = inBlobs[1]->cbuffer().as<int32_t *>();
+
+                    if (buffer != nullptr) {
+                        for (int i = 0; i < inBlobs[1]->size(); i++) {
+                            outShape.push_back(static_cast<unsigned long>(buffer[i]));
+                        }
+                    } else {
+                        THROW_IE_EXCEPTION << "Second input must have allocated data";
+                    }
+                    break;
                 }
-            } else {
-                THROW_IE_EXCEPTION << "Second input must have allocated data";
+                default:
+                    THROW_IE_EXCEPTION << "Unsupported second input precision";
             }
         } else {
             auto scale = cnnLayer.GetParamAsFloat("factor");
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_scatter_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_scatter_shape_infer.hpp
new file mode 100644 (file)
index 0000000..2d9efcb
--- /dev/null
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Scatter layer
+ */
+class ScatterShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit ScatterShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        ScatterLayer scatterLayer(lp);
+        scatterLayer.params = params;
+        scatterLayer.type = _type;
+        validate(&scatterLayer, inBlobs, params, blobs);
+
+        outShapes = {inShapes[0]};
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_sparse_fill_empty_rows_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_sparse_fill_empty_rows_shape_infer.hpp
new file mode 100644 (file)
index 0000000..353529a
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for SparseFillEmptyRows layer
+ */
+class SparseFillEmptyRowsShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit SparseFillEmptyRowsShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        THROW_IE_EXCEPTION << "SparseFillEmptyRows is not re-shapeable layer.";
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
index 8846bcb..bf562cf 100644 (file)
@@ -54,7 +54,7 @@ public:
             THROW_IE_EXCEPTION << " Incorrect input parameters dimensions and axis number!";
 
         int *src_k = inBlobs[TOPK_K]->cbuffer().as<int *>();
-        if (src_k != nullptr)
+        if (src_k == nullptr)
             THROW_IE_EXCEPTION << " Only const input for 'k' is supported!";
 
         src_k += inBlobs[TOPK_K]->getTensorDesc().getBlockingDesc().getOffsetPadding();
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_unique_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_unique_shape_infer.hpp
new file mode 100644 (file)
index 0000000..06fe26a
--- /dev/null
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Unique layer
+ */
+class UniqueShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit UniqueShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+        const std::map<std::string, std::string>& params,
+        const std::map<std::string, Blob::Ptr>& blobs,
+        std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        UniqueLayer unique_layer(lp);
+        unique_layer.params = params;
+        unique_layer.type = _type;
+        validate(&unique_layer, inBlobs, params, blobs);
+
+        // reshape available outputs
+        size_t num_output_edges = unique_layer.outData.size();
+        outShapes.resize(num_output_edges);
+        for (size_t i = 0; i < num_output_edges; i++) {
+            outShapes[i].resize(1);
+            outShapes[i][0] = inShapes[0][0];
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
index 076cb00..a344a71 100644 (file)
@@ -74,6 +74,10 @@ private:
                      const SizeVector &idx_dims) {
         T* idx_data = inBlobs[UNSQUEEZE_INDEXES]->cbuffer().as<T*>() +
                             inBlobs[UNSQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        if (!idx_data) {
+            outShape = data_dims;
+            return;
+        }
         size_t max = data_dims.size();
         for (size_t i = 0; i < idx_dims[0]; i++) {
             auto axis = static_cast<size_t>(castToInt32(idx_data[i]));
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/broadcast_offset.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/broadcast_offset.hpp
new file mode 100644 (file)
index 0000000..a085c7d
--- /dev/null
@@ -0,0 +1,71 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_precision.hpp>
+#include <precision_utils.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+class BroadcastOffset {
+        SizeVector dims;
+        SizeVector offset_v;
+
+        SizeVector getDims(const SizeVector& originDims, const SizeVector& outputDims) {
+            SizeVector d(outputDims.size(), 1);
+            for (int i = 0; i < originDims.size(); i++) {
+                d[d.size() - 1 - i] = originDims[originDims.size() - 1 - i];
+            }
+            return d;
+        }
+
+        SizeVector getOffset(const SizeVector& originDims, const SizeVector& outDims) {
+            SizeVector o(originDims.size());
+            if (originDims.size() != outDims.size())
+                THROW_IE_EXCEPTION << "Cannot calculate offsets! Incorrect patameters for eltwise broadcast!";
+            int k = 1;
+            for (int i = originDims.size() - 1; i >= 0; i--) {
+                o[i] = (originDims[i] == outDims[i]) ? k : 0;
+                k *= originDims[i];
+            }
+            return o;
+        }
+
+    public:
+        BroadcastOffset(const SizeVector& originDims, const SizeVector& outputDims) {
+            dims = getDims(originDims, outputDims);
+            offset_v = getOffset(dims, outputDims);
+        }
+
+        size_t offset(const SizeVector& v) const {
+            size_t off = 0;
+            if (v.size() != offset_v.size())
+                THROW_IE_EXCEPTION << "Cannot calculate offsets! Incorrect patameters for eltwise broadcast!";
+            for (size_t i = 0; i < v.size(); i++) {
+                off += v[i] * offset_v[i];
+            }
+            return off;
+        }
+
+        SizeVector offset_dims(size_t l) const {
+            size_t n_dims = dims.size();
+            SizeVector pos(n_dims);
+            for (int rd = 1; rd <= n_dims; ++rd) {
+                const size_t d = n_dims - rd;
+                const size_t cur_dim = dims[d];
+                pos[d] = l % cur_dim;
+                l /= cur_dim;
+            }
+            return pos;
+        }
+};
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
\ No newline at end of file
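// Illustrative usage sketch (editor's note, not part of the patch; broadcast_offset_demo is a
// hypothetical helper). Broadcasting a {1, 3} source against a {2, 3} output: the broadcast
// axis contributes a zero stride, so both output rows read the same three source elements.
//
//   inline void broadcast_offset_demo() {
//       using namespace InferenceEngine;
//       ShapeInfer::BroadcastOffset src({1, 3}, {2, 3});
//       ShapeInfer::BroadcastOffset dst({2, 3}, {2, 3});
//       for (size_t l = 0; l < 6; ++l) {
//           SizeVector pos = dst.offset_dims(l);  // {0,0} {0,1} {0,2} {1,0} {1,1} {1,2}
//           size_t s = src.offset(pos);           // 0, 1, 2, 0, 1, 2
//           (void)s;
//       }
//   }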
index 8435ba6..7116098 100644 (file)
 #include <string>
 #include <vector>
 #include <ie_layers.h>
+#include <ie_precision.hpp>
+#include <precision_utils.h>
+#include "ie_const_infer_impl.hpp"
 
 namespace InferenceEngine {
 namespace ShapeInfer {
 
 /**
  *@brief Implementation of Const inference for TBD layer
+ *
+ *   Table of the output precision for the given input precisions
+ *
+ *
+ *              U8       I32        I64        FP16        FP32
+ *     =============================================================
+ *     U8   ==  U8       I32        I64        FP16        FP32
+ *          ==
+ *     I32  ==  I32      I32        I64        FP32        FP32
+ *          ==
+ *     I64  ==  I64      I64        I64        FP32        FP32
+ *          ==
+ *     FP16 ==  FP16     FP32       FP32       FP16        FP32
+ *          ==
+ *     FP32 ==  FP32     FP32       FP32       FP32        FP32
+ *
+ *     There is a special case for FP16 precision: the input data are converted to FP32 and added, and the
+ *     result is converted back to FP16 if both inputs have FP16 precision, or if one is FP16 and the other is U8.
  */
+
 class AddConstInfer : public ConstInferImpl {
 public:
     explicit AddConstInfer(const std::string& type) : ConstInferImpl(type) {}
 
+    struct fp16tofp32{
+        inline float operator()(ie_fp16 value){
+            return static_cast<float>(PrecisionUtils::f16tof32(value));
+        }
+    };
+
+    struct fp32tofp16{
+        inline ie_fp16 operator()(float value){
+            return PrecisionUtils::f32tof16(value);
+        }
+    };
+
+    template<typename dataType>
+    struct noConversion{
+        inline dataType operator()(dataType value){
+            return value;
+        }
+    };
+
+    template <typename inDatatype1, typename inDatatype2, typename outDatatype, class ConversionInData1, class ConversionInData2, class ConversionOutData>
+    void add(const std::vector<Blob::CPtr>& inData,
+             const std::map<std::string, std::string>& params,
+             const std::map<std::string, Blob::Ptr>& blobs,
+             std::vector<Blob::Ptr>& outData) {
+        auto *firstBlobBuffer = inData[0]->cbuffer().as<inDatatype1*>();
+        auto *secondBlobBuffer = inData[1]->cbuffer().as<inDatatype2 *>();
+
+        if (!firstBlobBuffer || !secondBlobBuffer) {
+            THROW_IE_EXCEPTION << "empty input data";
+        }
+
+        auto outBlob = *outData.begin();
+        auto *outBuffer = outBlob->buffer().as<outDatatype *>();
+        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
+
+        BroadcastOffset outOff(outBlob->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+        BroadcastOffset inOff1(inData[0]->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+        BroadcastOffset inOff2(inData[1]->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+
+        for (size_t i = 0; i < outBlob->size(); i++) {
+            SizeVector offsetDims = outOff.offset_dims(i);
+            outBuffer[outOff.offset(offsetDims)] = ConversionOutData()(ConversionInData1()(firstBlobBuffer[inOff1.offset(offsetDims)]) +
+                                                   ConversionInData2()(secondBlobBuffer[inOff2.offset(offsetDims)]));
+        }
+    }
+
     void inferImpl(const std::vector<Blob::CPtr>& inData,
                    const std::map<std::string, std::string>& params,
                    const std::map<std::string, Blob::Ptr>& blobs,
@@ -28,20 +96,126 @@ public:
         size_t numInputs = inData.size();
         if (inData.size() != 2)
             THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". 2 inputs is supported";
-        auto* firstBlobBuffer = inData[0]->cbuffer().as<float*>();
-        auto* secondBlobBuffer = inData[1]->cbuffer().as<float*>();
 
-        if (!firstBlobBuffer || !secondBlobBuffer) {
-            THROW_IE_EXCEPTION << "empty input data";
-        }
-        auto outBlob = *outData.begin();
-        auto* outBuffer = outBlob->buffer().as<float*>();
-        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
-        if (inData[0]->size() != inData[1]->size()) {
-            THROW_IE_EXCEPTION << "inputs with different shapes are not supported";
-        }
-        for (int i = 0; i < outBlob->size(); i++) {
-            outBuffer[i] = firstBlobBuffer[i] + secondBlobBuffer[i];
+        auto compare = getPrecisionMask(inData[0]->getTensorDesc().getPrecision(),
+                                        inData[1]->getTensorDesc().getPrecision(),
+                                        outData[0]->getTensorDesc().getPrecision());
+
+        switch (compare) {
+            case getPrecisionMask(Precision::U8, Precision::U8, Precision::U8):
+                add<uint8_t, uint8_t, uint8_t, noConversion<uint8_t>, noConversion<uint8_t>,
+                        noConversion<uint8_t>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::U8, Precision::I32, Precision::I32):
+                add<uint8_t, int, int, noConversion<uint8_t>, noConversion<int>,
+                        noConversion<int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::U8, Precision::I64, Precision::I64):
+                add<uint8_t, long long int, long long int, noConversion<uint8_t>, noConversion<long long int>,
+                        noConversion<long long int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::U8, Precision::FP16, Precision::FP16):
+                add<uint8_t, ie_fp16, ie_fp16, noConversion<uint8_t>, fp16tofp32,
+                        fp32tofp16>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::U8, Precision::FP32, Precision::FP32):
+                add<uint8_t, float, float, noConversion<uint8_t>, noConversion<float>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+
+            case getPrecisionMask(Precision::I32, Precision::U8, Precision::I32):
+                add<int, uint8_t, int, noConversion<int>, noConversion<uint8_t>,
+                        noConversion<int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I32, Precision::I32):
+                add<int, int, int, noConversion<int>, noConversion<int>,
+                        noConversion<int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I64, Precision::I64):
+                add<int, long long int, long long int, noConversion<int>, noConversion<long long int>,
+                        noConversion<long long int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::FP16, Precision::FP32):
+                add<int, ie_fp16, float, noConversion<int>, fp16tofp32,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::FP32, Precision::FP32):
+                add<int, float, float, noConversion<int>, noConversion<float>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+
+            case getPrecisionMask(Precision::I64, Precision::U8, Precision::I64):
+                add<long long int, uint8_t, long long int, noConversion<long long int>, noConversion<uint8_t>,
+                        noConversion<long long int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I64, Precision::I32, Precision::I64):
+                add<long long int, int, long long int, noConversion<long long int>, noConversion<int>,
+                        noConversion<long long int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I64, Precision::I64, Precision::I64):
+                add<long long int, long long int, long long int, noConversion<long long int>, noConversion<long long int>,
+                        noConversion<long long int>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I64, Precision::FP16, Precision::FP32):
+                add<long long int, ie_fp16, float, noConversion<long long int>, fp16tofp32,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I64, Precision::FP32, Precision::FP32):
+                add<long long int, float, float, noConversion<long long int>, noConversion<float>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+
+            case getPrecisionMask(Precision::FP16, Precision::U8, Precision::FP16):
+                add<ie_fp16, uint8_t, ie_fp16, fp16tofp32, noConversion<uint8_t>,
+                        fp32tofp16>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP16, Precision::I32, Precision::FP32):
+                add<ie_fp16, int, float, fp16tofp32, noConversion<int>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP16, Precision::I64, Precision::FP32):
+                add<ie_fp16, long long int, float, fp16tofp32, noConversion<long long int>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP16, Precision::FP16, Precision::FP16):
+                add<ie_fp16, ie_fp16, ie_fp16, fp16tofp32, fp16tofp32,
+                        fp32tofp16>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP16, Precision::FP32, Precision::FP16):
+                add<ie_fp16, float , ie_fp16, fp16tofp32, noConversion<float>,
+                        fp32tofp16>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP16, Precision::FP32, Precision::FP32):
+                add<ie_fp16, float, float, fp16tofp32, noConversion<float>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+
+            case getPrecisionMask(Precision::FP32, Precision::U8, Precision::FP32):
+                add<float, uint8_t, float, noConversion<float>, noConversion<uint8_t>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::I32, Precision::FP32):
+                add<float, int, float, noConversion<float>, noConversion<int>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::I64, Precision::FP32):
+                add<float, long long int, float, noConversion<float>, noConversion<long long int>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::FP16, Precision::FP32):
+                add<float, ie_fp16, float, noConversion<float>, fp16tofp32,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::FP16, Precision::FP16):
+                add<float, ie_fp16, ie_fp16, noConversion<float>, fp16tofp32,
+                        fp32tofp16>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::FP32, Precision::FP32):
+                add<float, float, float, noConversion<float>, noConversion<float>,
+                        noConversion<float>>(inData, params, blobs, outData);
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
         }
     }
 };
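// Illustrative note (editor's note, not part of the patch): the FP16 special case from the
// table above. Two FP16 operands are widened to FP32, added, and the sum is narrowed back:
//
//   float a = PrecisionUtils::f16tof32(lhs_fp16);
//   float b = PrecisionUtils::f16tof32(rhs_fp16);
//   ie_fp16 sum = PrecisionUtils::f32tof16(a + b);
//
// which is what add<ie_fp16, ie_fp16, ie_fp16, fp16tofp32, fp16tofp32, fp32tofp16>() performs
// element-wise, with BroadcastOffset mapping every output element to its (possibly broadcast)
// operands.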
index fc4f39d..8951469 100644 (file)
@@ -14,7 +14,8 @@
 #include <ie_algorithm.hpp>
 #include "ie_const_infer_impl.hpp"
 #include "ie_parallel.hpp"
-#include "../../precision_utils.h"
+#include "precision_utils.h"
+#include "ie_memcpy.h"
 
 namespace InferenceEngine {
 namespace ShapeInfer {
@@ -47,23 +48,14 @@ public:
         if (inData[BROADCAST_SHAPE]->getTensorDesc().getDims().size() > 1)
             THROW_IE_EXCEPTION << "Shape vector should be 1 dimension";
 
-        if (inData[BROADCAST_SHAPE]->getTensorDesc().getPrecision() != Precision::I32)
-            THROW_IE_EXCEPTION << "Shape vector should be I32!";
-
-        if (!(inData[BROADCAST_INPUT]->getTensorDesc().getPrecision() == Precision::I32 &&
-              outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
-            !(inData[BROADCAST_INPUT]->getTensorDesc().getPrecision() == Precision::FP32 &&
-              outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
-            THROW_IE_EXCEPTION
-                    << "Input and output tensors should have same precision and only FP32 and I32 are supported!";
-        }
-
-        const int32_t *shape_dims = inData[BROADCAST_SHAPE]->cbuffer().as<int32_t *>() +
-                                    inData[BROADCAST_SHAPE]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        size_t data_size = inData[BROADCAST_INPUT]->getTensorDesc().getPrecision().size();
         size_t shape_size = (inData[BROADCAST_SHAPE]->getTensorDesc().getDims())[0];
         SizeVector dst_dims = outData[0]->getTensorDesc().getDims();
         SizeVector src_dims = inData[BROADCAST_INPUT]->getTensorDesc().getDims();
 
+        if (!src_dims.size())
+            src_dims = SizeVector(1, 1);
+
         if (dst_dims.size() != shape_size) {
             THROW_IE_EXCEPTION << "Output tensor dimension mismatch";
         }
@@ -72,26 +64,15 @@ public:
             THROW_IE_EXCEPTION << "Output tensor dimension is smaller then input tensor dimension";
         }
 
-        size_t i;
-        for (i = 0; i < dst_dims.size(); i++) {
-            if (static_cast<int>(dst_dims[i]) != shape_dims[i]) {
-                THROW_IE_EXCEPTION << "Output tensor dimension size mismatch";
-            }
-        }
-
-        size_t prefix_size = dst_dims.size() - src_dims.size();
-        for (i = 0; i < src_dims.size(); i++) {
-            if (src_dims[i] != 1 && static_cast<int>(src_dims[i]) != shape_dims[i + prefix_size]) {
-                THROW_IE_EXCEPTION
-                        << "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1";
-            }
-        }
-
         InferenceEngine::SizeVector dstStrides = outData[0]->getTensorDesc().getBlockingDesc().getStrides();
         InferenceEngine::SizeVector srcStrides = inData[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getStrides();
         InferenceEngine::SizeVector src_aligned(dst_dims.size());
         InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
-        for (i = 0; i < dst_dims.size(); i++) {
+        if (!srcStrides.size())
+            srcStrides = SizeVector(1, 1);
+
+        size_t prefix_size = dst_dims.size() - src_dims.size();
+        for (size_t i = 0; i < dst_dims.size(); i++) {
             if (i < prefix_size) {
                 src_aligned[i] = 1;
                 srcStrides_aligned[i] = srcStrides[0];
@@ -102,67 +83,31 @@ public:
         }
 
         size_t work_amount_dst = dstStrides[0] * dst_dims[0];
-
-        switch (outData[0]->getTensorDesc().getPrecision()) {
-            case Precision::FP32: {
-                const float *src_data = inData[BROADCAST_INPUT]->cbuffer().as<const float *>() +
-                                        inData[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-                float *dst_data = outData[0]->cbuffer().as<float *>() +
-                                  outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-                parallel_nt(0, [&](const int ithr, const int nthr) {
-                    size_t i, src_idx, start = 0, end = 0;
-                    SizeVector counters(dst_dims.size(), 0);
-                    splitter(work_amount_dst, nthr, ithr, start, end);
-                    for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
-                        counters[j] = i % dst_dims[j];
-                        i /= dst_dims[j];
-                    }
-                    for (size_t iwork = start; iwork < end; ++iwork) {
-                        for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
-                            src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
-
-                        dst_data[iwork] = src_data[src_idx];
-
-                        for (int j = dst_dims.size() - 1; j >= 0; j--) {
-                            counters[j] = (counters[j] + 1) % dst_dims[j];
-                            if (counters[j] != 0) break;
-                        }
-                    }
-                });
+        const uint8_t *src_data = inData[BROADCAST_INPUT]->cbuffer().as<const uint8_t *>() +
+                                  inData[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        uint8_t* dst_data = outData[0]->cbuffer().as<uint8_t *>() +
+                            outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t i, src_idx, start = 0, end = 0;
+            SizeVector counters(dst_dims.size(), 0);
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
+                counters[j] = i % dst_dims[j];
+                i /= dst_dims[j];
             }
-            break;
-            case Precision::I32: {
-                const int32_t *src_data = inData[BROADCAST_INPUT]->cbuffer().as<const int32_t *>() +
-                                          inData[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-                int32_t *dst_data = outData[0]->cbuffer().as<int32_t *>() +
-                                    outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-                parallel_nt(0, [&](const int ithr, const int nthr) {
-                    size_t i, src_idx, start = 0, end = 0;
-                    SizeVector counters(dst_dims.size(), 0);
-                    splitter(work_amount_dst, nthr, ithr, start, end);
-                    for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
-                        counters[j] = i % dst_dims[j];
-                        i /= dst_dims[j];
-                    }
-                    for (size_t iwork = start; iwork < end; ++iwork) {
-                        for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
-                            src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
-
-                        dst_data[iwork] = src_data[src_idx];
-
-                        for (int j = dst_dims.size() - 1; j >= 0; j--) {
-                            counters[j] = (counters[j] + 1) % dst_dims[j];
-                            if (counters[j] != 0) break;
-                        }
-                    }
-                });
+            for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) {
+                for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
+                    src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
+
+                ie_memcpy(&dst_data[iwork], data_size, &src_data[src_idx * data_size], data_size);
+
+                for (int j = dst_dims.size() - 1; j >= 0; j--) {
+                    counters[j] = (counters[j] + 1) % dst_dims[j];
+                    if (counters[j] != 0) break;
+                }
             }
-            break;
-            default:
-                THROW_IE_EXCEPTION << "Incorrect output precision. Only FP32 and I32 are supported!";
-        }
+        });
     }
 };
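// Illustrative note (editor's note, not part of the patch): the loop above is a
// precision-agnostic rewrite of the former FP32/I32-only branches; each element is copied as
// raw bytes of size data_size. For example, broadcasting an I64 input of shape {3} to an
// output of shape {2, 3}:
//
//   data_size          = 8          // sizeof(I64)
//   src_aligned        = {1, 3}     // input rank padded to the output rank
//   srcStrides_aligned = {1, 1}
//
// so output element (i, j) copies the 8 bytes of source element (j), i.e. both output rows
// repeat the source row, and any other precision only changes data_size.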
 
index 904c16b..10945ef 100644 (file)
@@ -35,7 +35,7 @@ public:
 
         auto outBlob = *outData.begin();
         SizeVector outShape = outBlob->getTensorDesc().getDims();
-        auto* outBuffer = outBlob->buffer().as<float*>();
+        auto* outBuffer = outBlob->buffer().as<int8_t *>();
 
         size_t outerSize = 1;
         for (int i = 0; i < layer._axis; i++)
@@ -44,11 +44,16 @@ public:
         size_t outIdx = 0;
         for (size_t osIdx = 0; osIdx < outerSize; osIdx++) {
             for (auto& inBlob : inData) {
-                const auto* inBuffer = inBlob->cbuffer().as<float*>();
+                if (inBlob->getTensorDesc().getPrecision() != outBlob->getTensorDesc().getPrecision())
+                    THROW_IE_EXCEPTION << "Unsupported concat layer with different precisions! Out precision: " +
+                    std::string(outBlob->getTensorDesc().getPrecision().name());
+                const auto* inBuffer = inBlob->cbuffer().as<int8_t*>();
                 size_t innerSize = inBlob->size() / outerSize;
 
                 for (size_t j = 0; j < innerSize; j++, outIdx++) {
-                    outBuffer[outIdx] = inBuffer[osIdx * innerSize + j];
+                    memcpy(outBuffer + outIdx*outBlob->element_size(),
+                           inBuffer + (osIdx * innerSize + j)*inBlob->element_size(),
+                           inBlob->element_size());
                 }
             }
         }
index d874ba3..4f5d3c3 100644 (file)
 #include "ie_gather_const_infer.hpp"
 #include "ie_split_const_infer.hpp"
 #include "ie_concat_const_infer.hpp"
+#include "ie_convert_const_infer.hpp"
 #include "ie_in_place_const_infer.hpp"
 #include "ie_strided_slice_const_infer.hpp"
 #include "ie_fill_const_infer.hpp"
 #include "ie_range_const_infer.hpp"
 #include "ie_broadcast_const_infer.hpp"
+#include "ie_permute_const_infer.hpp"
 #include "ie_onehot_const_infer.hpp"
+#include "ie_reduce_const_infer.hpp"
 #include <list>
 #include <memory>
 #include <string>
@@ -81,6 +84,20 @@ REG_CONST_INFER_FOR_TYPE(FillConstInfer, Fill);
 REG_CONST_INFER_FOR_TYPE(RangeConstInfer, Range);
 REG_CONST_INFER_FOR_TYPE(BroadcastConstInfer, Broadcast);
 REG_CONST_INFER_FOR_TYPE(OneHotConstInfer, OneHot);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceAnd);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceL1);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceL2);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceLogSum);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceLogSumExp);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceMax);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceMean);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceMin);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceOr);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceProd);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceSum);
+REG_CONST_INFER_FOR_TYPE(ReduceConstInfer, ReduceSumSquare);
+REG_CONST_INFER_FOR_TYPE(PermuteConstInfer, Permute);
+REG_CONST_INFER_FOR_TYPE(ConvertConstInfer, Convert);
 
 }  // namespace ShapeInfer
 }  // namespace InferenceEngine
index 45883dd..304f0e8 100644 (file)
@@ -17,7 +17,8 @@ void ConstInferImpl::infer(const std::vector<Blob::CPtr>& inData,
     std::string errorPrefix = "Ref infer error for Layer with `" + _type + "` type: ";
     if (outData.empty()) THROW_IE_EXCEPTION << errorPrefix + "output data is empty";
     for (auto const& data : outData) {
-        if (data->buffer() == nullptr) THROW_IE_EXCEPTION << errorPrefix + "output data is not allocated";
+        if (data->buffer() == nullptr)
+            THROW_IE_EXCEPTION << errorPrefix + "output data is not allocated";
     }
     // TODO: check for direct (NCHW, NCH, NC) and FP32
     inferImpl(inData, params, blobs, outData);
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_convert_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_convert_const_infer.hpp
new file mode 100644 (file)
index 0000000..918a5cc
--- /dev/null
@@ -0,0 +1,91 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_precision.hpp"
+#include "ie_parallel.hpp"
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Convert layer
+ */
+class ConvertConstInfer : public ConstInferImpl {
+    template<typename src_d, typename dst_d>
+    void exec_cast(const Blob::CPtr& inData, Blob::Ptr& outData) {
+        const src_d *src_data = inData->cbuffer().as<src_d *>() +
+                                inData->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        dst_d* dst_data = outData->buffer().as<dst_d *>() +
+                          outData->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        if (inData->size() != outData->size())
+            THROW_IE_EXCEPTION << " Convert constant inference error: Input and output buffers have different sizes! Input buffer size = `" << inData->size()
+                               << "` output buffer size = `" << outData->size() << "`";
+        parallel_for(inData->size(), [&](size_t i) {
+            dst_data[i] = static_cast<dst_d>(src_data[i]);
+        });
+    }
+
+public:
+    explicit ConvertConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        ConcatLayer layer(lp);
+        layer.params = params;
+        _validator->parseParams(&layer);
+        if (inData.size() != 1)
+            THROW_IE_EXCEPTION << " Convert constant inference error: incorrect number of inputs! Expected 1, got " << inData.size();
+        if (outData.size() != 1)
+            THROW_IE_EXCEPTION << " Convert constant inference error: incorrect number of outputs! Expected 1, got " << outData.size();
+        if (layer.params["precision"] != outData[0]->getTensorDesc().getPrecision().name())
+            THROW_IE_EXCEPTION << " Convert constant inference error: layer `precision` parameter and actual output data precision mismatch! "
+                                  "`precision`=\"" << layer.params["precision"] << "\", " <<
+                               "`output_data_precision`=\"" << outData[0]->getTensorDesc().getPrecision() << "\"";
+
+        auto compare = getPrecisionMask(inData[0]->getTensorDesc().getPrecision(), outData[0]->getTensorDesc().getPrecision());
+        switch (compare) {
+            case getPrecisionMask(Precision::I32, Precision::I32):
+                exec_cast<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::I32>::value_type>(inData[0], outData[0]);
+                break;
+            case getPrecisionMask(Precision::I64, Precision::I64):
+                exec_cast<PrecisionTrait<Precision::I64>::value_type, PrecisionTrait<Precision::I64>::value_type>(inData[0], outData[0]);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::FP32):
+                exec_cast<PrecisionTrait<Precision::FP32>::value_type, PrecisionTrait<Precision::FP32>::value_type>(inData[0], outData[0]);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I64):
+                exec_cast<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::I64>::value_type>(inData[0], outData[0]);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::FP32):
+                exec_cast<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::FP32>::value_type>(inData[0], outData[0]);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::I32):
+                exec_cast<PrecisionTrait<Precision::FP32>::value_type, PrecisionTrait<Precision::I32>::value_type>(inData[0], outData[0]);
+                break;
+            case getPrecisionMask(Precision::FP32, Precision::I64):
+                exec_cast<PrecisionTrait<Precision::FP32>::value_type, PrecisionTrait<Precision::I64>::value_type>(inData[0], outData[0]);
+                break;
+            default:
+                THROW_IE_EXCEPTION << " Convert constant inference error: Unsupported precision configuration! " <<
+                                   " Input precision: " << inData[0]->getTensorDesc().getPrecision() << ", output precision: "
+                                   << outData[0]->getTensorDesc().getPrecision();
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
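// Illustrative note (editor's note, not part of the patch): the const folding above is a plain
// element-wise static_cast between the registered precision pairs. For example, an I32 constant
// {1, 2, 3} feeding a Convert layer with precision="FP32" is folded into the FP32 constant
// {1.0f, 2.0f, 3.0f}; a pair not listed in the switch (such as I64 -> I32) ends up in the
// "Unsupported precision configuration" branch.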
index b8e40c5..865abd3 100644 (file)
@@ -7,6 +7,7 @@
 #include "ie_div_const_infer.hpp"
 #include "ie_add_const_infer.hpp"
 #include "ie_mul_const_infer.hpp"
+#include "ie_pow_const_infer.hpp"
 #include <ie_blob.h>
 #include <map>
 #include <memory>
@@ -26,6 +27,7 @@ public:
         _sum = std::shared_ptr<ConstInferImpl>(new AddConstInfer(_type));
         _mul = std::shared_ptr<ConstInferImpl>(new MulConstInfer(_type));
         _div = std::shared_ptr<ConstInferImpl>(new DivConstInfer(_type));
+        _pow = std::shared_ptr<ConstInferImpl>(new PowConstInfer(_type));
     }
 
     void inferImpl(const std::vector<Blob::CPtr>& inData,
@@ -43,6 +45,8 @@ public:
             actual = _mul;
         else if (operation == "div")
             actual = _div;
+        else if (operation == "pow")
+            actual = _pow;
         else
             THROW_IE_EXCEPTION << "Unsupported eltwise operation type " << operation << ". "
                                   "IE cannot propagate constants through this layer.";
@@ -51,7 +55,7 @@ public:
     }
 
 private:
-    std::shared_ptr<ConstInferImpl> _mul, _div, _sum;
+    std::shared_ptr<ConstInferImpl> _mul, _div, _sum, _pow;
 };
 
 }  // namespace ShapeInfer
index fc88216..9c43eff 100644 (file)
@@ -14,7 +14,7 @@
 #include <ie_algorithm.hpp>
 #include "ie_const_infer_impl.hpp"
 #include "ie_parallel.hpp"
-#include "../../precision_utils.h"
+#include "precision_utils.h"
 
 namespace InferenceEngine {
 namespace ShapeInfer {
@@ -51,48 +51,31 @@ public:
         }
     };
 
-    template <typename index_t, typename data_t, class Conversion>
+    template <typename index_t, class Conversion>
     void gather(const Blob::CPtr& indexes, const Blob::CPtr& dictionary, Blob::Ptr output, const GatherParams& p) {
         size_t src_indexSize = indexes->size();
         const index_t *src_index = indexes->cbuffer().as<const index_t *>() + indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        const data_t *src_dataDict = dictionary->cbuffer().as<const data_t *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        data_t *dst_data = output->cbuffer().as<data_t*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-        if (p.axis == 0) {
-            parallel_for(src_indexSize, [&](size_t i) {
-                unsigned int idx = Conversion()(src_index[i]);
-
-                //  Index clipping
-                if (idx < p.indexRange) {
-                    //  Copying data to destination from Dictionary
-                    ie_memcpy(&dst_data[i * p.dataLength],
-                        output->byteSize() - (p.dataLength * i),
-                        &src_dataDict[p.dataLength * idx],
-                        sizeof(data_t) * p.dataLength);
-                } else {
-                    memset(&dst_data[i * p.dataLength], 0, sizeof(data_t) * p.dataLength);
+        const uint8_t *src_dataDict = dictionary->cbuffer().as<const uint8_t *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        uint8_t *dst_data = output->cbuffer().as<uint8_t*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        parallel_for(src_indexSize, [&](size_t i) {
+            unsigned int idx = Conversion()(src_index[i]);
+
+            //  Index clipping
+            if (idx < p.indexRange) {
+                //  Copying data to destination from Dictionary
+                for (size_t j = 0; j < p.numDictionaries; j++) {
+                    ie_memcpy(&dst_data[p.dataLength * (i + j * src_indexSize)],
+                        output->byteSize() - (p.dataLength * (i + j * src_indexSize)),
+                        &src_dataDict[p.dataLength * (idx + j * p.indexRange)],
+                        p.dataLength);
                 }
-            });
-        } else {
-            parallel_for(src_indexSize, [&](size_t i) {
-                unsigned int idx = Conversion()(src_index[i]);
-
-                //  Index clipping
-                if (idx < p.indexRange) {
-                    //  Copying data to destination from Dictionary
-                    for (size_t j = 0; j < p.numDictionaries; j++) {
-                        ie_memcpy(&dst_data[p.dataLength * (i + j * src_indexSize)],
-                            output->byteSize() - (p.dataLength * (i + j * src_indexSize)),
-                            &src_dataDict[p.dataLength * (idx + j * p.indexRange)],
-                            sizeof(data_t) * p.dataLength);
-                    }
-                } else {
-                    for (size_t j = 0; j < p.numDictionaries; j++) {
-                        memset(&dst_data[p.dataLength * (i + j * src_indexSize)], 0, sizeof(data_t) * p.dataLength);
-                    }
+            } else {
+                for (size_t j = 0; j < p.numDictionaries; j++) {
+                    memset(&dst_data[p.dataLength * (i + j * src_indexSize)], 0, p.dataLength);
                 }
-            });
-        }
+            }
+        });
     }
 
     void inferImpl(const std::vector<Blob::CPtr>& inData,
@@ -118,54 +101,45 @@ public:
 
         Precision inDataPrecision = inData[GATHER_DICTIONARY]->getTensorDesc().getPrecision();
         if (inDataPrecision != Precision::FP32 &&
-            inDataPrecision != Precision::FP16)
-            THROW_IE_EXCEPTION << " Incorrect input precision. Only FP32 or FP16 are supported!";
+            inDataPrecision != Precision::FP16 &&
+            inDataPrecision != Precision::I32)
+            THROW_IE_EXCEPTION << " Incorrect input precision. Only FP32|FP16|I32 are supported!";
 
         //  Remove redundant dimensions
         const SizeVector& dictionary_dims = inData[GATHER_DICTIONARY]->getTensorDesc().getDims();
-        size_t actualAxis = 0;
-        SizeVector dims_actual;
-        for (size_t i = 0; i < dictionary_dims.size(); i++) {
-            if (dictionary_dims[i] > 1) {
-                for (size_t j = i; j < dictionary_dims.size(); j++)
-                    dims_actual.push_back(dictionary_dims[j]);
-                break;
-            }
-        }
-
-        if (dims_actual.size() == 0)
+        if (dictionary_dims.size() == 0)
             THROW_IE_EXCEPTION << " Incorrect input parameters dimension!";
 
         GatherParams p;
         p.axis = static_cast<int>(layer.GetParamAsInt("axis"));
         // Dictionary must be at least rank axis + 1
-        if (p.axis > 0 && dims_actual.size() < (1 + p.axis))
-            THROW_IE_EXCEPTION << " Incorrect input parameters dimensions and axis number!";
-        else if (p.axis < 0 && (static_cast<int>(dims_actual.size()) + p.axis) < 0)
+        if (!(-static_cast<int>(dictionary_dims.size()) <= p.axis && p.axis < static_cast<int>(dictionary_dims.size())))
             THROW_IE_EXCEPTION << " Incorrect input parameters dimensions and axis number!";
 
         if (p.axis < 0)
-            p.axis += dims_actual.size();
+            p.axis += dictionary_dims.size();
 
         //  Find number of dictionaries, index range and data length
         for (size_t i = 0; i < p.axis; i++)
-            p.numDictionaries *= dims_actual[i];
-        p.indexRange = dims_actual[p.axis];
-        for (size_t i = p.axis + 1; i < dims_actual.size(); i++)
-            p.dataLength *= dims_actual[i];
+            p.numDictionaries *= dictionary_dims[i];
+        p.indexRange = dictionary_dims[p.axis];
+        for (size_t i = p.axis + 1; i < dictionary_dims.size(); i++)
+            p.dataLength *= dictionary_dims[i];
 
         if (p.dataLength == 0)
             THROW_IE_EXCEPTION << " Incorrect input parameters dimension!";
 
+        p.dataLength *= inData[GATHER_DICTIONARY]->getTensorDesc().getPrecision().size();
+
         switch (inData[GATHER_INDEXES]->getTensorDesc().getPrecision()) {
         case Precision::FP32:
-            gather<float, float, f32toUi32>(inData[GATHER_INDEXES], inData[GATHER_DICTIONARY], outData[0], p);
+            gather<float, f32toUi32>(inData[GATHER_INDEXES], inData[GATHER_DICTIONARY], outData[0], p);
             break;
         case Precision::FP16:
-            gather<ie_fp16, ie_fp16, f16toUi32>(inData[GATHER_INDEXES], inData[GATHER_DICTIONARY], outData[0], p);
+            gather<ie_fp16, f16toUi32>(inData[GATHER_INDEXES], inData[GATHER_DICTIONARY], outData[0], p);
             break;
         case Precision::I32:
-            gather<int32_t, float, i32toUi32>(inData[GATHER_INDEXES], inData[GATHER_DICTIONARY], outData[0], p);
+            gather<int32_t, i32toUi32>(inData[GATHER_INDEXES], inData[GATHER_DICTIONARY], outData[0], p);
             break;
         default:
             THROW_IE_EXCEPTION << " Unsupported precision!";
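
The refactor above drops the dictionary value type from the gather template: p.dataLength is pre-multiplied by the dictionary precision size, so a single byte-wise copy serves FP32, FP16 and I32 dictionaries, and only the index type still needs a conversion functor. A minimal, self-contained sketch of that byte-based inner loop follows; the names and the flattened parameter list are illustrative, not the engine's actual GatherParams or gather signature.

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical stand-in for the index conversion functors (f32toUi32 and friends).
    struct I32toU32 {
        unsigned operator()(int32_t v) const { return static_cast<unsigned>(v); }
    };

    // Byte-based gather: dataLengthBytes already includes the dictionary element size,
    // so one loop handles FP32, FP16 and I32 dictionaries; out-of-range indices zero-fill.
    template <typename index_t, class Conversion>
    void gatherBytes(const std::vector<index_t>& indices,
                     const uint8_t* dict, uint8_t* dst,
                     size_t numDictionaries, size_t indexRange, size_t dataLengthBytes) {
        const size_t indexSize = indices.size();
        for (size_t i = 0; i < indexSize; ++i) {
            const unsigned idx = Conversion()(indices[i]);
            for (size_t j = 0; j < numDictionaries; ++j) {
                uint8_t* out = dst + dataLengthBytes * (i + j * indexSize);
                if (idx < indexRange)
                    std::memcpy(out, dict + dataLengthBytes * (idx + j * indexRange), dataLengthBytes);
                else
                    std::memset(out, 0, dataLengthBytes);
            }
        }
    }
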
index d9afcce..8cd4dfa 100644 (file)
 #include <string>
 #include <vector>
 #include <ie_layers.h>
+#include <ie_precision.hpp>
+#include <precision_utils.h>
+#include "ie_const_infer_impl.hpp"
+#include "broadcast_offset.hpp"
 
 namespace InferenceEngine {
 namespace ShapeInfer {
 
 /**
  *@brief Implementation of Const inference for Mul layer
+ *
+ * Table of output data type value with given input parameters
+ *
+ *
+ *              U8       I32        I64        FP16        FP32
+ *     =============================================================
+ *     U8   ==  U8       I32        I64        FP16        FP32
+ *          ==
+ *     I32  ==  I32      I32        I64        FP32        FP32
+ *          ==
+ *     I64  ==  I64      I64        I64        FP32        FP32
+ *          ==
+ *     FP16 ==  FP16     FP32       FP32       FP16        FP32
+ *          ==
+ *     FP32 ==  FP32     FP32       FP32       FP32        FP32
+ *
+ *     There is a special case for FP16 precision: inputs are converted to FP32 and the multiplication is done in FP32.
+ *     The result is converted back to FP16 when both inputs are FP16, or when one input is FP16 and the other is U8.
  */
+
 class MulConstInfer : public ConstInferImpl {
-public:
-    explicit MulConstInfer(const std::string& type) : ConstInferImpl(type) {}
-
-    void inferImpl(const std::vector<Blob::CPtr>& inData,
-                   const std::map<std::string, std::string>& params,
-                   const std::map<std::string, Blob::Ptr>& blobs,
-                   std::vector<Blob::Ptr>& outData) override {
-        size_t numInputs = inData.size();
-        if (inData.size() != 2)
-            THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". 2 inputs is supported";
-        auto* firstBlobBuffer = inData[0]->cbuffer().as<float*>();
-        auto* secondBlobBuffer = inData[1]->cbuffer().as<float*>();
-
-        if (!firstBlobBuffer || !secondBlobBuffer) {
-            THROW_IE_EXCEPTION << "empty input data";
-        }
-        auto outBlob = *outData.begin();
-        auto* outBuffer = outBlob->buffer().as<float*>();
-        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
-        if (inData[0]->size() != inData[1]->size()) {
-            THROW_IE_EXCEPTION << "inputs with different shapes are not supported";
+    public:
+        explicit MulConstInfer(const std::string &type) : ConstInferImpl(type) {}
+
+        struct fp16tofp32{
+            inline float operator()(ie_fp16 value){
+                return static_cast<float>(PrecisionUtils::f16tof32(value));
+            }
+        };
+
+        struct fp32tofp16{
+            inline ie_fp16 operator()(float value){
+                return PrecisionUtils::f32tof16(value);
+            }
+        };
+
+        template<typename dataType>
+        struct noConversion{
+            inline dataType operator()(dataType value){
+                return value;
+            }
+        };
+
+        template<typename inDatatype1, typename inDatatype2, typename  outDatatype, class ConversionInData1,
+                class ConversionInData2, class ConversionOutData>
+        void mul(const std::vector<Blob::CPtr> &inData,
+                 const std::map<std::string, std::string> &params,
+                 const std::map<std::string, Blob::Ptr> &blobs,
+                 std::vector<Blob::Ptr> &outData) {
+            auto* firstBlobBuffer = inData[0]->cbuffer().as<inDatatype1*>();
+            auto* secondBlobBuffer = inData[1]->cbuffer().as<inDatatype2*>();
+            if (!firstBlobBuffer || !secondBlobBuffer) {
+                THROW_IE_EXCEPTION << "empty input data";
+            }
+
+            auto outBlob = *outData.begin();
+            auto* outBuffer = outBlob->buffer().as<outDatatype *>();
+            if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
+
+            BroadcastOffset outOff(outBlob->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+            BroadcastOffset inOff1(inData[0]->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+            BroadcastOffset inOff2(inData[1]->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+
+            for (size_t i = 0; i < outBlob->size(); i++) {
+                SizeVector offsetDims = outOff.offset_dims(i);
+                outBuffer[outOff.offset(offsetDims)] = ConversionOutData()(ConversionInData1()(firstBlobBuffer[inOff1.offset(offsetDims)]) *
+                        ConversionInData2()(secondBlobBuffer[inOff2.offset(offsetDims)]));
+            }
         }
-        for (int i = 0; i < outBlob->size(); i++) {
-            outBuffer[i] = firstBlobBuffer[i] * secondBlobBuffer[i];
+
+        void inferImpl(const std::vector<Blob::CPtr> &inData,
+                       const std::map<std::string, std::string> &params,
+                       const std::map<std::string, Blob::Ptr> &blobs,
+                       std::vector<Blob::Ptr> &outData) override {
+            size_t numInputs = inData.size();
+            if (inData.size() != 2)
+                THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". Only 2 inputs are supported";
+
+            auto compare = getPrecisionMask(inData[0]->getTensorDesc().getPrecision(),
+                                            inData[1]->getTensorDesc().getPrecision(),
+                                            outData[0]->getTensorDesc().getPrecision());
+
+            switch (compare) {
+                case getPrecisionMask(Precision::U8, Precision::U8, Precision::U8):
+                    mul<uint8_t, uint8_t, uint8_t, noConversion<uint8_t>, noConversion<uint8_t>,
+                            noConversion<uint8_t>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::U8, Precision::I32, Precision::I32):
+                    mul<uint8_t, int, int, noConversion<uint8_t>, noConversion<int>,
+                            noConversion<int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::U8, Precision::I64, Precision::I64):
+                    mul<uint8_t, long long int, long long int, noConversion<uint8_t>, noConversion<long long int>,
+                            noConversion<long long int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::U8, Precision::FP16, Precision::FP16):
+                    mul<uint8_t, ie_fp16, ie_fp16, noConversion<uint8_t>, fp16tofp32,
+                            fp32tofp16>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::U8, Precision::FP32, Precision::FP32):
+                    mul<uint8_t, float, float, noConversion<uint8_t>, noConversion<float>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+
+                case getPrecisionMask(Precision::I32, Precision::U8, Precision::I32):
+                    mul<int, uint8_t, int, noConversion<int>, noConversion<uint8_t>,
+                            noConversion<int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I32, Precision::I32, Precision::I32):
+                    mul<int, int, int, noConversion<int>, noConversion<int>,
+                            noConversion<int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I32, Precision::I64, Precision::I64):
+                    mul<int, long long int, long long int, noConversion<int>, noConversion<long long int>,
+                            noConversion<long long int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I32, Precision::FP16, Precision::FP32):
+                    mul<int, ie_fp16, float, noConversion<int>, fp16tofp32,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I32, Precision::FP32, Precision::FP32):
+                    mul<int, float, float, noConversion<int>, noConversion<float>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+
+                case getPrecisionMask(Precision::I64, Precision::U8, Precision::I64):
+                    mul<long long int, uint8_t, long long int, noConversion<long long int>, noConversion<uint8_t>,
+                            noConversion<long long int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I64, Precision::I32, Precision::I64):
+                    mul<long long int, int, long long int, noConversion<long long int>, noConversion<int>,
+                            noConversion<long long int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I64, Precision::I64, Precision::I64):
+                    mul<long long int, long long int, long long int, noConversion<long long int>, noConversion<long long int>,
+                            noConversion<long long int>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I64, Precision::FP16, Precision::FP32):
+                    mul<long long int, ie_fp16, float, noConversion<long long int>, fp16tofp32,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::I64, Precision::FP32, Precision::FP32):
+                    mul<long long int, float, float, noConversion<long long int>, noConversion<float>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+
+                case getPrecisionMask(Precision::FP16, Precision::U8, Precision::FP16):
+                    mul<ie_fp16, uint8_t, ie_fp16, fp16tofp32, noConversion<uint8_t>,
+                            fp32tofp16>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP16, Precision::I32, Precision::FP32):
+                    mul<ie_fp16, int, float, fp16tofp32, noConversion<int>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP16, Precision::I64, Precision::FP32):
+                    mul<ie_fp16, long long int, float, fp16tofp32, noConversion<long long int>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP16, Precision::FP16, Precision::FP16):
+                    mul<ie_fp16, ie_fp16, ie_fp16, fp16tofp32, fp16tofp32,
+                            fp32tofp16>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP16, Precision::FP32, Precision::FP32):
+                    mul<ie_fp16, float, float, fp16tofp32, noConversion<float>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP16, Precision::FP32, Precision::FP16):
+                    mul<ie_fp16, float, ie_fp16, fp16tofp32, noConversion<float>,
+                            fp32tofp16>(inData, params, blobs, outData);
+                    break;
+
+                case getPrecisionMask(Precision::FP32, Precision::U8, Precision::FP32):
+                    mul<float, uint8_t, float, noConversion<float>, noConversion<uint8_t>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP32, Precision::I32, Precision::FP32):
+                    mul<float, int, float, noConversion<float>, noConversion<int>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP32, Precision::I64, Precision::FP32):
+                    mul<float, long long int, float, noConversion<float>, noConversion<long long int>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP32, Precision::FP16, Precision::FP32):
+                    mul<float, ie_fp16, float, noConversion<float>, fp16tofp32,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP32, Precision::FP16, Precision::FP16):
+                    mul<float, ie_fp16, ie_fp16, noConversion<float>, fp16tofp32,
+                            fp32tofp16>(inData, params, blobs, outData);
+                    break;
+                case getPrecisionMask(Precision::FP32, Precision::FP32, Precision::FP32):
+                    mul<float, float, float, noConversion<float>, noConversion<float>,
+                            noConversion<float>>(inData, params, blobs, outData);
+                    break;
+                default:
+                    THROW_IE_EXCEPTION << "Unsupported precision!";
+            }
         }
-    }
 };
-
 }  // namespace ShapeInfer
 }  // namespace InferenceEngine
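
The rewritten MulConstInfer dispatches on the (input0, input1, output) precision triple packed into a single switch key, following the table in the header comment. Below is a rough sketch of that dispatch pattern, assuming a pack-into-one-integer helper similar to the engine's getPrecisionMask; the enum values and bit widths here are made up for illustration.

    #include <cstdint>
    #include <stdexcept>

    enum class Prec : uint32_t { U8 = 1, I32 = 2, I64 = 3, FP16 = 4, FP32 = 5 };

    // Illustrative mask helper: packs up to three precisions into one integer so the
    // (input0, input1, output) combination can drive a single switch statement.
    constexpr uint32_t mask(Prec a, Prec b, Prec c) {
        return (static_cast<uint32_t>(a) << 16) | (static_cast<uint32_t>(b) << 8) | static_cast<uint32_t>(c);
    }

    int pickKernel(Prec in0, Prec in1, Prec out) {
        switch (mask(in0, in1, out)) {
            case mask(Prec::FP32, Prec::FP32, Prec::FP32): return 0;  // no conversion on either side
            case mask(Prec::FP16, Prec::FP16, Prec::FP16): return 1;  // f16->f32 math, f32->f16 store
            case mask(Prec::U8,   Prec::FP16, Prec::FP16): return 1;  // promoted through FP32 per the table above
            default: throw std::runtime_error("Unsupported precision combination");
        }
    }
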
index ef96d57..de159e8 100644 (file)
@@ -11,6 +11,7 @@
 #include <vector>
 #include <ie_layers.h>
 #include "ie_const_infer_impl.hpp"
+#include "precision_utils.h"
 
 namespace InferenceEngine {
 namespace ShapeInfer {
@@ -22,10 +23,10 @@ class OneHotConstInfer : public ConstInferImpl {
 public:
     explicit OneHotConstInfer(const std::string& type) : ConstInferImpl(type) {}
 
-    void inferImpl(const std::vector<Blob::CPtr>& inData,
-                   const std::map<std::string, std::string>& params,
-                   const std::map<std::string, Blob::Ptr>& blobs,
-                   std::vector<Blob::Ptr>& outData) override {
+    template <typename T>
+    void inferImplBody(const std::vector<Blob::CPtr>& inData,
+                       const std::map<std::string, std::string>& params,
+                       std::vector<Blob::Ptr>& outData) {
         OneHotLayer layer(LayerParams {});
         layer.params = params;
         layer.type = _type;
@@ -33,8 +34,8 @@ public:
         _validator->checkParams(&layer);
         auto src_dims = inData[0]->getTensorDesc().getDims();
 
-        const auto *src_data = inData[0]->cbuffer().as<const float *>();
-        auto *dst_data = outData[0]->buffer().as<float *>();
+        const auto *src_data = inData[0]->cbuffer().as<const T*>();
+        auto *dst_data = outData[0]->buffer().as<T*>();
         std::size_t prefix_size = 1;
         auto input_dims = inData[0]->getTensorDesc().getDims();
 
@@ -49,12 +50,68 @@ public:
             for (std::size_t depth_idx = 0; depth_idx < layer.depth; ++depth_idx) {
                 for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; suffix_idx++) {
                     auto src_index = prefix_idx * suffix_size + suffix_idx;
-                    std::size_t v = static_cast<std::size_t>(src_data[src_index]);
+                    auto v = static_cast<std::size_t>(src_data[src_index]);
                     dst_data[dst_offset++] = (v == depth_idx) ? layer.on_value : layer.off_value;
                 }
             }
         }
     }
+
+    void inferImplBody_fp16(const std::vector<Blob::CPtr>& inData,
+                       const std::map<std::string, std::string>& params,
+                       std::vector<Blob::Ptr>& outData) {
+        OneHotLayer layer(LayerParams {});
+        layer.params = params;
+        layer.type = _type;
+        _validator->parseParams(&layer);
+        _validator->checkParams(&layer);
+        auto src_dims = inData[0]->getTensorDesc().getDims();
+
+        const auto *src_data = inData[0]->cbuffer().as<const int16_t *>();
+        auto *dst_data = outData[0]->buffer().as<int16_t *>();
+        std::size_t prefix_size = 1;
+        auto input_dims = inData[0]->getTensorDesc().getDims();
+
+        std::size_t actual_axis = (layer.axis == -1) ? src_dims.size() : layer.axis;
+        for (size_t i = 0; i < actual_axis; ++i)
+            prefix_size *= input_dims[i];
+
+        std::size_t suffix_size = inData[0]->size() / prefix_size;
+
+        int16_t val_on = PrecisionUtils::f32tof16(layer.on_value);
+        int16_t val_off = PrecisionUtils::f32tof16(layer.off_value);
+
+        std::size_t dst_offset = 0;
+        for (std::size_t prefix_idx = 0; prefix_idx < prefix_size; ++prefix_idx) {
+            for (std::size_t depth_idx = 0; depth_idx < layer.depth; ++depth_idx) {
+                for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; suffix_idx++) {
+                    auto src_index = prefix_idx * suffix_size + suffix_idx;
+                    auto v = static_cast<std::size_t>(src_data[src_index]);
+                    dst_data[dst_offset++] = (v == depth_idx) ? val_on : val_off;
+                }
+            }
+        }
+    }
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        auto inputBlob = inData.front();
+        Precision precision = inputBlob->getTensorDesc().getPrecision();
+        switch (precision) {
+            case Precision::FP32: inferImplBody<PrecisionTrait<Precision::FP32>::value_type>(inData, params, outData); break;
+            case Precision::FP16: inferImplBody_fp16(inData, params, outData); break;
+            case Precision::Q78: inferImplBody<PrecisionTrait<Precision::Q78>::value_type>(inData, params, outData); break;
+            case Precision::I16: inferImplBody<PrecisionTrait<Precision::I16>::value_type>(inData, params, outData); break;
+            case Precision::U8: inferImplBody<PrecisionTrait<Precision::U8>::value_type>(inData, params, outData); break;
+            case Precision::I8: inferImplBody<PrecisionTrait<Precision::I8>::value_type>(inData, params, outData); break;
+            case Precision::U16: inferImplBody<PrecisionTrait<Precision::U16>::value_type>(inData, params, outData); break;
+            case Precision::I32: inferImplBody<PrecisionTrait<Precision::I32>::value_type>(inData, params, outData); break;
+            case Precision::I64: inferImplBody<PrecisionTrait<Precision::I64>::value_type>(inData, params, outData); break;
+            default: THROW_IE_EXCEPTION << "OneHot const inference: Unsupported precision " << precision.name();
+        }
+    }
 };
 
 }  // namespace ShapeInfer
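
The OneHot body above is now templated on the element type, and the FP16 variant differs only in pre-converting the on/off values to half precision once and storing raw 16-bit words. A small standalone sketch of the prefix/depth/suffix expansion the loops perform, using plain vectors instead of the engine's blobs:

    #include <cstddef>
    #include <vector>

    // One-hot expansion with the output laid out as [prefix, depth, suffix]:
    // prefixSize is the product of input dims before the axis, suffixSize the rest.
    template <typename T>
    std::vector<T> oneHot(const std::vector<T>& src, std::size_t prefixSize,
                          std::size_t suffixSize, std::size_t depth, T onValue, T offValue) {
        std::vector<T> dst(prefixSize * depth * suffixSize, offValue);
        std::size_t out = 0;
        for (std::size_t p = 0; p < prefixSize; ++p)
            for (std::size_t d = 0; d < depth; ++d)
                for (std::size_t s = 0; s < suffixSize; ++s) {
                    const auto v = static_cast<std::size_t>(src[p * suffixSize + s]);
                    dst[out++] = (v == d) ? onValue : offValue;
                }
        return dst;
    }
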
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_permute_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_permute_const_infer.hpp
new file mode 100644 (file)
index 0000000..bf88a02
--- /dev/null
@@ -0,0 +1,75 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_algorithm.hpp>
+#include "ie_const_infer_impl.hpp"
+#include "ie_parallel.hpp"
+#include "../../precision_utils.h"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Permute layer
+ */
+class PermuteConstInfer : public ConstInferImpl {
+public:
+    explicit PermuteConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        CNNLayer layer(lp);
+        layer.params = params;
+
+        if (outData.empty())
+            THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
+
+        if (inData.size() != 1)
+            THROW_IE_EXCEPTION << "Incorrect number of input edges!";
+
+        if (inData[0]->getTensorDesc().getPrecision() != outData[0]->getTensorDesc().getPrecision()) {
+            THROW_IE_EXCEPTION
+                    << "Input and output tensors should have the same precision!";
+        }
+
+        std::vector<size_t> order;
+        std::vector<int> layerOrder = layer.GetParamAsInts("order");
+        for (auto ord : layerOrder)
+            order.push_back(static_cast<size_t>(ord));
+
+        TensorDesc srcDesc = inData[0]->getTensorDesc();
+
+        SizeVector& dims = srcDesc.getDims();
+        InferenceEngine::SizeVector orderedDims;
+        for (auto ord : order) {
+            orderedDims.push_back(dims[ord]);
+        }
+        TensorDesc dstDesc(InferenceEngine::Precision::FP32, dims, {orderedDims, order});
+
+        size_t dataSize = inData[0]->size();
+        const auto * src_data = inData[0]->cbuffer().as<const uint8_t*>();
+        auto * dst_data = outData[0]->buffer().as<uint8_t*>();
+
+        parallel_for(dataSize, [&](size_t i) {
+            memcpy(dst_data + dstDesc.offset(i)*outData[0]->element_size(),
+                   src_data + srcDesc.offset(i)*inData[0]->element_size(),
+                   inData[0]->element_size());
+        });
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
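
The new permute const infer copies one element at a time and lets two TensorDesc objects turn a flat index into source and destination byte offsets, with the destination descriptor built from the permuted blocking order. A rough equivalent using explicit row-major strides instead of TensorDesc is sketched below; the helper functions are hypothetical, not the engine API.

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Row-major strides for a dense shape.
    static std::vector<size_t> rowMajorStrides(const std::vector<size_t>& dims) {
        std::vector<size_t> s(dims.size(), 1);
        for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
            s[i] = s[i + 1] * dims[i + 1];
        return s;
    }

    // Permute a dense tensor of elemSize-byte elements; order[k] names the source
    // axis that becomes destination axis k, mirroring the "order" layer parameter.
    void permuteBytes(const uint8_t* src, uint8_t* dst,
                      const std::vector<size_t>& srcDims,
                      const std::vector<size_t>& order, size_t elemSize) {
        std::vector<size_t> dstDims(order.size());
        for (size_t k = 0; k < order.size(); ++k) dstDims[k] = srcDims[order[k]];
        const auto srcStr = rowMajorStrides(srcDims);
        const auto dstStr = rowMajorStrides(dstDims);

        size_t total = 1;
        for (size_t d : srcDims) total *= d;

        for (size_t i = 0; i < total; ++i) {
            // Decompose the flat destination index into per-axis coordinates and
            // accumulate the matching source offset.
            size_t rem = i, srcOff = 0;
            for (size_t k = 0; k < dstDims.size(); ++k) {
                const size_t coord = rem / dstStr[k];
                rem %= dstStr[k];
                srcOff += coord * srcStr[order[k]];
            }
            std::memcpy(dst + i * elemSize, src + srcOff * elemSize, elemSize);
        }
    }
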
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_pow_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_pow_const_infer.hpp
new file mode 100644 (file)
index 0000000..a16af70
--- /dev/null
@@ -0,0 +1,99 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cmath>
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_precision.hpp>
+#include <precision_utils.h>
+#include "broadcast_offset.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+class PowConstInfer : public ConstInferImpl {
+public:
+    explicit PowConstInfer(const std::string &type) : ConstInferImpl(type) {}
+
+    struct fp16tofp32{
+        inline float operator()(ie_fp16 value){
+            return static_cast<float>(PrecisionUtils::f16tof32(value));
+        }
+    };
+
+    struct fp32tofp16{
+        inline ie_fp16 operator()(float value){
+            return PrecisionUtils::f32tof16(value);
+        }
+    };
+
+    template<typename dataType>
+    struct noConversion{
+        inline dataType operator()(dataType value){
+            return value;
+        }
+    };
+
+    template<typename inDatatype1, typename inDatatype2, typename  outDatatype, class ConversionInData1,
+            class ConversionInData2, class ConversionOutData>
+    void pow(const std::vector<Blob::CPtr> &inData,
+             const std::map<std::string, std::string> &params,
+             const std::map<std::string, Blob::Ptr> &blobs,
+             std::vector<Blob::Ptr> &outData) {
+        auto* firstBlobBuffer = inData[0]->cbuffer().as<inDatatype1*>();
+        auto* secondBlobBuffer = inData[1]->cbuffer().as<inDatatype2*>();
+        if (!firstBlobBuffer || !secondBlobBuffer) {
+            THROW_IE_EXCEPTION << "empty input data";
+        }
+
+        auto outBlob = *outData.begin();
+        auto* outBuffer = outBlob->buffer().as<outDatatype *>();
+        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
+
+        BroadcastOffset outOff(outBlob->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+        BroadcastOffset inOff1(inData[0]->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+        BroadcastOffset inOff2(inData[1]->getTensorDesc().getDims(), outBlob->getTensorDesc().getDims());
+        for (size_t i = 0; i < outBlob->size(); i++) {
+            SizeVector offsetDims = outOff.offset_dims(i);
+            outBuffer[outOff.offset(offsetDims)] = ConversionOutData()(
+                    std::pow(ConversionInData1()(firstBlobBuffer[inOff1.offset(offsetDims)]),
+                             ConversionInData2()(secondBlobBuffer[inOff2.offset(offsetDims)])));
+        }
+    }
+
+    void inferImpl(const std::vector<Blob::CPtr> &inData,
+                   const std::map<std::string, std::string> &params,
+                   const std::map<std::string, Blob::Ptr> &blobs,
+                   std::vector<Blob::Ptr> &outData) override {
+        size_t numInputs = inData.size();
+        if (inData.size() != 2)
+            THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". Only 2 inputs are supported";
+
+        auto compare = getPrecisionMask(inData[0]->getTensorDesc().getPrecision(), inData[1]->getTensorDesc().getPrecision(),
+                outData[0]->getTensorDesc().getPrecision());
+        switch (compare) {
+            case getPrecisionMask(Precision::FP32, Precision::FP32, Precision::FP32):
+                pow<float, float, float, noConversion<float>, noConversion<float>, noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I32, Precision::FP32):
+                pow<int32_t, int32_t, float, noConversion<int32_t>, noConversion<int32_t>, noConversion<float>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::FP16, Precision::FP16, Precision::FP16):
+                pow<ie_fp16, ie_fp16, ie_fp16, noConversion<ie_fp16>, noConversion<ie_fp16>, noConversion<ie_fp16>>(inData, params, blobs, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I32, Precision::FP16):
+                pow<int32_t, int32_t, ie_fp16, noConversion<int32_t>, noConversion<int32_t>, fp32tofp16>(inData, params, blobs, outData);
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision combination for Pow constant inference!";
+        }
+    }
+};
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
\ No newline at end of file
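
Both the new Mul and Pow kernels iterate over the output blob and use a BroadcastOffset helper to map each output coordinate back into inputs whose axes may be 1. A simplified sketch of that broadcasting pattern for a binary pow is shown below; the offset helper is an assumption standing in for the one declared in broadcast_offset.hpp.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Maps an output coordinate to a flat index of an input whose dims either match
    // the corresponding output dims or are 1 (numpy-style broadcasting).
    static size_t broadcastOffset(const std::vector<size_t>& outCoord,
                                  const std::vector<size_t>& inDims) {
        size_t off = 0, stride = 1;
        const size_t shift = outCoord.size() - inDims.size();
        for (int i = static_cast<int>(inDims.size()) - 1; i >= 0; --i) {
            const size_t c = (inDims[i] == 1) ? 0 : outCoord[i + shift];
            off += c * stride;
            stride *= inDims[i];
        }
        return off;
    }

    // Element-wise pow with both inputs broadcast to the output shape.
    void powBroadcast(const float* a, const std::vector<size_t>& aDims,
                      const float* b, const std::vector<size_t>& bDims,
                      float* out, const std::vector<size_t>& outDims) {
        size_t total = 1;
        for (size_t d : outDims) total *= d;
        for (size_t i = 0; i < total; ++i) {
            // Flat output index -> per-axis coordinate.
            std::vector<size_t> coord(outDims.size());
            size_t rem = i;
            for (int k = static_cast<int>(outDims.size()) - 1; k >= 0; --k) {
                coord[k] = rem % outDims[k];
                rem /= outDims[k];
            }
            out[i] = std::pow(a[broadcastOffset(coord, aDims)],
                              b[broadcastOffset(coord, bDims)]);
        }
    }
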
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_reduce_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_reduce_const_infer.hpp
new file mode 100644 (file)
index 0000000..50b85fd
--- /dev/null
@@ -0,0 +1,374 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <cmath>
+#include <limits>
+#include <cfloat>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_algorithm.hpp>
+#include "ie_const_infer_impl.hpp"
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Reduce layer
+ */
+class ReduceConstInfer : public ConstInferImpl {
+private:
+    const size_t REDUCE_DATA = 0;
+    const size_t REDUCE_INDEXES = 1;
+
+template <typename src_t, typename dst_t>
+void reduce(
+    SizeVector   src_dims,
+    SizeVector   srcStrides,
+    const src_t *src_data,
+    dst_t       *dst_data,
+    size_t       work_amount_dst,
+    size_t       reduced_dims_work_amount,
+    SizeVector   axes_for_reduction,
+    SizeVector   dst_dims,
+    dst_t        init_value,
+    std::string reduceType
+) {
+    // func1 folds raw source elements into an accumulator; func2 merges per-thread partial results
+    // (e.g. ReduceL1 adds |x| element-wise via func1, but plain-adds the already-absolute partials via func2).
+    std::function<dst_t(dst_t, src_t)> func1;
+    std::function<dst_t(dst_t, src_t)> func2;
+    if (reduceType == "ReduceAnd") {
+        func1 = [](dst_t x, src_t y) -> dst_t { return x && y; };
+        func2 = [](dst_t x, src_t y) -> dst_t { return x && y; };
+    } else if (reduceType == "ReduceL1") {
+        func1 = [](dst_t x, src_t y) -> dst_t { return x + (std::abs)(y); };
+        func2 = [](dst_t x, src_t y) -> dst_t { return x + y; };
+    } else if (reduceType == "ReduceL2") {
+        func1 = [](dst_t x, src_t y) -> dst_t { return x + y * y; };
+        func2 = [](dst_t x, src_t y) -> dst_t { return x + y; };
+    } else if (reduceType == "ReduceLogSum") {
+        func1 = [](dst_t x, src_t y)->dst_t { return x + y; };
+        func2 = [](dst_t x, src_t y)->dst_t { return x + y; };
+    } else if (reduceType == "ReduceLogSumExp") {
+        func1 = [](dst_t x, src_t y)->dst_t { return x + expf(y); };
+        func2 = [](dst_t x, src_t y)->dst_t { return x + y; };
+    } else if (reduceType == "ReduceMax") {
+        func1 = [](dst_t x, src_t y)->dst_t { return x > y ? x : y; };
+        func2 = [](dst_t x, src_t y)->dst_t { return x > y ? x : y; };
+    } else if (reduceType == "ReduceMean") {
+        func1 = [](dst_t x, src_t y) -> dst_t { return (x + y); };
+        func2 = [](dst_t x, src_t y) -> dst_t { return (x + y); };
+    } else if (reduceType == "ReduceMin") {
+        func1 = [](dst_t x, src_t y)->dst_t { return x < y ? x : y; };
+        func2 = [](dst_t x, src_t y)->dst_t { return x < y ? x : y; };
+    } else if (reduceType == "ReduceOr") {
+        func1 = [](dst_t x, src_t y) -> dst_t { return x || y; };
+        func2 = [](dst_t x, src_t y) -> dst_t { return x || y; };
+    } else if (reduceType == "ReduceProd") {
+        func1 = [](dst_t x, src_t y)->dst_t { return x * y; };
+        func2 = [](dst_t x, src_t y)->dst_t { return x * y; };
+    } else if (reduceType == "ReduceSum") {
+        func1 = [](dst_t x, src_t y)->dst_t { return x + y; };
+        func2 = [](dst_t x, src_t y)->dst_t { return x + y; };
+    } else if (reduceType == "ReduceSumSquare") {
+        func1 = [](dst_t x, src_t y) -> dst_t { return x + y * y; };
+        func2 = [](dst_t x, src_t y) -> dst_t { return x + y; };
+    }
+
+    unsigned int nthr = parallel_get_max_threads();
+    if ((work_amount_dst + 1) >= nthr) {
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            int j;
+            size_t i, start = 0, end = 0;
+            SizeVector dst_counters(dst_dims.size(), 0);
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            for (j = dst_dims.size() - 1, i = start; j >= 0; j--) {
+                dst_counters[j] = i % dst_dims[j];
+                i /= dst_dims[j];
+            }
+            for (size_t src_idx, dst_idx = start; dst_idx < end; ++dst_idx) {
+                dst_t reduce_prod = init_value;
+                bool update_idx = true;
+                SizeVector src_counters = dst_counters;
+                for (i = 0; i < reduced_dims_work_amount; ++i) {
+                    if (update_idx) {
+                        src_idx = 0;
+                        for (j = 0; j < static_cast<int>(src_dims.size()); ++j)
+                            src_idx += (src_counters[j] % src_dims[j]) * srcStrides[j];
+                        update_idx = false;
+                    }
+                    reduce_prod = func1(reduce_prod, src_data[src_idx]);
+                    for (j = axes_for_reduction.size() - 1; j >= 0; j--) {
+                        src_counters[axes_for_reduction[j]]++;
+                        if (src_counters[axes_for_reduction[j]] < src_dims[axes_for_reduction[j]]) {
+                            src_idx += srcStrides[axes_for_reduction[j]];
+                            break;
+                        } else {
+                            src_counters[axes_for_reduction[j]] = 0;
+                            update_idx = true;
+                        }
+                    }
+                }
+                dst_data[dst_idx] = reduce_prod;
+                for (j = dst_dims.size() - 1; j >= 0; j--) {
+                    dst_counters[j]++;
+                    if (dst_counters[j] < dst_dims[j])
+                        break;
+                    else
+                        dst_counters[j] = 0;
+                }
+            }
+        });
+    } else {
+        std::vector<dst_t> reduce_prod((nthr * work_amount_dst), init_value);
+        if (work_amount_dst == 1) {
+            parallel_nt(nthr, [&](const int ithr, const int nthr) {
+                size_t i, start = 0, end = 0;
+                splitter((srcStrides[0] * src_dims[0]), nthr, ithr, start, end);
+                for (i = start; i < end; ++i)
+                    reduce_prod[ithr] = func1(reduce_prod[ithr], src_data[i]);
+            });
+        } else {
+            SizeVector dstStrides(dst_dims.size(), 1);
+            for (int j = dst_dims.size() - 1; j >= 1; --j)
+                dstStrides[j - 1] = dstStrides[j] * dst_dims[j];
+            parallel_nt(nthr, [&](const int ithr, const int nthr) {
+                int j;
+                bool update_idx = true;
+                size_t i, src_idx, dst_idx = 0, start = 0, end = 0;
+                splitter((srcStrides[0] * src_dims[0]), nthr, ithr, start, end);
+                SizeVector src_counters(src_dims.size(), 0);
+                for (j = src_dims.size() - 1, src_idx = start; j >= 0; j--) {
+                    src_counters[j] = src_idx % src_dims[j];
+                    src_idx /= src_dims[j];
+                }
+                for (src_idx = start; src_idx < end; ++src_idx) {
+                    if (update_idx) {
+                        for (i = 0, dst_idx = 0; i < dst_dims.size(); ++i)
+                            dst_idx += (src_counters[i] % dst_dims[i]) * dstStrides[i];
+                        update_idx = false;
+                    }
+                    reduce_prod[ithr * work_amount_dst + dst_idx] = func1(reduce_prod[ithr * work_amount_dst + dst_idx], src_data[src_idx]);
+                    for (j = src_dims.size() - 1; j >= 0; j--) {
+                        src_counters[j]++;
+                        if (src_counters[j] < src_dims[j]) {
+                            if (dst_dims[j] > 1) dst_idx += dstStrides[j];
+                            break;
+                        } else {
+                            src_counters[j] = 0;
+                            update_idx = true;
+                        }
+                    }
+                }
+            });
+        }
+        for (size_t dst_idx = 0; dst_idx < work_amount_dst; dst_idx++) {
+            for (size_t ithr = work_amount_dst; ithr < (nthr * work_amount_dst); ithr += work_amount_dst)
+                reduce_prod[dst_idx] = func2(reduce_prod[dst_idx], reduce_prod[dst_idx + ithr]);
+            dst_data[dst_idx] = reduce_prod[dst_idx];
+        }
+    }
+}
+
+template<typename src_d, typename dst_d>
+void exec_reduce(const std::vector<Blob::CPtr>& insData, std::vector<Blob::Ptr>& outData, std::string reduce_mode,
+                 SizeVector   src_dims,
+                 SizeVector   srcStrides,
+                 size_t       work_amount_dst,
+                 size_t       reduced_dims_work_amount,
+                 SizeVector   axes_for_reduction,
+                 SizeVector   our_dims,
+                 dst_d min_val,
+                 dst_d max_val) {
+    const src_d *src_data = insData[REDUCE_DATA]->cbuffer().as<src_d *>() +
+                            insData[REDUCE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    dst_d* dst_data = outData[0]->cbuffer().as<dst_d *>() +
+                      outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    if (reduce_mode == "ReduceAnd") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 1, reduce_mode);
+    } else if (reduce_mode == "ReduceL1") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0, reduce_mode);
+    } else if (reduce_mode == "ReduceL2") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0, reduce_mode);
+
+        parallel_for(work_amount_dst, [&](size_t i) {
+            dst_data[i] = sqrt(dst_data[i]);
+        });
+    } else if (reduce_mode == "ReduceLogSum") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0,
+               reduce_mode);
+
+        parallel_for(work_amount_dst, [&](size_t i) {
+            dst_data[i] = logf(dst_data[i]);
+        });
+    } else if (reduce_mode == "ReduceLogSumExp") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0,
+               reduce_mode);
+
+        parallel_for(work_amount_dst, [&](size_t i) {
+            dst_data[i] = logf(dst_data[i]);
+        });
+    } else if (reduce_mode == "ReduceMax") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, min_val,
+               reduce_mode);
+    } else if (reduce_mode == "ReduceMean") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0,
+               reduce_mode);
+
+        parallel_for(work_amount_dst, [&](size_t i) {
+            dst_data[i] /= static_cast<float>(reduced_dims_work_amount);
+        });
+    } else if (reduce_mode == "ReduceMin") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, max_val,
+               reduce_mode);
+    } else if (reduce_mode == "ReduceOr") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0,
+               reduce_mode);
+    } else if (reduce_mode == "ReduceProd") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 1,
+               reduce_mode);
+    } else if (reduce_mode == "ReduceSum") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0,
+               reduce_mode);
+    } else if (reduce_mode == "ReduceSumSquare") {
+        reduce<src_d, dst_d>(src_dims, srcStrides, src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0,
+               reduce_mode);
+    } else {
+        THROW_IE_EXCEPTION << " Incorrect Reduce layer type!";
+    }
+}
+
+public:
+    explicit ReduceConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& insData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{"", _type};
+        CNNLayer layer(lp);
+        layer.params = params;
+
+        if (insData.empty() || outData.empty())
+            THROW_IE_EXCEPTION << " Reduce constant inference error: empty input or output data!";
+
+        if (insData.size() != 2)
+            THROW_IE_EXCEPTION << " Reduce constant inference error: Incorrect number of input edges! Should be 2 edges, got " << insData.size();
+
+        SizeVector idx_dims = insData[REDUCE_INDEXES]->getTensorDesc().getDims();
+        if (idx_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Reduce constant inference error: Index vector should be 1 dimension, got " << idx_dims.size() << " dimensions";
+
+        if (insData[REDUCE_INDEXES]->getTensorDesc().getPrecision() != Precision::I32)
+            THROW_IE_EXCEPTION << " Reduce constant inference error: Incorrect 'axes_to_reduction' input precision. Only I32 is supported! Current precision: "
+                               << insData[REDUCE_INDEXES]->getTensorDesc().getPrecision();
+
+        SizeVector data_dims = insData[REDUCE_DATA]->getTensorDesc().getDims();
+        SizeVector dst_dims = outData[0]->getTensorDesc().getDims();
+
+        bool keep_dims = layer.GetParamAsBool("keep_dims", true);
+        if (keep_dims) {
+            if (data_dims.size() != dst_dims.size())
+                THROW_IE_EXCEPTION << " Reduce constant inference error: Incorrect number of input/output dimensions!";
+        } else {
+            if (data_dims.size() <= dst_dims.size())
+                THROW_IE_EXCEPTION << " Reduce constant inference error: Incorrect number of input/output dimensions!";
+        }
+
+        SizeVector src_dims = insData[REDUCE_DATA]->getTensorDesc().getDims();
+        SizeVector srcStrides = insData[REDUCE_DATA]->getTensorDesc().getBlockingDesc().getStrides();
+
+        int32_t *idx_data = insData[REDUCE_INDEXES]->cbuffer().as<int32_t *>() +
+            insData[REDUCE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        SizeVector axes;
+        for (size_t i = 0; i < idx_dims[0]; i++) {
+            int32_t axis = idx_data[i];
+            if (axis < 0)
+                axis += data_dims.size();
+
+            if (static_cast<size_t>(axis) >= data_dims.size())
+                THROW_IE_EXCEPTION << " Reduce constant inference error: Index to reduce exceeds data tensor dimension";
+            axes.push_back(static_cast<size_t>(axis));
+        }
+
+        size_t reduced_dims_work_amount = 1;
+        InferenceEngine::SizeVector our_dims, out_dims, axes_for_reduction;
+        for (size_t i = 0; i < src_dims.size(); i++) {
+            bool found = false;
+            for (size_t axis : axes)
+                if (i == axis) found = true;
+
+            if (found) {
+                axes_for_reduction.push_back(i);
+                reduced_dims_work_amount *= src_dims[i];
+                if (keep_dims) out_dims.push_back(1);
+                our_dims.push_back(1);
+            } else {
+                out_dims.push_back(src_dims[i]);
+                our_dims.push_back(src_dims[i]);
+            }
+        }
+
+        if (!our_dims.size())
+            our_dims = SizeVector(1, 1);
+
+        for (size_t i = 0; i < (std::min)(out_dims.size(), dst_dims.size()); i++)
+            if (out_dims[i] != dst_dims[i])
+                THROW_IE_EXCEPTION << " Reduce constant inference error: Incorrect number of output dimensions!";
+
+        size_t work_amount_dst;
+        if (!dst_dims.size())
+            work_amount_dst = 1;
+        else
+            work_amount_dst = outData[0]->getTensorDesc().getBlockingDesc().getStrides()[0] * dst_dims[0];
+
+        std::string reduce_mode = layer.type;
+
+        auto compare = getPrecisionMask(insData[REDUCE_DATA]->getTensorDesc().getPrecision(), outData[0]->getTensorDesc().getPrecision());
+        switch (compare) {
+            case getPrecisionMask(Precision::FP32, Precision::FP32):
+                exec_reduce<PrecisionTrait<Precision::FP32>::value_type, PrecisionTrait<Precision::FP32>::value_type>(
+                        insData, outData, reduce_mode, src_dims, srcStrides, work_amount_dst,
+                        reduced_dims_work_amount, axes_for_reduction, dst_dims,
+                        (std::numeric_limits<PrecisionTrait<Precision::FP32>::value_type>::min)(),
+                        (std::numeric_limits<PrecisionTrait<Precision::FP32>::value_type>::max)());
+                break;
+
+            case getPrecisionMask(Precision::I32, Precision::I64):
+                exec_reduce<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::I64>::value_type>(
+                        insData, outData, reduce_mode, src_dims, srcStrides, work_amount_dst,
+                        reduced_dims_work_amount, axes_for_reduction, dst_dims,
+                        (std::numeric_limits<PrecisionTrait<Precision::I64>::value_type>::min)(),
+                        (std::numeric_limits<PrecisionTrait<Precision::I64>::value_type>::max)());
+                break;
+            case getPrecisionMask(Precision::I32, Precision::FP32):
+                exec_reduce<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::FP32>::value_type>(
+                        insData, outData, reduce_mode, src_dims, srcStrides, work_amount_dst,
+                        reduced_dims_work_amount, axes_for_reduction, dst_dims,
+                        (std::numeric_limits<PrecisionTrait<Precision::FP32>::value_type>::min)(),
+                        (std::numeric_limits<PrecisionTrait<Precision::FP32>::value_type>::max)());
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I32):
+                exec_reduce<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::I32>::value_type>(
+                        insData, outData, reduce_mode, src_dims, srcStrides, work_amount_dst,
+                        reduced_dims_work_amount, axes_for_reduction, dst_dims,
+                        (std::numeric_limits<PrecisionTrait<Precision::I32>::value_type>::min)(),
+                        (std::numeric_limits<PrecisionTrait<Precision::I32>::value_type>::max)());
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Reduce constant inference error: Incorrect data tensor precisions. REDUCE_DATA precision: " <<
+                                   insData[REDUCE_DATA]->getTensorDesc().getPrecision() <<
+                                   " Output precision: " << outData[0]->getTensorDesc().getPrecision();
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
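
As noted in the comment on func1/func2, the reduce kernel separates per-thread element accumulation from the merge of per-thread partial results. A compact sketch of that two-stage split for ReduceL1 follows; sequential "workers" stand in for the parallel_nt threads of the real implementation.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // Two-stage reduction: `fold` accumulates raw elements per worker (func1),
    // `merge` combines the per-worker partial results (func2). For ReduceL1 the
    // fold takes |x| while the merge is a plain sum of already-absolute partials.
    float reduceL1(const std::vector<float>& data, size_t workers) {
        std::function<float(float, float)> fold  = [](float acc, float x) { return acc + std::fabs(x); };
        std::function<float(float, float)> merge = [](float acc, float x) { return acc + x; };

        std::vector<float> partial(workers, 0.0f);
        const size_t chunk = (data.size() + workers - 1) / workers;
        for (size_t w = 0; w < workers; ++w) {
            const size_t begin = w * chunk;
            const size_t end = std::min(data.size(), begin + chunk);
            for (size_t i = begin; i < end; ++i)
                partial[w] = fold(partial[w], data[i]);   // stage 1: func1
        }
        float result = 0.0f;
        for (float p : partial)
            result = merge(result, p);                    // stage 2: func2
        return result;
    }
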
index c5da316..3360dd3 100644 (file)
@@ -10,7 +10,7 @@
 #include <string>
 #include <vector>
 #include <ie_layers.h>
-#include "../../precision_utils.h"
+#include "precision_utils.h"
 
 namespace InferenceEngine {
 namespace ShapeInfer {
@@ -35,6 +35,16 @@ public:
             for (int i = 0; i < outBlob->size(); i++) {
                 outBuffer[i] = PrecisionUtils::f32tof16(static_cast<float>(inShape[i]));
             }
+        } else if (outBlob->getTensorDesc().getPrecision() == Precision::I32) {
+            auto* outBuffer = outBlob->buffer().as<int32_t*>();
+            for (int i = 0; i < outBlob->size(); i++) {
+                outBuffer[i] = static_cast<int32_t>(inShape[i]);
+            }
+        } else if (outBlob->getTensorDesc().getPrecision() == Precision::I64) {
+            auto* outBuffer = outBlob->buffer().as<int64_t*>();
+            for (int i = 0; i < outBlob->size(); i++) {
+                outBuffer[i] = static_cast<int64_t>(inShape[i]);
+            }
         } else {
             auto* outBuffer = outBlob->buffer().as<float*>();
             for (int i = 0; i < outBlob->size(); i++) {
index d9e7682..45b6d58 100644 (file)
@@ -12,6 +12,7 @@
 #include <algorithm>
 #include <ie_layers.h>
 #include <ie_memcpy.h>
+#include "ie_precision.hpp"
 #include "ie_const_infer_impl.hpp"
 #include "ie_parallel.hpp"
 
@@ -26,11 +27,8 @@ public:
         CNNLayer layer(lp);
         layer.params = params;
 
-        src_data = inData[STRIDEDSLICE_DATA]->cbuffer().as<const float*>() +
-                   inData[STRIDEDSLICE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
         if (inData.size() > 4)
-            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
+            THROW_IE_EXCEPTION << "StridedSlice constant inference error: Incorrect number of input edges!";
 
         src_dims = inData[STRIDEDSLICE_DATA]->getTensorDesc().getDims();
 
@@ -38,30 +36,33 @@ public:
         if (inData.size() > 1) {
             begin_dims = inData[STRIDEDSLICE_BEGIN]->getTensorDesc().getDims();
             if (inData[STRIDEDSLICE_BEGIN]->getTensorDesc().getPrecision() != Precision::I32)
-                THROW_IE_EXCEPTION << " Incorrect 'begin' input precision. Only I32 is supported!";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Incorrect 'begin' input precision. Only I32 is supported! Current precision: " <<
+                                   inData[STRIDEDSLICE_BEGIN]->getTensorDesc().getPrecision();
             if (begin_dims.size() > 1)
-                THROW_IE_EXCEPTION << " Begin vector should be 1 dimension";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Begin vector should be 1 dimension, got: " << begin_dims.size() << " dimensions";
             bounds_size = begin_dims[0];
         }
 
         if (inData.size() > 2) {
             end_dims = inData[STRIDEDSLICE_END]->getTensorDesc().getDims();
             if (inData[STRIDEDSLICE_END]->getTensorDesc().getPrecision() != Precision::I32)
-                THROW_IE_EXCEPTION << " Incorrect 'end' input precision. Only I32 is supported!";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Incorrect 'end' input precision. Only I32 is supported! Current precision: " <<
+                                   inData[STRIDEDSLICE_END]->getTensorDesc().getPrecision();
             if (end_dims.size() > 1)
-                THROW_IE_EXCEPTION << " End vector should be 1 dimension";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: End vector should be 1 dimension, got: " << end_dims.size() << " dimensions";
             if (begin_dims[0] != end_dims[0])
-                THROW_IE_EXCEPTION << " Begin vector size should be equal end vectror size";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Begin vector size should be equal to end vector size";
         }
 
         if (inData.size() > 3) {
             stride_dims = inData[STRIDEDSLICE_STRIDE]->getTensorDesc().getDims();
             if (inData[STRIDEDSLICE_STRIDE]->getTensorDesc().getPrecision() != Precision::I32)
-                THROW_IE_EXCEPTION << " Incorrect 'strides' input precision. Only I32 is supported!";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Incorrect 'strides' input precision. Only I32 is supported! Current precision: "
+                                   << inData[STRIDEDSLICE_STRIDE]->getTensorDesc().getPrecision();
             if (stride_dims.size() > 1)
-                THROW_IE_EXCEPTION << " End vector should be 1 dimension";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Stride vector should be 1 dimension, got: " << stride_dims.size() << " dimensions";
             if (begin_dims[0] != stride_dims[0])
-                THROW_IE_EXCEPTION << " Stride vector size should be equal begin vectror size";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Stride vector size should be equal to begin vector size";
         }
 
         std::string::size_type i;
@@ -209,35 +210,60 @@ public:
         return out_dims;
     }
 
-    void infer(std::vector<Blob::Ptr>& outData) {
+    template <class src_t, class dst_t>
+    void exec_strided_slice(const std::vector<Blob::CPtr>& inData, std::vector<Blob::Ptr>& outData) {
+        const src_t* src_data = inData[STRIDEDSLICE_DATA]->cbuffer().as<const src_t*>() +
+                          inData[STRIDEDSLICE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        dst_t* dst_data = outData[0]->cbuffer().as<dst_t*>() +
+                          outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        if (src_dims.size() == max_dims && shrink_axis == 0 && stride_dms[stride_dms.size() - 1] == 1 &&
+            stride_dms.size() > 1)
+            strided_slice_vp(src_data, dst_data);
+        else if (src_dims.size() == max_dims && shrink_axis == 0)
+            strided_slice_p(src_data, dst_data);
+        else
+            strided_slice(src_data, dst_data, our_dims);
+    }
+
+    void infer(const std::vector<Blob::CPtr>& inData, std::vector<Blob::Ptr>& outData) {
         dst_dims = outData[0]->getTensorDesc().getDims();
         size_t range = out_dims.size() < dst_dims.size() ? out_dims.size() : dst_dims.size();
         for (int i = 0; i < range; i++) {
             if (out_dims[i] != dst_dims[i])
-                THROW_IE_EXCEPTION << "parameter mismatch";
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: parameter mismatch";
         }
         dstStrides = outData[0]->getTensorDesc().getBlockingDesc().getStrides();
         if (dst_dims.size() == 1 && dst_dims[0] == 1)
             dstStrides.push_back(1);
         if (outData.size() != 1)
-            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
-        float* dst_data = outData[0]->cbuffer().as<float*>() +
-                          outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            THROW_IE_EXCEPTION << "StridedSlice constant inference error: Incorrect number of output edges!";
 
-        if (src_dims.size() == max_dims && shrink_axis == 0 && stride_dms[stride_dms.size() - 1] == 1 &&
-            stride_dms.size() > 1)
-            strided_slice_vp(src_data, dst_data);
-        else if (src_dims.size() == max_dims && shrink_axis == 0)
-            strided_slice_p(src_data, dst_data);
-        else
-            strided_slice(src_data, dst_data, our_dims);
+        auto compare = getPrecisionMask(inData[0]->getTensorDesc().getPrecision(), outData[0]->getTensorDesc().getPrecision());
+        switch (compare) {
+            case getPrecisionMask(Precision::FP32, Precision::FP32):
+                exec_strided_slice<PrecisionTrait<Precision::FP32>::value_type, PrecisionTrait<Precision::FP32>::value_type>(inData, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I32):
+                exec_strided_slice<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::I32>::value_type>(inData, outData);
+                break;
+            case getPrecisionMask(Precision::I32, Precision::I64):
+                exec_strided_slice<PrecisionTrait<Precision::I32>::value_type, PrecisionTrait<Precision::I64>::value_type>(inData, outData);
+                break;
+            default:
+                THROW_IE_EXCEPTION << "StridedSlice constant inference error: Unsupported precision configuration:" <<
+                                   " input precision: " << inData[0]->getTensorDesc().getPrecision() <<
+                                   " output precision: " << outData[0]->getTensorDesc().getPrecision();
+        }
     }
 
 private:
-    void strided_slice(const float* src_data, float* dst_data, std::vector<size_t>& dims) {
+    template <class src_t, class dst_t>
+    void strided_slice(const src_t* src_data, dst_t* dst_data, std::vector<size_t>& dims) {
         size_t i;
         int j;
-        size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+        size_t work_amount_dst = (dstStrides.empty() && dst_dims.empty()) ? 1 : dstStrides[0] * dst_dims[0];
         SizeVector counters(max_dims, 0);
 
         for (size_t iwork = 0; iwork < work_amount_dst; ++iwork) {
@@ -259,7 +285,8 @@ private:
         }
     }
 
-    void strided_slice_vp(const float* src_data, float* dst_data) {
+    template <class src_t, class dst_t>
+    void strided_slice_vp(const src_t* src_data, dst_t* dst_data) {
         //  Vectorized copy
         size_t dims_size_1 = dst_dims.size() - 1;
         size_t dataLength = dst_dims[dims_size_1];
@@ -296,7 +323,8 @@ private:
         });
     }
 
-    void strided_slice_p(const float* src_data, float* dst_data) {
+    template <class src_t, class dst_t>
+    void strided_slice_p(const src_t* src_data, dst_t* dst_data) {
         size_t dims_size = dst_dims.size();
         size_t work_amount_dst = dstStrides[0] * dst_dims[0];
 
@@ -360,7 +388,6 @@ private:
 
     InferenceEngine::SizeVector out_dims;
     InferenceEngine::SizeVector our_dims;
-    const float* src_data;
 };
 
 /**
@@ -381,7 +408,7 @@ public:
         _validator->parseParams(&layer);
 
         StridedSliceHelper helper(inData, params);
-        helper.infer(outData);
+        helper.infer(inData, outData);
     }
 };
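
Note: the constant-inference path above now selects a typed exec_strided_slice instantiation from the (input, output) precision pair packed by getPrecisionMask. The sketch below shows that dispatch idiom in isolation; the enum values and the mask helper are illustrative stand-ins, not the Inference Engine definitions.

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <vector>

    // Illustrative stand-ins for InferenceEngine::Precision and getPrecisionMask():
    // two precision codes are packed into one 32-bit key so a single switch can
    // pick a template instantiation per (input precision, output precision) pair.
    enum class Prec : uint16_t { FP32 = 1, I32 = 2, I64 = 3 };

    constexpr uint32_t precisionMask(Prec in, Prec out) {
        return (static_cast<uint32_t>(in) << 16) | static_cast<uint32_t>(out);
    }

    template <class src_t, class dst_t>
    void copy_cast(const std::vector<src_t>& src, std::vector<dst_t>& dst) {
        dst.assign(src.begin(), src.end());  // element-wise converting copy
    }

    void dispatch(Prec in, Prec out) {
        std::vector<float>   f32{1.f, 2.f, 3.f};
        std::vector<int32_t> i32{1, 2, 3};
        switch (precisionMask(in, out)) {
            case precisionMask(Prec::FP32, Prec::FP32): { std::vector<float>   d; copy_cast(f32, d); break; }
            case precisionMask(Prec::I32,  Prec::I32):  { std::vector<int32_t> d; copy_cast(i32, d); break; }
            case precisionMask(Prec::I32,  Prec::I64):  { std::vector<int64_t> d; copy_cast(i32, d); break; }
            default: throw std::runtime_error("Unsupported precision configuration");
        }
    }

    int main() {
        dispatch(Prec::I32, Prec::I64);
        std::cout << "dispatched I32 -> I64" << std::endl;
        return 0;
    }
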
 
index 9834ad9..722e65e 100644 (file)
@@ -14,7 +14,6 @@
 #include "shape_infer/ie_reshape_launcher.hpp"
 #include "shape_infer/ie_reshape_io_controllers.hpp"
 #include "ie_reshape_launcher.hpp"
-
 #include "built-in/ie_tensor_iterator_shape_infer.hpp"
 
 using namespace InferenceEngine;
@@ -332,4 +331,4 @@ void OutMemoryReshapeLauncher::applyChanges(CNNLayer* layer) {
 
 void OutMemoryReshapeLauncher::reset() {
     _iController->reset();
-}
+}
\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/transform/transform_network.cpp b/inference-engine/src/inference_engine/transform/transform_network.cpp
deleted file mode 100644 (file)
index 923fa3f..0000000
+++ /dev/null
@@ -1,353 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <transform/transform_network.hpp>
-#include <limits>
-#include <string>
-#include <vector>
-#include <memory>
-#include <map>
-
-using namespace InferenceEngine;
-
-Transform::Port::Port(Builder::Network& network, PortInfo port, bool isInput)
-    : network(network), port(port), input(isInput) {
-    const auto& layer = network.getLayer(port.layerId());
-    if (isInput) {
-        if (layer->getInputPorts().size() < port.portId())
-            THROW_IE_EXCEPTION << "Cannot find input port "
-                               << port.portId() << " in layer "
-                               << layer->getName();
-    } else {
-        if (layer->getOutputPorts().size() < port.portId())
-            THROW_IE_EXCEPTION << "Cannot find output port "
-                               << port.portId() << " in layer "
-                               << layer->getName();
-    }
-}
-
-PortData::Ptr Transform::Port::getData() const {
-    return input ?
-           network.getLayer(port.layerId())->getInputPorts()[port.portId()].getData() :
-           network.getLayer(port.layerId())->getOutputPorts()[port.portId()].getData();
-}
-
-const std::map<std::string, Parameter> &Transform::Port::getParameters() const {
-    return input ?
-           network.getLayer(port.layerId())->getInputPorts()[port.portId()].getParameters() :
-           network.getLayer(port.layerId())->getOutputPorts()[port.portId()].getParameters();
-}
-
-Transform::Layer Transform::Port::getLayer() const {
-    return Transform::Network(network).getLayer(getPortInfo().layerId());
-}
-
-Transform::Connection Transform::Port::getConnection() const {
-    return Connection(*this);
-}
-
-void Transform::Port::connect(const Port& port) {
-    if (this->input)
-        this->getConnection().setSource(port);
-    else
-        this->getConnection().addDestination(port);
-}
-
-void Transform::Port::disconnect() {
-    getConnection().remove();
-}
-
-const SizeVector& Transform::Port::shape() const {
-    return this->getData()->getData()->getTensorDesc().getDims();
-}
-
-PortInfo Transform::Port::getPortInfo() const {
-    return port;
-}
-
-bool Transform::Port::operator==(const Port& rObj) const {
-    return &network == &rObj.network &&
-           port == rObj.port &&
-           input == rObj.input;
-}
-
-bool Transform::Port::operator!=(const Port& rObj) const {
-    return !(*this == rObj);
-}
-
-
-Transform::Layer::Layer(Builder::Network& network, idx_t id)
-    : network(network), layerId(id) {}
-
-idx_t Transform::Layer::getId() const {
-    return layerId;
-}
-
-std::string Transform::Layer::getName() const {
-    return getLayer()->getName();
-}
-
-std::string Transform::Layer::getType() const {
-    return getLayer()->getType();
-}
-
-Builder::Layer::Ptr Transform::Layer::getLayer() const {
-    return network.getLayer(layerId);
-}
-
-Transform::Layer::operator Builder::Layer::Ptr() const {
-    return getLayer();
-}
-
-Transform::Port Transform::Layer::getInPort() const {
-    if (getLayer()->getInputPorts().size() != 1)
-        THROW_IE_EXCEPTION << "Layer " << getName()
-                           << " has more than 1 input port.";
-    return Transform::Port(network, {layerId, 0}, true);
-}
-
-Transform::Port Transform::Layer::getInPort(idx_t idx) const {
-    if (getLayer()->getInputPorts().size() <= idx)
-        THROW_IE_EXCEPTION << "Layer " << getName()
-                           << " has less than " << idx << " input port(s).";
-    return Transform::Port(network, {layerId, idx}, true);
-}
-
-std::vector<Transform::Port> Transform::Layer::getInPorts() const {
-    std::vector<Transform::Port> ports;
-    for (size_t i = 0; i < getLayer()->getInputPorts().size(); i++) {
-        ports.push_back({network, {layerId, i}, true});
-    }
-    return ports;
-}
-
-Transform::Port Transform::Layer::getOutPort() const {
-    if (getLayer()->getOutputPorts().size() != 1)
-        THROW_IE_EXCEPTION << "Layer " << getName()
-                           << " has more than 1 output port.";
-    return Transform::Port(network, {layerId, 0}, false);
-}
-
-Transform::Port Transform::Layer::getOutPort(idx_t idx) const {
-    if (getLayer()->getOutputPorts().size() <= idx)
-        THROW_IE_EXCEPTION << "Layer " << getName()
-                           << " has less than " << idx << " output port(s).";
-    return Transform::Port(network, {layerId, idx}, false);
-}
-
-std::vector<Transform::Port> Transform::Layer::getOutPorts() const {
-    std::vector<Transform::Port> ports;
-    for (size_t i = 0; i < getLayer()->getInputPorts().size(); i++) {
-        ports.push_back({network, {layerId, i}, false});
-    }
-    return ports;
-}
-
-void Transform::Layer::setParameter(const std::string& key, const Parameter& value) {
-    auto& params = getLayer()->getParameters();
-    params[key] = value;
-}
-
-Parameter& Transform::Layer::getParameter(const std::string& key) const {
-    auto& params = getLayer()->getParameters();
-    if (params.find(key) == params.end())
-        THROW_IE_EXCEPTION << "Layer " << getName() << " has no parameter " << key;
-    return params[key];
-}
-
-Transform::Connection::Connection(const Transform::Port& port)
-    : network(port.network), inPort({(std::numeric_limits<idx_t>::max)(), (std::numeric_limits<idx_t>::max)()}) {
-    if (port.input) {
-        outPorts = {port.getPortInfo()};
-        for (const auto& connection : network.getLayerConnections(port.getPortInfo().layerId())) {
-            if (connection.to() == port.getPortInfo()) {
-                inPort = connection.from();
-                break;
-            }
-        }
-    } else {
-        inPort = port.getPortInfo();
-        for (const auto& connection : network.getLayerConnections(port.getPortInfo().layerId())) {
-            if (connection.from() == port.getPortInfo()) {
-                outPorts.emplace_back(connection.to());
-            }
-        }
-    }
-}
-Transform::Connection::Connection(Builder::Network& network, const InferenceEngine::Connection& connection)
-    : Connection(network, connection.from(), connection.to()) {}
-Transform::Connection::Connection(Builder::Network& network, const PortInfo& inPort, const PortInfo& outPort)
-    : Connection(network, inPort, std::vector<PortInfo>({outPort})) {}
-Transform::Connection::Connection(Builder::Network& network, const PortInfo& inPort, const std::vector<PortInfo>& outPorts)
-    : network(network), inPort(inPort), outPorts(outPorts) {}
-
-Transform::Port Transform::Connection::getSource() const {
-    if (!inPortExist())
-        THROW_IE_EXCEPTION << "Connection doesn't have source port!";
-    return Port(network, inPort, false);
-}
-
-void Transform::Connection::setSource(const Transform::Port &port) {
-    if (inPortExist()) {
-        // disconnect old port
-        for (const auto& outPort : outPorts) {
-            network.disconnect({inPort, outPort});
-        }
-    }
-    inPort = port.getPortInfo();
-    for (const auto& outPort : outPorts) {
-        network.connect(inPort, outPort);
-    }
-}
-
-Transform::Port Transform::Connection::getDestination() const {
-    if (outPorts.size() != 1)
-        THROW_IE_EXCEPTION << "Connection has more than 1 output.";
-    return Transform::Port(network, outPorts[0], true);
-}
-
-Transform::Port Transform::Connection::getDestination(idx_t idx) {
-    if (outPorts.size() <= idx)
-        THROW_IE_EXCEPTION << "Connection has less than "
-                           << idx << " input port(s).";
-    return Transform::Port(network, outPorts[idx], true);
-}
-
-std::vector<Transform::Port> Transform::Connection::getDestinations() const {
-    std::vector<Transform::Port> ports;
-    for (const auto& port : outPorts) {
-        ports.emplace_back(network, port, true);
-    }
-    return ports;
-}
-
-void Transform::Connection::addDestination(const Transform::Port &port) {
-    for (const auto& outPort : outPorts) {
-        if (outPort == port.getPortInfo()) {
-            THROW_IE_EXCEPTION << "Cannot connect twice with one port!";
-        }
-    }
-    outPorts.emplace_back(port.getPortInfo());
-    if (!inPortExist())
-        return;
-    network.connect(inPort, outPorts[outPorts.size() - 1]);
-}
-
-void Transform::Connection::setDestination(const Transform::Port &port) {
-    if (outPorts.size() > 1) {
-        THROW_IE_EXCEPTION << "Cannot set destination for connection which has more than 1 consumer."
-                           << "Please use addDestination or setDestinations methods!";
-    }
-
-    if (!outPorts.empty()) {
-        if (inPortExist())
-            network.disconnect({inPort, outPorts[0]});
-        outPorts.clear();
-    }
-    addDestination(port);
-}
-
-void Transform::Connection::setDestinations(const std::vector<Transform::Port> &ports) {
-    if (!outPorts.empty() && outPorts.size() != ports.size())
-        THROW_IE_EXCEPTION << "Cannot change number of output connections!";
-
-    if (inPortExist()) {
-        for (const auto &port : outPorts) {
-            network.disconnect({inPort, port});
-        }
-    }
-    outPorts.clear();
-    for (const auto &port : ports) {
-        addDestination(port);
-    }
-}
-
-void Transform::Connection::remove() {
-    if (!inPortExist())
-        return;
-    for (const auto& port : outPorts) {
-        network.disconnect({inPort, port});
-    }
-}
-
-bool Transform::Connection::inPortExist() const {
-    static PortInfo uninitPort((std::numeric_limits<idx_t>::max)(), (std::numeric_limits<idx_t>::max)());
-    return inPort != uninitPort;
-}
-
-Transform::Layer Transform::Network::addLayer(const Builder::Layer &layer) {
-    idx_t layerId = network.addLayer(layer);
-    return Transform::Layer(network, layerId);
-}
-
-void Transform::Network::removeLayer(const Transform::Layer &layer) {
-    for (const auto& connection : network.getLayerConnections(layer.getId()))
-        network.disconnect(connection);
-    network.removeLayer(layer.getId());
-}
-
-Transform::Layer Transform::Network::getLayer(const std::string &name) const {
-    for (const auto& layer : network) {
-        if (layer->getName() == name)
-            return Transform::Layer(network, layer->getId());
-    }
-    THROW_IE_EXCEPTION << "Layer with name: " << name << " was not found!";
-}
-
-Transform::Layer Transform::Network::getLayer(idx_t id) const {
-    for (const auto& layer : network) {
-        if (layer->getId() == id)
-            return Transform::Layer(network, layer->getId());
-    }
-    THROW_IE_EXCEPTION << "Layer with id: " << id << " was not found!";
-}
-
-Transform::Connection Transform::Network::connect(const Transform::Layer &src,
-        const Transform::Layer &dst) {
-    Port srcPort = src.getOutPort();
-    Port dstPort = dst.getInPort();
-
-    network.connect(srcPort.getPortInfo(), dstPort.getPortInfo());
-    return Connection(network, srcPort.getPortInfo(), dstPort.getPortInfo());
-}
-
-Transform::Connection Transform::Network::connect(const Transform::Port &src,
-        const Transform::Port &dst) {
-    network.connect(src.getPortInfo(), dst.getPortInfo());
-    return Connection(network, src.getPortInfo(), dst.getPortInfo());
-}
-
-void Transform::Network::disconnect(const Transform::Layer &src, const Transform::Layer &dst) {
-    getConnection(src, dst).remove();
-}
-
-void Transform::Network::disconnect(const Transform::Port &src, const Transform::Port &dst) {
-    getConnection(src, dst).remove();
-}
-
-Builder::Network& Transform::Network::getBuilderNetwork() const {
-    return network;
-}
-
-Transform::Connection Transform::Network::getConnection(const Transform::Layer &src,
-        const Transform::Layer &dst) const {
-    Port srcPort = src.getOutPort();
-    Port dstPort = dst.getInPort();
-
-    for (const auto& connection : network.getConnections()) {
-        if (connection.from() == srcPort.getPortInfo() && connection.to() == dstPort.getPortInfo())
-            return Connection(network, srcPort.getPortInfo(), dstPort.getPortInfo());
-    }
-    THROW_IE_EXCEPTION << "Connection " << src.getName() << " -> " << dst.getName() << " was not found!";
-}
-
-Transform::Connection Transform::Network::getConnection(const Transform::Port &src,
-        const Transform::Port &dst) const {
-    for (const auto& connection : network.getConnections()) {
-        if (connection.from() == src.getPortInfo() && connection.to() == dst.getPortInfo())
-            return Connection(network, src.getPortInfo(), dst.getPortInfo());
-    }
-    THROW_IE_EXCEPTION << "Connection " << getLayer(src.getPortInfo().layerId()).getName()
-        << " -> " << getLayer(dst.getPortInfo().layerId()).getName() << " was not found!";
-}
diff --git a/inference-engine/src/inference_engine/transform/transform_network.hpp b/inference-engine/src/inference_engine/transform/transform_network.hpp
deleted file mode 100644 (file)
index a712203..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ie_parameter.hpp>
-#include <ie_builders.hpp>
-#include <string>
-#include <vector>
-#include <memory>
-#include <map>
-
-namespace InferenceEngine {
-namespace Transform {
-
-class Connection;
-class Layer;
-
-class INFERENCE_ENGINE_API_CLASS(Port) {
-public:
-    Port(Builder::Network& network, PortInfo port, bool isInput);
-    PortData::Ptr getData() const;
-    const std::map<std::string, Parameter>& getParameters() const;
-    Layer getLayer() const;
-    Connection getConnection() const;
-    void connect(const Port& port);
-    void disconnect();
-    const SizeVector& shape() const;
-    PortInfo getPortInfo() const;
-    bool operator==(const Port& rObj) const;
-    bool operator!=(const Port& rObj) const;
-
-private:
-    Builder::Network& network;
-    PortInfo port;
-    bool input;
-
-    friend class Connection;
-};
-
-class INFERENCE_ENGINE_API_CLASS(Layer) {
-public:
-    Layer(Builder::Network& network, idx_t id);
-    Port getInPort() const;
-    Port getInPort(idx_t idx) const;
-    std::vector<Port> getInPorts() const;
-    Port getOutPort() const;
-    Port getOutPort(idx_t idx) const;
-    std::vector<Port> getOutPorts() const;
-
-    void setParameter(const std::string& key, const Parameter& value);
-    Parameter& getParameter(const std::string& value) const;
-
-    idx_t getId() const;
-    std::string getName() const;
-    std::string getType() const;
-    operator Builder::Layer::Ptr() const;
-
-private:
-    Builder::Network& network;
-    idx_t layerId;
-
-    Builder::Layer::Ptr getLayer() const;
-};
-
-class INFERENCE_ENGINE_API_CLASS(Connection) {
-public:
-    explicit Connection(const Port& port);
-    Connection(Builder::Network& network, const InferenceEngine::Connection& connection);
-    Connection(Builder::Network& network, const PortInfo& inPort, const PortInfo& outPort);
-    Connection(Builder::Network& network, const PortInfo& inPort, const std::vector<PortInfo>& outPorts);
-
-    Port getSource() const;
-    void setSource(const Port& port);
-    Port getDestination() const;
-    Port getDestination(idx_t idx);
-    std::vector<Port> getDestinations() const;
-    void addDestination(const Port& port);
-    void setDestination(const Port& port);
-    void setDestinations(const std::vector<Port>& ports);
-    void remove();
-
-private:
-    Builder::Network& network;
-    PortInfo inPort;
-    std::vector<PortInfo> outPorts;
-
-    bool inPortExist() const;
-};
-
-class INFERENCE_ENGINE_API_CLASS(Network) {
-public:
-    explicit Network(Builder::Network& network): network(network) {}
-    virtual ~Network() = default;
-
-    Layer addLayer(const Builder::Layer& layer);
-    void removeLayer(const Layer& layer);
-    Layer getLayer(const std::string& name) const;
-    Layer getLayer(idx_t id) const;
-
-    Builder::Network& getBuilderNetwork() const;
-
-    Connection connect(const Layer& src, const Layer& dst);
-    Connection connect(const Port& src, const Port& dst);
-    void disconnect(const Layer& src, const Layer& dst);
-    void disconnect(const Port& src, const Port& dst);
-    Connection getConnection(const Layer& src, const Layer& dst) const;
-    Connection getConnection(const Port& src, const Port& dst) const;
-
-private:
-    Builder::Network& network;
-};
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformation.cpp b/inference-engine/src/inference_engine/transform/transformation.cpp
deleted file mode 100644 (file)
index 7852139..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <transform/transformation.hpp>
-#include <string>
-
-namespace InferenceEngine {
-namespace Transform {
-
-std::string Transformation::getName() const {
-    return name;
-}
-
-void Transformation::setName(const std::string& name) {
-    this->name = name;
-}
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformation.hpp b/inference-engine/src/inference_engine/transform/transformation.hpp
deleted file mode 100644 (file)
index 6a9a13d..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <transform/transform_network.hpp>
-#include <string>
-#include <vector>
-#include <map>
-
-namespace InferenceEngine {
-namespace Transform {
-
-class Transformation {
-    std::string name;
-public:
-    std::string getName() const;
-    void setName(const std::string& name);
-    virtual ~Transformation() = default;
-    virtual void execute(Network& network) = 0;
-};
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.cpp b/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.cpp
deleted file mode 100644 (file)
index 19e76f9..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "eltwise_broadcast.hpp"
-#include "builders/ie_network_builder.hpp"
-#include "builders/ie_reshape_layer.hpp"
-#include "builders/ie_tile_layer.hpp"
-#include "debug.h"
-#include <string>
-#include <vector>
-#include <iostream>
-
-namespace InferenceEngine {
-namespace Transform {
-
-TransformationEltwiseBroadcast::TransformationEltwiseBroadcast() {
-    this->setName("ie.transform.eltwise_broadcast");
-}
-
-void insertTileOverDimension(Transform::Network& network, Transform::Port& inputPort, size_t axis, size_t tile) {
-    auto tileLayerBuilder = Builder::TileLayer("Tile" + std::to_string(axis) + "_" + std::to_string(tile)).setAxis(axis).setTiles(tile);
-    auto tileLayer = network.addLayer(tileLayerBuilder);
-    inputPort.getConnection().setDestination(tileLayer.getInPort());
-    tileLayer.getOutPort().connect(inputPort);
-}
-
-void TransformationEltwiseBroadcast::execute(Network& network) {
-    for (auto layer : network.getBuilderNetwork()) {
-        if (layer->getType() == "Eltwise") {
-            auto eltwiseLayer = network.getLayer(layer->getName());
-            auto outShape = eltwiseLayer.getOutPort(0).shape();
-            for (auto& eltwiseInPort : eltwiseLayer.getInPorts()) {
-                auto inShape = eltwiseInPort.shape();
-                // if shape lengths are not equal then insert Reshape with shape prepended with ones
-                if (inShape.size() < outShape.size()) {
-                    std::vector<int> reshapeDims(inShape.begin(), inShape.end());
-                    reshapeDims.insert(reshapeDims.begin(), outShape.size() - inShape.size(), 1);
-                    auto reshapeLayerBuilder = Builder::ReshapeLayer(eltwiseInPort.getLayer().getName() + "/Reshape").setDims(reshapeDims);
-                    auto reshapeLayer = network.addLayer(reshapeLayerBuilder);
-                    eltwiseInPort.getConnection().setDestination(reshapeLayer.getInPort());
-                    reshapeLayer.getOutPort().connect(eltwiseInPort);
-                    SizeVector newOutShape(reshapeDims.size());
-                    // update shape of the Port
-                    for (size_t ind = 0; ind < reshapeDims.size(); ++ind)
-                        newOutShape[ind] = reshapeDims[ind];
-                    eltwiseInPort.getData()->setShape(newOutShape);
-                    inShape = newOutShape;
-                }
-                for (size_t axis = 0; axis < inShape.size(); ++axis) {
-                    if (inShape[axis] != outShape[axis]) {
-                        if (inShape[axis] != 1) {
-                            THROW_IE_EXCEPTION << "Layer " << layer->getName()
-                                               << " input has invalid shape "
-                                               << details::dumpVec(inShape)
-                                               << " which can not be broadcasted to output shape "
-                                               << details::dumpVec(outShape);
-                        }
-                        insertTileOverDimension(network, eltwiseInPort, axis, outShape[axis]);
-                    }
-                }
-            }
-        }
-    }
-}
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.hpp b/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.hpp
deleted file mode 100644 (file)
index 634a705..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <transform/transformation.hpp>
-
-namespace InferenceEngine {
-namespace Transform {
-
-class TransformationEltwiseBroadcast: public Transformation {
-public:
-    TransformationEltwiseBroadcast();
-    void execute(Network& network) override;
-};
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/lrn.cpp b/inference-engine/src/inference_engine/transform/transformations/lrn.cpp
deleted file mode 100644 (file)
index ab630a2..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "lrn.hpp"
-#include "builders/ie_network_builder.hpp"
-#include "builders/ie_power_layer.hpp"
-#include "builders/ie_eltwise_layer.hpp"
-#include "builders/ie_norm_layer.hpp"
-#include <iostream>
-#include <cmath>
-
-namespace InferenceEngine {
-namespace Transform {
-
-TransformationLRN::TransformationLRN() {
-    this->setName("ie.transform.lrn");
-}
-
-void TransformationLRN::execute(Network& network) {
-    for (auto layer : network.getBuilderNetwork()) {
-        if (layer->getType() == "LRN") {
-            auto lrnLayer = network.getLayer(layer->getName());
-            float scale_value = 1.0f / std::pow(static_cast<float>(lrnLayer.getParameter("bias")),
-                                                static_cast<float>(lrnLayer.getParameter("beta")));
-
-            auto normLayerBuilder = Builder::NormLayer(lrnLayer.getName() + "/Norm").
-                    setAlpha(static_cast<float>(lrnLayer.getParameter("alpha")) / static_cast<float>(lrnLayer.getParameter("bias"))).
-                    setSize(static_cast<unsigned int>(lrnLayer.getParameter("size"))).
-                    setBeta(static_cast<float>(lrnLayer.getParameter("beta"))).
-                    setAcrossMaps(true);
-            auto normLayer = network.addLayer(normLayerBuilder);
-
-            auto mulLayerBuilder = Builder::EltwiseLayer(lrnLayer.getName() + "/Mul").setEltwiseType(
-                    Builder::EltwiseLayer::EltwiseType::MUL);
-            auto mulLayer = network.addLayer(mulLayerBuilder);
-
-            auto tensorDesc = TensorDesc(Precision::FP32, SizeVector(4, 1), Layout::NCHW);
-            auto blob = make_shared_blob<float>(tensorDesc);
-            blob->allocate();
-            float *buffer = blob->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>();
-            buffer[0] = scale_value;
-
-            auto constLayerBuilder = Builder::ConstLayer(mulLayerBuilder.getName() + "/Const").setData(blob);
-            auto constLayer = network.addLayer(constLayerBuilder);
-
-            // re-connect input of LRN layer to input of Norm layer
-            lrnLayer.getInPort().getConnection().setDestination(normLayer.getInPort());
-
-            // multiple output of Norm with a constant
-            mulLayer.getInPort(0).connect(normLayer.getOutPort());
-            mulLayer.getInPort(1).connect(constLayer.getOutPort());
-
-            // connect consumers of LRN with mul
-            lrnLayer.getOutPort().getConnection().setSource(mulLayer.getOutPort());
-
-            network.removeLayer(lrnLayer);
-        }
-    }
-}
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/lrn.hpp b/inference-engine/src/inference_engine/transform/transformations/lrn.hpp
deleted file mode 100644 (file)
index d17fede..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <transform/transformation.hpp>
-
-namespace InferenceEngine {
-namespace Transform {
-
-class TransformationLRN: public Transformation {
-public:
-    TransformationLRN();
-    void execute(Network& network) override;
-};
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/sub.cpp b/inference-engine/src/inference_engine/transform/transformations/sub.cpp
deleted file mode 100644 (file)
index 5a3eeb8..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "sub.hpp"
-#include "builders/ie_network_builder.hpp"
-#include "builders/ie_power_layer.hpp"
-#include "builders/ie_eltwise_layer.hpp"
-#include <vector>
-#include <string>
-#include <iostream>
-
-namespace InferenceEngine {
-namespace Transform {
-
-TransformationSub::TransformationSub() {
-    this->setName("ie.transform.sub");
-}
-
-void TransformationSub::execute(Network& network) {
-    for (auto layer : network.getBuilderNetwork()) {
-        if (layer->getType() == "Eltwise" && layer->getParameters()["operation"].as<std::string>() == "sub") {
-            auto subLayer = network.getLayer(layer->getName());
-
-            auto powerLayerBuilder = Builder::PowerLayer(subLayer.getName() + "/Power").setPower(1.0f).setScale(-1.0f).setShift(0.0f);
-            auto powerLayer = network.addLayer(powerLayerBuilder);
-
-            auto eltwiseLayerBuilder = Builder::EltwiseLayer(subLayer.getName() + "/Add").setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM);
-            auto eltwiseLayer = network.addLayer(eltwiseLayerBuilder);
-
-            // negate the second input to the sub layer
-            subLayer.getInPort(1).getConnection().setDestination(powerLayer.getInPort());
-
-            // connect new eltwise with sum with two inputs
-            subLayer.getInPort(0).getConnection().setDestination(eltwiseLayer.getInPort(0));
-            eltwiseLayer.getInPort(1).connect(powerLayer.getOutPort());
-
-            // reconnect new eltwise with outputs of all eltwise with sub
-            subLayer.getOutPort().getConnection().setSource(eltwiseLayer.getOutPort());
-
-            network.removeLayer(subLayer);
-        }
-    }
-}
-
-}  // namespace Transform
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/sub.hpp b/inference-engine/src/inference_engine/transform/transformations/sub.hpp
deleted file mode 100644 (file)
index bcefc62..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <transform/transformation.hpp>
-
-namespace InferenceEngine {
-namespace Transform {
-
-class TransformationSub: public Transformation {
-public:
-    TransformationSub();
-    void execute(Network& network) override;
-};
-
-}  // namespace Transform
-}  // namespace InferenceEngine
index d100d51..4fa5611 100644 (file)
@@ -3,10 +3,31 @@
 //
 
 #pragma once
+
+#if defined(_WIN32)
+
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifndef NOMINMAX
+# define NOMINMAX
+# define NOMINMAX_UNDEF
+#endif
+
+#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
+# define _X86_
+#endif
+
+#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
+# define _AMD64_
+#endif
+
 #include <string>
-#if defined(WIN32)
-#include "w_unistd.h"
-#include "debug.h"
+#include <windef.h>
+#include <fileapi.h>
+#include <Winbase.h>
 #include <sys/stat.h>
 
 // Copied from linux libc sys/stat.h:
@@ -28,10 +49,17 @@ struct dirent {
 };
 
 class DIR {
-    WIN32_FIND_DATA FindFileData;
+    WIN32_FIND_DATAA FindFileData;
     HANDLE hFind;
     dirent *next;
 
+    static inline bool endsWith(const std::string &src, const char *with) {
+        int wl = static_cast<int>(strlen(with));
+        int so = static_cast<int>(src.length()) - wl;
+        if (so < 0) return false;
+        return 0 == strncmp(with, &src[so], wl);
+    }
+
 public:
     DIR(const DIR &other) = delete;
     DIR(DIR &&other) = delete;
@@ -39,14 +67,12 @@ public:
     DIR& operator=(DIR &&other) = delete;
 
     explicit DIR(const char *dirPath) : next(nullptr) {
-        // wchar_t  ws[1024];
-        // swprintf(ws, 1024, L"%hs\\*", dirPath);
         std::string ws = dirPath;
-        if (InferenceEngine::details::endsWith(ws, "\\"))
+        if (endsWith(ws, "\\"))
             ws += "*";
         else
             ws += "\\*";
-        hFind = FindFirstFile(ws.c_str(), &FindFileData);
+        hFind = FindFirstFileA(ws.c_str(), &FindFileData);
         FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
     }
 
@@ -71,7 +97,7 @@ public:
         size_t outSize;
         mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
         next = new dirent(wbuf);
-        FindFileData.dwReserved0 = FindNextFile(hFind, &FindFileData);
+        FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
         return next;
     }
 };
@@ -93,10 +119,20 @@ static struct dirent* readdir(DIR *dp) {
 static void closedir(DIR *dp) {
     delete dp;
 }
+
+#ifdef WIN32_LEAN_AND_MEAN_UNDEF
+# undef WIN32_LEAN_AND_MEAN
+# undef WIN32_LEAN_AND_MEAN_UNDEF
+#endif
+
+#ifdef NOMINMAX_UNDEF
+# undef NOMINMAX_UNDEF
+# undef NOMINMAX
+#endif
+
 #else
 
 #include <sys/types.h>
 #include <dirent.h>
 
 #endif
-
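
Note: the Windows branch above re-implements the POSIX opendir/readdir/closedir trio on top of FindFirstFileA/FindNextFileA, so directory scans in the engine stay portable. A minimal caller-side sketch of that pattern (built here against the system <dirent.h> so it compiles standalone; on Windows the same loop would go through the shim above):

    #include <dirent.h>   // POSIX header; on Windows the shim above provides the same names
    #include <iostream>

    int main(int argc, char* argv[]) {
        const char* path = argc > 1 ? argv[1] : ".";
        DIR* dir = opendir(path);
        if (dir == nullptr) {
            std::cerr << "Cannot open directory: " << path << std::endl;
            return 1;
        }
        size_t count = 0;
        while (dirent* entry = readdir(dir)) {
            // d_name is a wchar_t buffer in the Windows shim but char[] in POSIX,
            // so portable callers convert before printing; here we only count entries.
            static_cast<void>(entry);
            ++count;
        }
        closedir(dir);
        std::cout << count << " entries in " << path << std::endl;
        return 0;
    }
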
index 271bc56..201545b 100644 (file)
@@ -16,25 +16,38 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
     template <typename T>
     primitive_desc_iterator(const T &adesc, const mkldnn::primitive_attr &aattr, const engine &aengine) {
         mkldnn_primitive_desc_iterator_t result;
-        error::wrap_c_api(mkldnn_primitive_desc_iterator_create_v2(
-                &result, &adesc.data, aattr.get(), aengine.get(), nullptr),
-                          "could not create a primitive descriptor iterator");
-        reset(result);
+        auto sts = mkldnn_primitive_desc_iterator_create_v2(
+                &result, &adesc.data, aattr.get(), aengine.get(), nullptr);
+
+        if (sts == mkldnn_status_t::mkldnn_success)
+            reset(result);
+        else if (sts == mkldnn_status_t::mkldnn_unimplemented)
+            reset(nullptr);
+        else
+            THROW_IE_EXCEPTION << "could not create a primitive descriptor iterator";
     }
 
     template <typename T, typename TF>
     primitive_desc_iterator(const T &adesc, const mkldnn::primitive_attr &aattr,
             const engine &aengine, const TF &hint_fwd_primitive_desc) {
         mkldnn_primitive_desc_iterator_t result;
-        error::wrap_c_api(mkldnn_primitive_desc_iterator_create_v2(&result,
-                        &adesc.data,
-                        aattr.get(),
-                        aengine.get(),
-                        hint_fwd_primitive_desc.get()),
-                "could not create a primitive descriptor iterator");
-        reset(result);
+        auto sts = mkldnn_primitive_desc_iterator_create_v2(&result,
+                &adesc.data,
+                aattr.get(),
+                aengine.get(),
+                hint_fwd_primitive_desc.get());
+
+        if (sts == mkldnn_status_t::mkldnn_success)
+            reset(result);
+        else if (sts == mkldnn_status_t::mkldnn_unimplemented)
+            reset(nullptr);
+        else
+            THROW_IE_EXCEPTION << "could not create a primitive descriptor iterator";
     }
 
+    bool is_not_end() const {
+        return (handle::get() != nullptr);
+    }
 
     memory::primitive_desc fetch() const {
         memory::primitive_desc adesc;
@@ -46,9 +59,14 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
         return adesc;
     }
 
-    bool next() {
+    primitive_desc_iterator operator++(int) {
         mkldnn_status_t status = mkldnn_primitive_desc_iterator_next(get());
-        return status == mkldnn_status_t::mkldnn_success;
+        if (status == mkldnn_status_t::mkldnn_iterator_ends)
+            reset(nullptr);
+        else if (status != mkldnn_status_t::mkldnn_success)
+            THROW_IE_EXCEPTION << "could not get next iteration";
+
+        return *this;
     }
 
     memory::primitive_desc src_primitive_desc(size_t index = 0) const {
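
Note: the iteration protocol changes from bool next() to a postfix operator++ that resets the handle to null once mkldnn reports the end, paired with is_not_end(). The toy iterator below mirrors only that protocol so the new caller-side loop shape is visible; it is not the mkldnn type.

    #include <iostream>
    #include <utility>
    #include <vector>

    // Toy iterator mirroring the updated protocol: is_not_end() stands in for
    // handle::get() != nullptr, fetch() returns the current item, and the postfix
    // ++ drops validity once the end is reached (analogous to reset(nullptr)).
    class ToyIterator {
        std::vector<int> items;
        size_t pos = 0;
        bool valid = false;
    public:
        explicit ToyIterator(std::vector<int> v) : items(std::move(v)) { valid = !items.empty(); }
        bool is_not_end() const { return valid; }
        int fetch() const { return items[pos]; }
        ToyIterator operator++(int) {
            if (++pos >= items.size()) valid = false;
            return *this;
        }
    };

    int main() {
        ToyIterator itpd({3, 1, 4});
        while (itpd.is_not_end()) {   // the loop shape callers now use
            std::cout << itpd.fetch() << std::endl;
            itpd++;
        }
        return 0;
    }
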
index af464ec..9debb88 100644 (file)
@@ -151,14 +151,14 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
     auto inputDesc = getInputDesc();
     auto outputDesc = getOutputDesc();
     if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) ||
-            (inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
+            (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
         THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats.";
     if (inputDesc.getLayout() == InferenceEngine::Layout::ANY)
         THROW_IE_EXCEPTION << "Cannot get input descriptor!";
 
     auto parentPtr = getParent();
     memoryPtr.reset(new MKLDNNMemory(parentPtr->getEngine()));
-    memoryPtr->Create(MKLDNNMemoryDesc(inputDesc), mem_ptr);
+    memoryPtr->Create(MKLDNNMemoryDesc(inputDesc), mem_ptr, false);  // no pads zeroing
     status = Status::Allocated;
 }
 
@@ -209,7 +209,7 @@ const MKLDNNDims& MKLDNNEdge::getDims() {
 
         dims = outDims.ndims() ? outDims : inDims;
 
-        if (!dims.ndims())
+        if (!(outDims.ndims() == 0 && inDims.ndims() == 0) && !dims.ndims())
             THROW_IE_EXCEPTION << "Cannot detect right dims for nodes " << getParent()->getName()
                                << " and " << getChild()->getName();
     }
@@ -549,7 +549,7 @@ MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() {
 }
 
 InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() {
-    if (!memoryPtr || !dims.ndims())
+    if (!memoryPtr)
         THROW_IE_EXCEPTION << "Cannot get blob! Edge isn't initialized.";
     InferenceEngine::TensorDesc desc = getDesc();
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
new file mode 100644 (file)
index 0000000..b7916b5
--- /dev/null
@@ -0,0 +1,244 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_metric_helpers.hpp>
+#include "mkldnn_exec_network.h"
+
+#include "mkldnn_async_infer_request.h"
+#include "mkldnn_infer_request.h"
+#include "mkldnn_memory_state.h"
+#include <ie_util_internal.hpp>
+#include <graph_tools.hpp>
+#include <cnn_network_int8_normalizer.hpp>
+#include <cpp_interfaces/ie_executor_manager.hpp>
+
+#include <algorithm>
+#include <unordered_set>
+
+using namespace MKLDNNPlugin;
+using namespace MKLDNNPlugin::cpu;
+using namespace InferenceEngine;
+using InferenceEngine::details::CNNNetworkInt8Normalizer;
+
+InferenceEngine::InferRequestInternal::Ptr
+MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                          InferenceEngine::OutputsDataMap networkOutputs) {
+    if (graphs.size() > 1)  // streams use special requests that are not connected to graphs
+        return std::make_shared<MKLDNNGraphlessInferRequest>(networkInputs, networkOutputs);
+    else
+        return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs);
+}
+
+MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network,
+                                     const Config &cfg,
+                                     const MKLDNNExtensionManager::Ptr& extMgr) : extensionManager(extMgr) {
+    ICNNNetworkStats* pstats = nullptr;
+    StatusCode s = network.getStats(&pstats, nullptr);
+    // clone the network so that subsequent transformations (e.g. INT8 normalization from collected statistics) do not modify the original
+    auto clonedNetwork = cloneNet(network);
+
+    if (Precision::FP16 == network.getPrecision()) {
+        clonedNetwork->setPrecision(Precision::FP32);
+    }
+    details::CNNNetworkIterator itLayer(static_cast<ICNNNetwork*>(clonedNetwork.get()));
+    while (itLayer != details::CNNNetworkIterator()) {
+        CNNLayer::Ptr layer = *itLayer;
+        convertLayerFP16toFP32(layer);
+        itLayer++;
+    }
+
+    if (s == StatusCode::OK && pstats && !pstats->isEmpty()) {
+        CNNNetworkInt8Normalizer cnnorm;
+        cnnorm.NormalizeNetwork(*clonedNetwork, *pstats);
+    }
+
+    MKLDNNGraph::ApplyUnrollPasses(static_cast<ICNNNetwork&>(*clonedNetwork));
+
+    if (cfg.batchLimit > 1) {
+        // check topology for applicability
+        if (!CanProcessDynBatch(*clonedNetwork)) {
+            THROW_IE_EXCEPTION << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!";
+        }
+    }
+    // check whether any (affinity-related) environment variables are set and whether the user requested thread binding
+    const bool bPinningRequested = !check_env_variables() && cfg.useThreadBinding;
+    // general #threads logic
+    const int env_threads = parallel_get_env_threads();
+    const int sockets = MKLDNNPlugin::cpu::getNumberOfCPUSockets();
+    // use logical cores only for single-socket targets in throughput mode
+    const int hw_cores = cfg.throughputStreams > 1 && sockets == 1 ? parallel_get_max_threads() : getNumberOfCPUCores();
+
+    const int threads = cfg.threadsNum ? cfg.threadsNum : (env_threads ? env_threads : hw_cores);
+    const int threads_per_stream = std::max(1, threads/cfg.throughputStreams);
+
+    // graph(s) initialization in taskExecutor threads (streams), in parallel (in case of streams)
+    std::vector<Task::Ptr> tasks;
+    const int workers_per_socket = std::max(1, static_cast<int>(std::ceil(static_cast<float>(cfg.throughputStreams)/sockets)));
+    for (int n = 0; n < cfg.throughputStreams; n++) {
+        MKLDNNGraph::Ptr _graph = std::make_shared<MKLDNNGraph>();
+        graphs.push_back(_graph);
+        auto task = std::make_shared<InferenceEngine::Task>([=, &cfg]() {
+            _graph->CreateArena(threads_per_stream);
+
+            if (bPinningRequested) {
+                _graph->CreateObserver(n, threads_per_stream);
+            }
+
+            _graph->setConfig(cfg);
+            int socket = n / workers_per_socket;
+            _graph->CreateGraph(static_cast<ICNNNetwork&>(*clonedNetwork), extensionManager, socket);
+            if (cfg.throughputStreams > 1)  // for streams, each worker thread has its own graph
+                MKLDNNPlugin::MultiWorkerTaskExecutor::ptrContext.ptrGraph = _graph;
+        });
+        tasks.push_back(task);
+    }
+
+    if (cfg.throughputStreams > 1) {
+        // special executor with as many threads as requested #streams, each with its own initialization task
+        _taskExecutor = std::make_shared<MultiWorkerTaskExecutor>(tasks);
+    } else {
+        if (cfg.exclusiveAsyncRequests) {
+            // special case when all InferRequests are muxed into a single queue
+            ExecutorManager *executorManager = ExecutorManager::getInstance();
+            _taskExecutor = executorManager->getExecutor("CPU");
+        }
+        _taskExecutor->startTask(tasks[0]);
+        Task::Status sts = tasks[0]->wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
+    }
+    for (auto t : tasks)
+        t->checkException();
+
+    // Save all MemoryLayer data tensors. This relies on an implementation detail
+    // of MemoryLayer: the output edge of the MemoryLayer producer is used as
+    // storage for the tensor to keep it between infer calls.
+    if (graphs.size() == 1) {
+        for (auto &node : graphs[0]->GetNodes()) {
+            if (node->getType() == MemoryInput) {
+                auto state_store = node->getChildEdgeAt(0)->getMemoryPtr();
+                auto state_name = node->getName();
+
+                // Remove the suffix with the pair ID; it is internal information.
+                auto suffix_idx = state_name.find("/id=");
+                if (suffix_idx != std::string::npos)
+                    state_name = state_name.substr(0, suffix_idx);
+
+                memoryStates.emplace_back(new MKLDNNMemoryState(state_name, state_store));
+            }
+        }
+    }
+}
+
+void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
+    for (auto g : graphs)
+        g->setProperty(properties);
+}
+
+void MKLDNNExecNetwork::CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) {
+    auto syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
+    syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
+    auto asyncRequestImpl = std::make_shared<MKLDNNAsyncInferRequest>(syncRequestImpl, _taskExecutor,
+                                                                      _taskSynchronizer, _callbackExecutor);
+    asyncRequest.reset(new InferRequestBase<MKLDNNAsyncInferRequest>(asyncRequestImpl),
+                       [](IInferRequest *p) { p->Release(); });
+
+    asyncRequestImpl->SetPointerToPublicInterface(asyncRequest);
+
+    if (graphs.size() == 1) {  // single-stream (legacy/hetero) case - single graph for all requests
+        auto mkldnnSyncRequest = dynamic_cast<MKLDNNInferRequest *>(syncRequestImpl.get());
+        if (!mkldnnSyncRequest)
+            THROW_IE_EXCEPTION << " Cannot get mkldnn sync request.";
+        mkldnnSyncRequest->SetGraph(graphs[0]);
+    }
+}
+
+void MKLDNNExecNetwork::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
+    graphPtr = graphs[0]->dump();
+}
+
+void MKLDNNExecNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
+    Config engConfig = graphs[0]->getProperty();
+    auto option = engConfig._config.find(name);
+    if (option != engConfig._config.end()) {
+        result = option->second;
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name;
+    }
+}
+
+void MKLDNNExecNetwork::GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const {
+    if (name == METRIC_KEY(NETWORK_NAME)) {
+        result = IE_SET_METRIC(NETWORK_NAME, graphs[0]->dump()->getName());
+    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
+        std::vector<std::string> metrics;
+        metrics.push_back(METRIC_KEY(NETWORK_NAME));
+        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
+        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+        metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
+        result = IE_SET_METRIC(SUPPORTED_METRICS, metrics);
+    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
+        std::vector<std::string> configKeys;
+        for (auto && key : graphs[0]->getProperty()._config) {
+            configKeys.push_back(key.first);
+        }
+        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
+    } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
+        Config engConfig = graphs[0]->getProperty();
+        auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
+        IE_ASSERT(option != engConfig._config.end());
+        result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(std::stoi(option->second)));
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
+    }
+}
+
+bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::ICNNNetwork &network) const {
+    InputsDataMap inputs;
+    network.getInputsInfo(inputs);
+
+    CNNLayerSet inputLayers;
+    std::unordered_set<CNNLayer *> allLayers;
+
+    if (inputs.empty())
+        return false;
+
+    auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
+    if (secondLayers.empty())
+        return false;
+
+    bool check_result = true;
+    details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
+        auto type = TypeFromName(layer->type);
+        // This is a workaround (WA) for the Tile layer
+        auto tileLayer = dynamic_cast<TileLayer *>(layer.get());
+        if (tileLayer && tileLayer->axis)
+            return;
+
+        if (type != Input &&
+            type != Output &&
+            type != Convolution &&
+            type != Deconvolution &&
+            type != Activation &&
+            type != Depthwise &&
+            type != Lrn &&
+            type != Pooling &&
+            type != FullyConnected &&
+            type != Gemm &&
+            type != SoftMax &&
+            type != Split &&
+            type != Concatenation &&
+            type != Power &&
+            type != Eltwise &&
+            type != Crop &&
+            type != BatchNormalization &&
+            type != Copy) {
+            check_result = false;
+        }
+    }, false);
+
+    return check_result;
+}
+
+std::vector<IMemoryStateInternal::Ptr> MKLDNNExecNetwork::QueryState() {
+    return memoryStates;
+}
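
Note: the constructor above splits threads across streams and streams across sockets via threads_per_stream, workers_per_socket and socket = n / workers_per_socket. A small worked example of that arithmetic, with an arbitrary 28-core / 2-socket / 4-stream configuration (not a recommendation):

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    int main() {
        const int threads = 28;            // resolved from cfg.threadsNum / env threads / HW cores
        const int throughputStreams = 4;   // cfg.throughputStreams
        const int sockets = 2;             // getNumberOfCPUSockets()

        const int threads_per_stream = std::max(1, threads / throughputStreams);
        const int workers_per_socket =
            std::max(1, static_cast<int>(std::ceil(static_cast<float>(throughputStreams) / sockets)));

        for (int n = 0; n < throughputStreams; n++) {
            const int socket = n / workers_per_socket;  // the socket graph n is created on
            std::cout << "stream " << n << ": " << threads_per_stream
                      << " threads on socket " << socket << std::endl;
        }
        // Prints 7 threads per stream; streams 0,1 land on socket 0, streams 2,3 on socket 1.
        return 0;
    }
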
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h
new file mode 100644 (file)
index 0000000..2f70ac2
--- /dev/null
@@ -0,0 +1,55 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
+
+#include "mkldnn_graph.h"
+#include "mkldnn_extension_mngr.h"
+
+#include <vector>
+#include <memory>
+#include <map>
+#include <string>
+
+namespace MKLDNNPlugin {
+
+class MKLDNNExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault {
+public:
+    typedef std::shared_ptr<MKLDNNExecNetwork> Ptr;
+
+    InferenceEngine::InferRequestInternal::Ptr
+    CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+              InferenceEngine::OutputsDataMap networkOutputs) override;
+
+    void CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) override;
+
+    MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network, const Config &cfg,
+                      const MKLDNNExtensionManager::Ptr& extMgr);
+
+    virtual ~MKLDNNExecNetwork() {
+        graphs.clear();
+        extensionManager.reset();
+    }
+
+    void setProperty(const std::map<std::string, std::string> &properties);
+
+    void GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const override;
+
+    void GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const override;
+
+    void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) override;
+
+    std::vector<IMemoryStateInternal::Ptr> QueryState() override;
+
+protected:
+    MKLDNNExtensionManager::Ptr extensionManager;
+    std::vector<MKLDNNGraph::Ptr> graphs;
+    std::vector<IMemoryStateInternal::Ptr> memoryStates;
+
+    bool CanProcessDynBatch(const InferenceEngine::ICNNNetwork &network) const;
+};
+
+}  // namespace MKLDNNPlugin
\ No newline at end of file
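
Note: the GetMetric/GetConfig entry points declared here are what applications reach through ExecutableNetwork. A minimal application-side sketch, assuming the public API of this release (Core::LoadNetwork, CNNNetReader, ExecutableNetwork::GetMetric); the model path and the naive .xml -> .bin substitution are placeholders:

    #include <inference_engine.hpp>
    #include <iostream>
    #include <string>

    int main(int argc, char* argv[]) {
        if (argc < 2) {
            std::cerr << "Usage: " << argv[0] << " <model.xml>" << std::endl;
            return 1;
        }
        using namespace InferenceEngine;
        const std::string xmlPath = argv[1];
        const std::string binPath = xmlPath.substr(0, xmlPath.size() - 4) + ".bin";  // naive substitution

        Core ie;
        CNNNetReader reader;
        reader.ReadNetwork(xmlPath);
        reader.ReadWeights(binPath);
        ExecutableNetwork exec = ie.LoadNetwork(reader.getNetwork(), "CPU");

        auto name  = exec.GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>();
        auto nireq = exec.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
        std::cout << name << ": optimal number of infer requests = " << nireq << std::endl;
        return 0;
    }
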
index e687e43..96bd3c7 100644 (file)
@@ -90,6 +90,8 @@ InferenceEngine::TensorDesc MKLDNNExtensionUtils::getUninitTensorDesc(const Infe
 bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2) {
     if (desc1.getDims() != desc2.getDims() || desc1.getPrecision() != desc2.getPrecision())
         return false;
+    if (desc1.getLayout() == InferenceEngine::Layout::SCALAR && desc2.getLayout() == InferenceEngine::Layout::SCALAR)
+        return true;
     if (desc1.getLayout() == InferenceEngine::Layout::ANY || desc2.getLayout() == InferenceEngine::Layout::ANY)
         return true;
     bool batch1 = desc1.getDims()[0] == 1;
index 494accc..6cdf8c0 100644 (file)
 #include <memory>
 #include <utility>
 
-#include "details/caseless.hpp"
-
-#include "ie_metric_helpers.hpp"
 #include "mkldnn_graph.h"
+#include "mkldnn_graph_dumper.h"
 #include "mkldnn_graph_optimizer.h"
-#include <debug.h>
+#include "mkldnn_extension_utils.h"
+#include "mkldnn_extension_mngr.h"
+#include "mkldnn_memory_solver.hpp"
 #include <nodes/mkldnn_input_node.h>
 #include <nodes/mkldnn_reorder_node.h>
-#include <nodes/mkldnn_depthwise_node.h>
-#include <nodes/mkldnn_conv_node.h>
 
-#include "mkldnn_extension_utils.h"
-#include "mkldnn_extension_mngr.h"
-#include "mkldnn/omp_manager.h"
+#include <debug.h>
 #include <graph_tools.hpp>
-#include <cpp_interfaces/ie_executor_manager.hpp>
-#include "ie_algorithm.hpp"
-#include "memory_solver.hpp"
-#include "mkldnn_infer_request.h"
-#include "mkldnn_async_infer_request.h"
+#include <ie_algorithm.hpp>
 #include <blob_factory.hpp>
-#include <ie_util_internal.hpp>
 #include <net_pass.h>
 #include <details/ie_cnn_network_tools.h>
-
-#include <mkldnn_graph_dumper.h>
-
-#include <data_stats.h>
-#include "cnn_network_int8_normalizer.hpp"
-#include "ie_memcpy.h"
-
-#include "precision_utils.h"
-#include <ie_plugin_config.hpp>
+#include <ie_memcpy.h>
 
 #define XBYAK_NO_OP_NAMES
 #define XBYAK_UNDEF_JNL
 #include "../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h"
 
-#include "cnn_network_stats_impl.hpp"
-
 #include "utils/blob_dump.h"
-#include "mkldnn_plugin.h"
 
 /*****************************************************
  * Debug capability
@@ -328,7 +308,11 @@ void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionMan
         inputNodes[input.first] = layer2node[inputLayer];
 
         // Loading mean images
-        MKLDNNDims outDims(inputNodes[input.first]->getChildEdgeAt(0)->getDims());
+        MKLDNNDims outDims;
+        if (!inputNodes[input.first]->getChildEdgeAt(0)->getDims().ndims())
+            outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
+        else
+            outDims = MKLDNNDims(inputNodes[input.first]->getChildEdgeAt(0)->getDims());
         if (inputs.find(input.first) != inputs.end()) {
             InputInfo::Ptr ii = inputs[input.first];
             if (ii && ii->getPreProcess().getNumberOfChannels()) {
@@ -629,6 +613,13 @@ void MKLDNNGraph::AllocateWithReuse() {
                 // !! Fallback to individual memory allocation !!
                 // if you like to check infer without reuse just call this function without arguments.
                 edge->allocate(workspace_ptr + offset * alignment);  // alignment in byte
+
+                // TODO: WA for some tests (like strided_slice_test) which use tensors with
+                //       shape {0}, which is implicitly converted into a {1} tensor.
+                //       Zeroing the input data allows those tests to pass.
+                if (edge->getParent()->type == Input)
+                    edge->getMemoryPtr()->FillZero();
+
                 count++;
             }
         }
@@ -652,8 +643,11 @@ void MKLDNNGraph::Allocate() {
     for (auto& edge : graphEdges) edge->validate();
 }
 
-void MKLDNNGraph::CreatePrimitives() {
+void MKLDNNGraph::CreatePrimitives() { IE_PROFILING_AUTO_SCOPE(MKLDNNGraph::CreatePrimitives)
+    bool weights_caching = config.throughputStreams != 1;
     for (auto& node : graphNodes) {
+        // disable weights caching if the graph was created only once (single-stream case)
+        node->enableWeightCaching(weights_caching);
         node->createPrimitive();
     }
 }
@@ -1080,202 +1074,3 @@ void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) {
 InferenceEngine::ICNNNetwork::Ptr MKLDNNGraph::dump() const {
     return dump_graph_as_ie_net(*this);
 }
-
-bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::ICNNNetwork &network) const {
-    InputsDataMap inputs;
-    network.getInputsInfo(inputs);
-
-    CNNLayerSet inputLayers;
-    std::unordered_set<CNNLayer *> allLayers;
-
-    if (inputs.empty())
-        return false;
-
-    auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
-    if (secondLayers.empty())
-        return false;
-
-    bool check_result = true;
-    details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
-        auto type = TypeFromName(layer->type);
-        // This is WA for Tile layer
-        auto tileLayer = dynamic_cast<TileLayer *>(layer.get());
-        if (tileLayer && tileLayer->axis)
-            return;
-
-        if (type != Input &&
-            type != Output &&
-            type != Convolution &&
-            type != Deconvolution &&
-            type != Activation &&
-            type != Depthwise &&
-            type != Lrn &&
-            type != Pooling &&
-            type != FullyConnected &&
-            type != Gemm &&
-            type != SoftMax &&
-            type != Split &&
-            type != Concatenation &&
-            type != Power &&
-            type != Eltwise &&
-            type != Crop &&
-            type != BatchNormalization &&
-            type != Copy) {
-            check_result = false;
-        }
-    }, false);
-
-    return check_result;
-}
-
-InferenceEngine::InferRequestInternal::Ptr
-MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
-                                          InferenceEngine::OutputsDataMap networkOutputs) {
-    if (graphs.size() > 1)  // streams uses special requests that are not connected to graphs
-        return std::make_shared<MKLDNNGraphlessInferRequest>(networkInputs, networkOutputs);
-    else
-        return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs);
-}
-
-MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network,
-                                     const Config &cfg,
-                                     const MKLDNNExtensionManager::Ptr& extMgr) : extensionManager(extMgr) {
-    ICNNNetworkStats* pstats = nullptr;
-    StatusCode s = network.getStats(&pstats, nullptr);
-    // we are cloning network if we have statistics and we can transform network.
-    auto clonedNetwork = cloneNet(network);
-
-    if (Precision::FP16 == network.getPrecision()) {
-        clonedNetwork->setPrecision(Precision::FP32);
-    }
-    details::CNNNetworkIterator itLayer(reinterpret_cast<ICNNNetwork *>(clonedNetwork.get()));
-    while (itLayer != details::CNNNetworkIterator()) {
-        CNNLayer::Ptr layer = *itLayer;
-        convertLayerFP16toFP32(layer);
-        itLayer++;
-    }
-
-    if (s == StatusCode::OK && pstats && !pstats->isEmpty()) {
-        CNNNetworkInt8Normalizer cnnorm;
-        cnnorm.NormalizeNetwork(*clonedNetwork, *pstats);
-    }
-
-    MKLDNNGraph::ApplyUnrollPasses(*clonedNetwork);
-
-    if (cfg.batchLimit > 1) {
-        // check topology for applicability
-        if (!CanProcessDynBatch(*clonedNetwork)) {
-            THROW_IE_EXCEPTION << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!";
-        }
-    }
-    // check whether any (affinity-related) envs are set and if user requested thread binding
-    const bool bPinningRequested = !check_env_variables() && cfg.useThreadBinding;
-    // general #threads logic
-    const int env_threads = parallel_get_env_threads();
-    const int sockets = MKLDNNPlugin::cpu::getNumberOfCPUSockets();
-    // use logical cores only for single-socket targets in throughput mode
-    const int hw_cores = cfg.throughputStreams > 1 && sockets == 1 ? parallel_get_max_threads() : getNumberOfCPUCores();
-
-    const int threads = cfg.threadsNum ? cfg.threadsNum : (env_threads ? env_threads : hw_cores);
-    const int threads_per_stream = std::max(1, threads/cfg.throughputStreams);
-
-    // graph(s) initialization in taskExecutor threads (streams), in parallel (in case of streams)
-    std::vector<Task::Ptr> tasks;
-    const int workers_per_socket = std::max(1, static_cast<int>(std::ceil(static_cast<float>(cfg.throughputStreams)/sockets)));
-    for (int n = 0; n < cfg.throughputStreams; n++) {
-        MKLDNNGraph::Ptr _graph = std::make_shared<MKLDNNGraph>();
-        graphs.push_back(_graph);
-        auto task = std::make_shared<InferenceEngine::Task>([=, &cfg, &network]() {
-            _graph->CreateArena(threads_per_stream);
-
-            if (bPinningRequested) {
-                _graph->CreateObserver(n, threads_per_stream);
-            }
-
-            _graph->setConfig(cfg);
-            int socket = n / workers_per_socket;
-            _graph->CreateGraph(*clonedNetwork, extensionManager, socket);
-            if (cfg.throughputStreams > 1)  // for streams, each worker thread has it's own graph
-                MKLDNNPlugin::MultiWorkerTaskExecutor::ptrContext.ptrGraph = _graph;
-        });
-        tasks.push_back(task);
-    }
-
-    if (cfg.throughputStreams > 1) {
-        // special executor with as many threads as requested #streams, each with it's own initialization task
-        _taskExecutor = std::make_shared<MultiWorkerTaskExecutor>(tasks);
-    } else {
-        if (cfg.exclusiveAsyncRequests) {
-            // special case when all InferRequests are muxed into a single queue
-            ExecutorManager *executorManager = ExecutorManager::getInstance();
-            _taskExecutor = executorManager->getExecutor("CPU");
-        }
-        _taskExecutor->startTask(tasks[0]);
-        Task::Status sts = tasks[0]->wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
-    }
-    for (auto t : tasks)
-        t->checkException();
-}
-
-void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
-    for (auto g : graphs)
-        g->setProperty(properties);
-}
-
-void MKLDNNExecNetwork::CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) {
-    auto syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
-    syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
-    auto asyncRequestImpl = std::make_shared<MKLDNNAsyncInferRequest>(syncRequestImpl, _taskExecutor,
-                                                                      _taskSynchronizer, _callbackExecutor);
-    asyncRequest.reset(new InferRequestBase<MKLDNNAsyncInferRequest>(asyncRequestImpl),
-                       [](IInferRequest *p) { p->Release(); });
-
-    asyncRequestImpl->SetPointerToPublicInterface(asyncRequest);
-
-    if (graphs.size() == 1) {  // single-stream (legacy/hetero) case - single graph for all requests
-        auto mkldnnSyncRequest = dynamic_cast<MKLDNNInferRequest *>(syncRequestImpl.get());
-        if (!mkldnnSyncRequest)
-            THROW_IE_EXCEPTION << " Cannot get mkldnn sync request.";
-        mkldnnSyncRequest->SetGraph(graphs[0]);
-    }
-}
-
-void MKLDNNExecNetwork::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
-    graphPtr = graphs[0]->dump();
-}
-
-void MKLDNNExecNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
-    Config engConfig = graphs[0]->getProperty();
-    auto option = engConfig._config.find(name);
-    if (option != engConfig._config.end()) {
-        result = option->second;
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name;
-    }
-}
-
-void MKLDNNExecNetwork::GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const {
-    if (name == METRIC_KEY(NETWORK_NAME)) {
-        result = IE_SET_METRIC(NETWORK_NAME, graphs[0]->dump()->getName());
-    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
-        std::vector<std::string> metrics;
-        metrics.push_back(METRIC_KEY(NETWORK_NAME));
-        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
-        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-        metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
-        result = IE_SET_METRIC(SUPPORTED_METRICS, metrics);
-    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
-        std::vector<std::string> configKeys;
-        for (auto && key : graphs[0]->getProperty()._config) {
-            configKeys.push_back(key.first);
-        }
-        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
-    } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
-        Config engConfig = graphs[0]->getProperty();
-        auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
-        IE_ASSERT(option != engConfig._config.end());
-        result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(std::stoi(option->second)));
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
-    }
-}
index f02d982..057fb82 100644 (file)
@@ -4,23 +4,19 @@
 
 #pragma once
 
-#include <map>
-#include <string>
-#include <vector>
-#include <memory>
-#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
-
 #include "ie_parallel.hpp"
-#include "mkldnn_memory.h"
 #include "config.h"
-#include "perf_count.h"
-#include "mkldnn_dims.h"
+#include "mkldnn_memory.h"
 #include "mean_image.h"
 #include "mkldnn_node.h"
 #include "mkldnn_edge.h"
-#include "mkldnn_extension_utils.h"
 #include "mkldnn_streams.h"
 
+#include <map>
+#include <string>
+#include <vector>
+#include <memory>
+
 namespace MKLDNNPlugin {
 
 class MKLDNNGraph {
@@ -186,37 +182,4 @@ private:
     };
 };
 
-
-class MKLDNNExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault {
-public:
-    typedef std::shared_ptr<MKLDNNExecNetwork> Ptr;
-
-    InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
-                                                                      InferenceEngine::OutputsDataMap networkOutputs) override;
-
-    void CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) override;
-
-    MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network, const Config &cfg,
-                      const MKLDNNExtensionManager::Ptr& extMgr);
-
-    ~MKLDNNExecNetwork() {
-        graphs.clear();
-        extensionManager.reset();
-    }
-
-    void setProperty(const std::map<std::string, std::string> &properties);
-
-    void GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const override;
-
-    void GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const override;
-
-    void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) override;
-
-protected:
-    std::vector<MKLDNNGraph::Ptr> graphs;
-    MKLDNNExtensionManager::Ptr extensionManager;
-
-    bool CanProcessDynBatch(const InferenceEngine::ICNNNetwork &network) const;
-};
-
 }  // namespace MKLDNNPlugin
index 5a3728e..7ff46e5 100644 (file)
@@ -2,24 +2,28 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <blob_factory.hpp>
-#include "nodes/mkldnn_reshape_node.h"
 #include "mkldnn_graph_optimizer.h"
-#include <nodes/mkldnn_activation_node.h>
+
+#include "mkldnn_extension_utils.h"
+#include "nodes/mkldnn_reshape_node.h"
+#include "nodes/mkldnn_activation_node.h"
 #include "nodes/mkldnn_pooling_node.h"
 #include "nodes/mkldnn_eltwise_node.h"
 #include "nodes/mkldnn_depthwise_node.h"
 #include "nodes/mkldnn_concat_node.h"
 #include "nodes/mkldnn_reorder_node.h"
+#include "nodes/mkldnn_conv_node.h"
+#include "nodes/mkldnn_bin_conv_node.h"
+#include "nodes/mkldnn_quantize_node.h"
+
+#include <blob_factory.hpp>
+#include <ie_layers_internal.hpp>
+#include <cpu_isa_traits.hpp>
 
 #include <string>
 #include <list>
 #include <memory>
 #include <set>
-#include <ie_layers_internal.hpp>
-#include <nodes/mkldnn_bin_conv_node.h>
-#include <nodes/mkldnn_quantize_node.h>
-#include "cpu_isa_traits.hpp"
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -111,8 +115,8 @@ void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) {
         // TODO: Rewrite topology optimizer at all. it should be clean and understandable
         auto concat = conv->getChildEdgeAt(0)->getChild();
         // Merge and remove Convolution
-        for (size_t i = 1; i < split->getChildEdges().size(); i++) {
-            auto peerInEdge = split->getChildEdgeAt(i);
+        while (split->getChildEdges().size() > 1) {
+            auto peerInEdge = split->getChildEdgeAt(1);
             auto peer = peerInEdge->getChild();
             conv->mergeWith(peer);
             convInDims[1] += (peerInEdge->getDims())[1];
@@ -537,16 +541,28 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
         auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
         auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
 
-        bool isSutableParent1 = (parent1->getType() == Convolution && parent1->fusedWith.empty()) ||
-                                parent1->getType() == BinaryConvolution;
-        bool isSutableParent2 = (parent2->getType() == Convolution && parent2->fusedWith.empty()) ||
-                                parent2->getType() == BinaryConvolution;
+        bool isSutableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution;
+        bool isSutableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution;
+
+        auto* parentNode1 = dynamic_cast<MKLDNNConvolutionNode *>(parent1.get());
+        if (parentNode1) {
+            if (parentNode1->getCnnLayer()->precision == Precision::FP32) {
+                isSutableParent1 = isSutableParent1 && parentNode1->getFusedWith().empty();
+            }
+        }
+
+        auto* parentNode2 = dynamic_cast<MKLDNNConvolutionNode *>(parent2.get());
+        if (parentNode2) {
+            if (parentNode2->getCnnLayer()->precision == Precision::FP32) {
+                isSutableParent2 = isSutableParent2 && parentNode2->getFusedWith().empty();
+            }
+        }
 
         if (!isSutableParent1 && !isSutableParent2)
             continue;
 
-        auto mergedConv = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent1 : parent2;
-        auto peerNode = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent2 : parent1;
+        auto mergedConv = isSutableParent1 ? parent1 : parent2;
+        auto peerNode = isSutableParent1 ? parent2 : parent1;
         if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
             mergedConv->getChildEdges().size() != 1) {
             mergedConv = parent2;
index 0ab8ed5..68929cd 100644 (file)
@@ -48,7 +48,7 @@ void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory
     Create(desc, data);
 }
 
-void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data) {
+void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bool pads_zeroing) {
     auto primitive_desc = memory::primitive_desc(desc, eng);
     uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(desc.data.data_type));
 
@@ -64,13 +64,25 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data) {
                 real_size *= prim->get_primitive_desc().desc().data.layout_desc.blocking.padding_dims[i];
             }
         }
-        uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
-        dataPtr += itemSize * prim->get_primitive_desc().desc().data.layout_desc.blocking.offset_padding;
-
-        memset(dataPtr, 0, real_size * itemSize);
     } else {
         // MKLDNN accepts not a const data, probably need to remove some level of consteness in a call stack
-        prim.reset(new memory(primitive_desc, const_cast<void*>(data)));
+
+        // ========================
+        // Equivalent of constructor memory(const primitive_desc &desc, void *hdl)
+        // but with the ability to skip pads zeroing.
+        mkldnn_primitive_t result;
+        error::wrap_c_api(mkldnn_primitive_create(&result, primitive_desc.get(), nullptr, nullptr),
+                "could not create a memory primitive");
+        auto *mem = new memory(nullptr);
+        mem->reset(result);
+        if (pads_zeroing)
+            mem->set_data_handle(const_cast<void*>(data));
+        else
+            mem->set_data_handle_no_pads_proc(const_cast<void*>(data));
+        //
+        // ========================
+
+        prim.reset(mem);
     }
 }
 
@@ -83,10 +95,10 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format format, co
 
         std::vector<ptrdiff_t> dims(memData.dims, memData.dims + memData.ndims);
 
-        auto dataType = GetDataType();
+        auto data_type = GetDataType();
 
         MKLDNNMemory src(eng);
-        src.Create(dims, dataType, format, data);
+        src.Create(dims, data_type, format, data);
 
         std::shared_ptr<mkldnn::reorder> pReorder =
                 std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(src.GetPrimitive(), GetPrimitive()));
@@ -238,6 +250,8 @@ bool MKLDNNMemory::IsPlainFormat(memory::format format) {
 
 memory::format MKLDNNMemory::GetPlainFormat(memory::dims dims) {
     switch (dims.size()) {
+        case 0:
+            return memory::x;
         case 1:
             return memory::x;
         case 2:
@@ -313,6 +327,8 @@ memory::format MKLDNNMemory::Convert(const InferenceEngine::Layout layout) {
             return memory::nc;
         case C:
             return memory::x;
+        case SCALAR:
+            return memory::x;
         default:
             return memory::blocked;
     }
@@ -437,7 +453,12 @@ MKLDNNMemoryDesc::operator mkldnn::memory::desc() const {
 MKLDNNMemoryDesc::MKLDNNMemoryDesc(mkldnn::memory::dims dims, mkldnn::memory::data_type dataType,
                                    mkldnn::memory::format format): desc(dims, dataType, mkldnn::memory::any) {
     if (format != memory::blocked) {
-        desc = mkldnn::memory::desc(dims, dataType, format);
+        if (format == memory::x && dims.size() == 0) {
+            desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format);
+            MKLDNNMemory::CreateBlockingDesc(desc);
+        } else {
+            desc = mkldnn::memory::desc(dims, dataType, format);
+        }
         return;
     }
     MKLDNNMemory::CreateBlockingDesc(desc);
index 0a047dd..f11a9a3 100644 (file)
@@ -101,7 +101,7 @@ public:
     void Create(mkldnn::memory::dims dims, mkldnn::memory::data_type data_type, mkldnn::memory::format format,
                 const void* data = nullptr);
 
-    void Create(const mkldnn::memory::desc& desc, const void* data = nullptr);
+    void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true);
 
     void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format format, const void* data, size_t size, bool ftz = true) const;
     void SetData(const MKLDNNMemory& memory, bool ftz = true) const;
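For reference, a minimal sketch of how the new pads_zeroing flag can be used; the engine, dims and buffer below are illustrative assumptions, not call sites from this change:

    // Hypothetical caller: wrap an externally owned buffer in MKLDNNMemory
    // without zero-filling mkldnn's padded region. `eng` and `buffer` are
    // assumptions; only the pads_zeroing argument comes from this change.
    #include "mkldnn_memory.h"

    void wrapExternalBuffer(const mkldnn::engine& eng, void* buffer) {
        using namespace mkldnn;

        MKLDNNPlugin::MKLDNNMemory mem(eng);
        memory::desc md({1, 16, 56, 56}, memory::data_type::f32, memory::format::nChw8c);
        mem.Create(md, buffer, /*pads_zeroing=*/false);  // takes the set_data_handle_no_pads_proc() path
    }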
@@ -2,15 +2,15 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "memory_solver.hpp"
+#include "mkldnn_memory_solver.hpp"
 
-#include "details/ie_exception.hpp"
+#include <details/ie_exception.hpp>
 
 #include <algorithm>
 #include <vector>
 #include <map>
 
-namespace InferenceEngine {
+namespace MKLDNNPlugin {
 
 MemorySolver::MemorySolver(const std::vector<Box>& boxes) : _boxes(boxes) {
     int max_ts = 0;
@@ -133,4 +133,4 @@ void MemorySolver::calcDepth() {
     }
 }
 
-}  // namespace InferenceEngine
+}  // namespace MKLDNNPlugin
@@ -13,7 +13,7 @@
 #include <vector>
 #include <map>
 
-namespace InferenceEngine {
+namespace MKLDNNPlugin {
 
 /**
  * @brief Helps to solve issue of optimal memory allocation only for particular
@@ -42,7 +42,7 @@ namespace InferenceEngine {
  *  Exec order is predefined.
  */
 
-class INFERENCE_ENGINE_API_CLASS(MemorySolver) {
+class MemorySolver {
 public:
     /** @brief Representation of edge (size and live time)*/
     struct Box {
@@ -89,4 +89,4 @@ private:
     void calcDepth();
 };
 
-}  // namespace InferenceEngine
+}  // namespace MKLDNNPlugin
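To make the intent of the relocated solver concrete, here is a small usage sketch; the Box field layout ({start, finish, size, id}) and the solve()/getOffset() calls are taken from the in-tree header and should be treated as assumptions rather than part of this hunk:

    // Three tensors with overlapping lifetimes over a predefined execution
    // order. solve() packs them into one linear arena; getOffset() returns a
    // box's byte offset inside that arena.
    #include "mkldnn_memory_solver.hpp"
    #include <vector>

    int64_t planArena() {
        using MKLDNNPlugin::MemorySolver;

        std::vector<MemorySolver::Box> boxes = {
            /* start, finish, size, id */
            {0, 1, 1024, 0},   // alive during execution steps 0..1
            {1, 2, 2048, 1},   // overlaps box 0 only at step 1
            {2, 3, 1024, 2},   // free to reuse the bytes of box 0
        };

        MemorySolver solver(boxes);
        const int64_t arenaBytes = solver.solve();  // total bytes for one shared arena
        (void)solver.getOffset(1);                  // byte offset of the box with id == 1
        return arenaBytes;
    }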
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.cpp
new file mode 100644 (file)
index 0000000..aff1134
--- /dev/null
@@ -0,0 +1,35 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mkldnn_memory_state.h"
+#include "mkldnn_extension_utils.h"
+
+using namespace InferenceEngine;
+
+namespace MKLDNNPlugin {
+
+std::string MKLDNNMemoryState::GetName() const {
+    return name;
+}
+
+void MKLDNNMemoryState::Reset() {
+    storage->FillZero();
+}
+
+void MKLDNNMemoryState::SetState(Blob::Ptr newState) {
+    auto prec = newState->getTensorDesc().getPrecision();
+    auto data_type = MKLDNNExtensionUtils::IEPrecisionToDataType(prec);
+    auto data_layout = MKLDNNMemory::Convert(newState->getTensorDesc().getLayout());
+    auto data_ptr = newState->cbuffer().as<void*>();
+    auto data_size = newState->byteSize();
+
+    storage->SetData(data_type, data_layout, data_ptr, data_size);
+}
+
+InferenceEngine::Blob::CPtr MKLDNNMemoryState::GetLastState() const {
+    THROW_IE_EXCEPTION << "GetLastState method is not implemented for MemoryState";
+    return nullptr;
+}
+
+}  // namespace MKLDNNPlugin
\ No newline at end of file
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h
new file mode 100644 (file)
index 0000000..a2d94ab
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "cpp_interfaces/impl/ie_memory_state_internal.hpp"
+#include "mkldnn_memory.h"
+
+#include <string>
+
+namespace MKLDNNPlugin {
+
+class MKLDNNMemoryState : public InferenceEngine::IMemoryStateInternal {
+public:
+    MKLDNNMemoryState(std::string name, MKLDNNMemoryPtr storage) :
+            name(name), storage(storage) {}
+
+    std::string GetName() const override;
+    void Reset() override;
+    void SetState(InferenceEngine::Blob::Ptr newState) override;
+    InferenceEngine::Blob::CPtr GetLastState() const override;
+
+private:
+    std::string name;
+    MKLDNNMemoryPtr storage;
+};
+
+}  // namespace MKLDNNPlugin
\ No newline at end of file
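A short usage sketch of the new class follows; the state name, the blob shape and the pre-built `storage` (an MKLDNNMemoryPtr already created on the engine and sized for the state tensor) are assumptions for illustration:

    // MKLDNNMemoryState only wraps an existing MKLDNN memory object.
    #include "mkldnn_memory_state.h"
    #include <ie_blob.h>
    #include <memory>

    void resetAndSeedState(const MKLDNNPlugin::MKLDNNMemoryPtr& storage) {
        auto state = std::make_shared<MKLDNNPlugin::MKLDNNMemoryState>("rnn_hidden_state", storage);

        InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {1, 128},
                                       InferenceEngine::Layout::NC);
        auto blob = InferenceEngine::make_shared_blob<float>(td);
        blob->allocate();

        state->SetState(blob);  // reorders the blob contents into the wrapped MKLDNN memory
        state->Reset();         // zero-fills the backing storage
        // GetLastState() deliberately throws: it is not implemented for this backend.
    }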
index 3739d31..7edda28 100644 (file)
@@ -382,7 +382,9 @@ const std::vector<MKLDNNEdgePtr> MKLDNNNode::getChildEdgesAtPort(size_t idx) con
 
 
 std::vector<memory::format> MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const {
-    if (dims.ndims() == 1)
+    if (dims.ndims() == 0)
+        return {memory::format::x};
+    else if (dims.ndims() == 1)
         return {memory::format::x};
     else if (dims.ndims() == 2)
         return {memory::format::nc};
@@ -406,46 +408,42 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
         return;
 
     for (auto& desc : descs) {
-        try {
-            std::shared_ptr<primitive_desc_iterator> itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
-            do {
-                InferenceEngine::LayerConfig config;
-                config.dynBatchSupport = true;
-                for (size_t i = 0; i < desc.inputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(*itpd, i));
-                    config.inConfs.push_back(dataConfig);
-                }
+        auto itpd = desc.createPrimitiveDescriptorIterator(engine);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig config;
+            config.dynBatchSupport = true;
+            for (size_t i = 0; i < desc.inputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i));
+                config.inConfs.push_back(dataConfig);
+            }
 
-                std::vector<mkldnn::memory::format> outFormats;
-                for (size_t i = 0; i < desc.outputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = canBeInPlace() ? 0 : -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(*itpd, i));
-                    config.outConfs.push_back(dataConfig);
-
-                    auto primDesc = itpd->fetch();
-                    auto dstPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(dst_pd), 0);
-                    if (dstPrimDesc) {
-                        outFormats.emplace_back(static_cast<memory::format>(itpd->dst_primitive_desc().desc().data.format));
-                    } else {
-                        // This path is needed to correctly handle Deconvolution node
-                        auto diffSrcPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(diff_src_pd), 0);
-                        if (diffSrcPrimDesc) {
-                            outFormats.emplace_back(static_cast<memory::format>(itpd->diff_src_primitive_desc().desc().data.format));
-                        }
+            std::vector<mkldnn::memory::format> outFormats;
+            for (size_t i = 0; i < desc.outputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = canBeInPlace() ? 0 : -1;
+                dataConfig.constant = false;
+                dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i));
+                config.outConfs.push_back(dataConfig);
+
+                auto primDesc = itpd.fetch();
+                auto dstPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(dst_pd), 0);
+                if (dstPrimDesc) {
+                    outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
+                } else {
+                    // This path is needed to correctly handle Deconvolution node
+                    auto diffSrcPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(diff_src_pd), 0);
+                    if (diffSrcPrimDesc) {
+                        outFormats.emplace_back(static_cast<memory::format>(itpd.diff_src_primitive_desc().desc().data.format));
                     }
                 }
-                impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str());
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
 
-                supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
-            } while (itpd->next());
-        } catch (std::exception& e) {
-            // it throw exception in case of no implementation found
-            continue;
+            supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
+            itpd++;
         }
     }
 }
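The same mechanical change repeats across the node implementations below; distilled, the new iteration idiom is (sketch only, loop body elided):

    // An exhausted iterator now expresses "no implementation found",
    // so the old try/catch around the do/while loop is gone.
    auto itpd = desc.createPrimitiveDescriptorIterator(engine, attr);
    while (itpd.is_not_end()) {
        // ... build a LayerConfig from getSrcMemDesc()/getDstMemDesc() and
        //     parse_impl_name(itpd.get_impl_info_str()), then record it ...
        itpd++;
    }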
@@ -468,47 +466,46 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
     InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
     size_t selected_count = 0;
     for (size_t j = 0; j < descs.size(); j++) {
-        try {
-            const auto &desc = descs[j];
-            std::shared_ptr<primitive_desc_iterator> itpd;
-            if (attr == nullptr) {
-                itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
-            } else {
-                itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine, *(attr.get())));
+        const auto &desc = descs[j];
+        std::shared_ptr<primitive_desc_iterator> itpd;
+        if (attr == nullptr) {
+            itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
+        } else {
+            itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine, *(attr.get())));
+        }
+        while (itpd->is_not_end()) {
+            InferenceEngine::LayerConfig cfg;
+            cfg.dynBatchSupport = true;
+            for (size_t i = 0; i < desc.inputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = canBeInPlace() ? 0 : -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(*itpd, i);
+                cfg.inConfs.push_back(dataConfig);
             }
-            do {
-                InferenceEngine::LayerConfig cfg;
-                cfg.dynBatchSupport = true;
-                for (size_t i = 0; i < desc.inputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = canBeInPlace() ? 0 : -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(*itpd, i);
-                    cfg.inConfs.push_back(dataConfig);
-                }
 
-                for (size_t i = 0; i < desc.outputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(*itpd, i);
-                    cfg.outConfs.push_back(dataConfig);
-                }
-                impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str().c_str());
-                if (selected_count == selectedPrimitiveDescriptorIndex) {
-                    if (impl_type != selectedPD->getImplementationType()) {
-                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
-                    }
-                    rightConfig = cfg;
+            for (size_t i = 0; i < desc.outputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(*itpd, i);
+                cfg.outConfs.push_back(dataConfig);
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str().c_str());
+            if (selected_count == selectedPrimitiveDescriptorIndex) {
+                if (impl_type != selectedPD->getImplementationType()) {
+                    THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                 }
-                if (j == descs.size() - 1) {
-                    if (impl_type == selectedPD->getImplementationType()) {
-                        rightConfig = config;
-                    }
+                rightConfig = cfg;
+            }
+            if (j == descs.size() - 1) {
+                if (impl_type == selectedPD->getImplementationType()) {
+                    rightConfig = config;
                 }
-                selected_count++;
-            } while (itpd->next());
-        } catch(...) {}
+            }
+            selected_count++;
+            (*itpd)++;
+        }
     }
 
     if (descs.empty()) {
@@ -615,30 +612,38 @@ void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::pri
     for (size_t i = 0; i < internalBlobs.size(); i++) {
         const auto &internalBlob = internalBlobs[i];
 
-        const uint64_t data_hash = Engine::GetWeightsSharing(socket)->GetHashFunc().hash(
-                internalBlob->buffer(), internalBlob->byteSize());
-        const std::string string_hash = name + "_" + std::to_string(i)
-                                     + "_" + std::to_string(internalBlob->byteSize())
-                                     + "_" + std::to_string(data_hash);
-        MKLDNNMemoryPtr ptr =
-                Engine::GetWeightsSharing(socket)->findOrCreate(string_hash, [&] () {
-                    MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine));
-                    _ptr->Create(intDescs[i]);
-                    MKLDNNMemory memory(engine);
-
-                    auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc());
-                    auto newFormat = newDesc.getFormat();
-                    if (newFormat == mkldnn::memory::ncdhw) {
-                        newFormat = mkldnn::memory::goihw;
-                    }
-                    if (newFormat == mkldnn::memory::nchw) {
-                        newFormat = mkldnn::memory::oihw;
-                    }
-                    memory.Create(MKLDNNMemoryDesc(newDesc.getDims(), newDesc.getDataType(), newFormat), internalBlob->buffer());
-                    auto aformat = memory.GetFormat();
-                    _ptr->SetData(memory);
-                    return _ptr;
-                });
+        auto create = [&] () {
+            MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine));
+            _ptr->Create(intDescs[i]);
+            MKLDNNMemory memory(engine);
+
+            auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc());
+            auto newFormat = newDesc.getFormat();
+            if (newFormat == mkldnn::memory::ncdhw) {
+                newFormat = mkldnn::memory::goihw;
+            }
+            if (newFormat == mkldnn::memory::nchw) {
+                newFormat = mkldnn::memory::oihw;
+            }
+            memory.Create(MKLDNNMemoryDesc(newDesc.getDims(), newDesc.getDataType(), newFormat), internalBlob->buffer());
+            auto aformat = memory.GetFormat();
+            _ptr->SetData(memory);
+            return _ptr;
+        };
+
+        MKLDNNMemoryPtr ptr;
+        if (weight_caching) {
+            const uint64_t data_hash = Engine::GetWeightsSharing(socket)->GetHashFunc().hash(
+                    internalBlob->buffer(), internalBlob->byteSize());
+
+            const std::string string_hash = name + "_" + std::to_string(i)
+                                            + "_" + std::to_string(internalBlob->byteSize())
+                                            + "_" + std::to_string(data_hash);
+
+            ptr = Engine::GetWeightsSharing(socket)->findOrCreate(string_hash, create);
+        } else {
+            ptr = create();
+        }
         internalBlobMemory.push_back(ptr);
     }
 }
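The hunk above relies on Engine::GetWeightsSharing(socket)->findOrCreate(). Purely as an illustration (this is not the actual Engine API), the contract it assumes boils down to:

    // Returns the entry stored under `key`, or runs the factory exactly once
    // and caches its result; callers on different streams then share one copy
    // of each reordered weights tensor.
    #include <functional>
    #include <map>
    #include <mutex>
    #include <string>

    template <typename T>
    class SimpleSharedCache {
    public:
        T findOrCreate(const std::string& key, const std::function<T()>& create) {
            std::lock_guard<std::mutex> lock(_mutex);
            auto it = _map.find(key);
            if (it == _map.end())
                it = _map.emplace(key, create()).first;
            return it->second;
        }

    private:
        std::mutex _mutex;
        std::map<std::string, T> _map;
    };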
@@ -928,10 +933,18 @@ int MKLDNNNode::batchToProcess() {
 
 int MKLDNNNode::getMaxBatch() {
     // FIXME: batch != 0 dims number
-    if (!inDims.empty())
-        return inDims[0][0];
-    if (!outDims.empty())
-        return outDims[0][0];
+    if (!inDims.empty()) {
+        if (inDims[0].ndims())
+            return inDims[0][0];
+        else
+            return 1;
+    }
+    if (!outDims.empty()) {
+        if (outDims[0].ndims())
+            return outDims[0][0];
+        else
+            return 1;
+    }
     return 0;
 }
 
index 60206d8..04e61d0 100644 (file)
@@ -402,31 +402,28 @@ public:
             THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set for node " << getName() << ".";
 
         for (const auto& desc : descs) {
-            try {
-                mkldnn::primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine, attr);
-                do {
-                    std::vector<InferenceEngine::TensorDesc> srcDescs;
-                    for (size_t i = 0; i < desc.inputNumbers(); i++)
-                        srcDescs.push_back(getSrcMemDesc(itpd, i));
-
-                    std::vector<InferenceEngine::TensorDesc> dstDescs;
-                    for (size_t i = 0; i < desc.outputNumbers(); i++)
-                        dstDescs.push_back(getDstMemDesc(itpd, i));
-
-                    impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
-
-                    if (impl_type == selected_pd->getImplementationType() &&
-                        descsEqual(srcDescs, selected_pd->getConfig().inConfs) &&
-                        descsEqual(dstDescs, selected_pd->getConfig().outConfs)) {
-                        prepareMemory(selected_pd, itpd);
-                        PD prim_desc = createPd<PD, D, FPD>(desc);
-                        itpd.getPrimitiveDescriptor(prim_desc);
-                        return prim_desc;
-                    }
-                } while (itpd.next());
-            } catch (std::exception& e) {
-                // it throw exception in case of no implementation found
-                continue;
+            auto itpd = desc.createPrimitiveDescriptorIterator(engine, attr);
+
+            while (itpd.is_not_end())  {
+                std::vector<InferenceEngine::TensorDesc> srcDescs;
+                for (size_t i = 0; i < desc.inputNumbers(); i++)
+                    srcDescs.push_back(getSrcMemDesc(itpd, i));
+
+                std::vector<InferenceEngine::TensorDesc> dstDescs;
+                for (size_t i = 0; i < desc.outputNumbers(); i++)
+                    dstDescs.push_back(getDstMemDesc(itpd, i));
+
+                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+
+                if (impl_type == selected_pd->getImplementationType() &&
+                    descsEqual(srcDescs, selected_pd->getConfig().inConfs) &&
+                    descsEqual(dstDescs, selected_pd->getConfig().outConfs)) {
+                    prepareMemory(selected_pd, itpd);
+                    PD prim_desc = createPd<PD, D, FPD>(desc);
+                    itpd.getPrimitiveDescriptor(prim_desc);
+                    return prim_desc;
+                }
+                itpd++;
             }
         }
 
@@ -512,6 +509,13 @@ protected:
     int batchToProcess();
     int whichSocket() { return socket; }
 
+    // TODO: While the CPU plugin has no easy way to clone a graph object, we use weight
+    //       caching in the global Engine context to avoid tensor duplication. This reduces
+    //       memory consumption with throughput streams, where the graph is duplicated
+    //       for a single input ICNNNetwork.
+    //       Remove this flag once graph cloning functionality is added.
+    void enableWeightCaching(bool val) { weight_caching = val; }
+
     InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, bool weights);
 
     template<typename To>
@@ -537,6 +541,7 @@ private:
     Type type;
     int execIndex = -1;
     int socket;
+    bool weight_caching = false;
 
     std::string typeToStr(Type type);
 
index 6ca5ba6..9fbfcd1 100644 (file)
@@ -209,7 +209,7 @@ void Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin*& plugin, ResponseDesc *resp) noexcept {
     try {
         plugin = make_ie_compatible_plugin(
-                {{2, 0},
+                {{2, 1},
                  CI_BUILD_NUMBER,
                  "MKLDNNPlugin"}, std::make_shared<Engine>());
         return OK;
index 4ae7fed..f46c70d 100644 (file)
@@ -4,14 +4,15 @@
 
 #pragma once
 
-#include "mkldnn_graph.h"
+#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
+#include "mkldnn_exec_network.h"
+
 #include <string>
 #include <map>
 #include <unordered_map>
 #include <memory>
 #include <functional>
 #include <vector>
-#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
 
 namespace MKLDNNPlugin {
 
index 68ad8a3..a0c2159 100644 (file)
@@ -134,10 +134,10 @@ bool pin_current_thread_to_socket(int socket) {
 MultiWorkerTaskExecutor::MultiWorkerTaskExecutor(const std::vector<Task::Ptr>& init_tasks, std::string name) :
         _isStopped(false), _name(name), _initCount(0) {
     const int sockets = MKLDNNPlugin::cpu::getNumberOfCPUSockets();
-    const int worker_per_sockets = init_tasks.size() / sockets;
+    const int worker_per_sockets = (std::max)(1, static_cast<int>(std::ceil(static_cast<float>(init_tasks.size()) / sockets)));
     for (int t= 0; t < init_tasks.size(); t++) {
         _threads.push_back(std::thread([&, t, init_tasks] {
-            int socket = t/worker_per_sockets;
+            int socket = t / worker_per_sockets;
             pin_current_thread_to_socket(socket);
             // initialization (no contention, every worker thread is doing it's own task)
             init_tasks[t]->runNoThrowNoBusyCheck();
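A quick worked example of the new distribution: with 5 init tasks on 2 sockets, worker_per_sockets = max(1, ceil(5/2)) = 3, so threads 0-2 pin to socket 0 and threads 3-4 to socket 1. With a single task on 2 sockets the old integer division produced 0, and t / worker_per_sockets would divide by zero; the new formula yields 1.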
index 5fa64e3..1dad240 100644 (file)
@@ -238,7 +238,7 @@ void MKLDNNBatchNormalizationNode::initSupportedPrimitiveDescriptors() {
     // BN primitive doesn't support strides
     for (auto& desc : descs) {
         primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine());
-        do {
+        while (itpd.is_not_end()) {
             InferenceEngine::LayerConfig config;
             config.dynBatchSupport = true;
             for (size_t i = 0; i < desc.inputNumbers(); i++) {
@@ -262,7 +262,8 @@ void MKLDNNBatchNormalizationNode::initSupportedPrimitiveDescriptors() {
             impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
 
             supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
-        } while (itpd.next());
+            itpd++;
+        }
     }
 }
 
index f17a3ba..0f6320c 100644 (file)
@@ -321,47 +321,43 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
     setPostOps(attr);
 
     for (auto& desc : descs) {
-        try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
-            do {
-                InferenceEngine::LayerConfig config;
-                config.dynBatchSupport = true;
-                for (size_t i = 0; i < desc.inputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, i);
-                    if (!isGrouped)
-                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
-                    config.inConfs.push_back(dataConfig);
-                }
+        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig config;
+            config.dynBatchSupport = true;
+            for (size_t i = 0; i < desc.inputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(itpd, i);
+                if (!isGrouped)
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                config.inConfs.push_back(dataConfig);
+            }
 
-                std::vector<memory::format> outFormats;
-                for (size_t i = 0; i < desc.outputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    if (withSum) {
-                        dataConfig.inPlace = 1;
-                    }
+            std::vector<memory::format> outFormats;
+            for (size_t i = 0; i < desc.outputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                if (withSum) {
+                    dataConfig.inPlace = 1;
+                }
 
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, i);
-                    if (!isGrouped)
-                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
-                    config.outConfs.push_back(dataConfig);
-                    outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(itpd, i);
+                if (!isGrouped)
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                config.outConfs.push_back(dataConfig);
+                outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
 
-                    if (withSum) {
-                        dataConfig.inPlace = -1;
-                        config.inConfs.push_back(dataConfig);
-                    }
+                if (withSum) {
+                    dataConfig.inPlace = -1;
+                    config.inConfs.push_back(dataConfig);
                 }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
 
-                supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
-            } while (itpd.next());
-        } catch (std::exception& e) {
-            // it throw exception in case of no implementation found
-            continue;
+            supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
+            itpd++;
         }
     }
 }
@@ -426,48 +422,45 @@ void MKLDNNBinaryConvolutionNode::initDescriptor(const InferenceEngine::LayerCon
     size_t selected_count = 0;
     for (size_t i = 0; i < descs.size(); i++) {
         const auto& desc = descs[i];
-        try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
-            do {
-                InferenceEngine::LayerConfig cfg;
-                cfg.dynBatchSupport = true;
-                for (size_t j = 0; j < desc.inputNumbers(); j++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, j);
+        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig cfg;
+            cfg.dynBatchSupport = true;
+            for (size_t j = 0; j < desc.inputNumbers(); j++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(itpd, j);
+                cfg.inConfs.push_back(dataConfig);
+            }
+
+            for (size_t j = 0; j < desc.outputNumbers(); j++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                if (withSum) {
                     cfg.inConfs.push_back(dataConfig);
+                    dataConfig.inPlace = 1;
                 }
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(itpd, j);
 
-                for (size_t j = 0; j < desc.outputNumbers(); j++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    if (withSum) {
-                        cfg.inConfs.push_back(dataConfig);
-                        dataConfig.inPlace = 1;
-                    }
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, j);
-
-                    cfg.outConfs.push_back(dataConfig);
-                }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+                cfg.outConfs.push_back(dataConfig);
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
 
-                if (selected_count == selectedPrimitiveDescriptorIndex) {
-                    if (impl_type != selectedPD->getImplementationType()) {
-                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
-                    }
-                    rightConfig = cfg;
+            if (selected_count == selectedPrimitiveDescriptorIndex) {
+                if (impl_type != selectedPD->getImplementationType()) {
+                    THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                 }
-                if (i == descs.size() - 1) {
-                    if (impl_type == selectedPD->getImplementationType()) {
-                        rightConfig = config;
-                    }
+                rightConfig = cfg;
+            }
+            if (i == descs.size() - 1) {
+                if (impl_type == selectedPD->getImplementationType()) {
+                    rightConfig = config;
                 }
-                selected_count++;
-            } while (itpd.next());
-        } catch (std::exception& e) {
-            continue;
+            }
+            selected_count++;
+            itpd++;
         }
     }
     selectedPD->getConfig() = rightConfig;
index 1184896..5ff69e6 100644 (file)
@@ -148,18 +148,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
     invertVectorCopyUtoI(allPads.end, paddingR);
 
     MKLDNNDims weightsDims = MKLDNNDims(weightDims);
-
-    for (int i = 0; i < paddingR.size(); i++) {
-        int with_group = (isGrouped || isMerged) ? 1 : 0;
-        int krn = weightsDims[with_group + 2 + i];
-        int src = getParentEdgeAt(0)->getDims()[2 + i];
-        int dst = getChildEdgeAt(0)->getDims()[2 + i];
-
-        krn = (krn - 1)*(dilation[i] + 1) + 1;
-        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
-        paddingR[i] = (dst - calc_dst) * stride[i];
-    }
-
     withSum = isFusedWith(Eltwise);
 
     for (auto &node : fusedWith) {
@@ -176,6 +164,17 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
                 dw_conv_strides.push_back(convLayer->_stride[i]);
             }
             dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(convLayer->outData[0]->getPrecision());
+
+            for (int i = 0; i < paddingR.size(); i++) {
+                int with_group = (isGrouped || isMerged) ? 1 : 0;
+                int krn = weightsDims[with_group + 2 + i];
+                int src = getParentEdgeAt(0)->getDims()[2 + i];
+                int dst = getChildEdgeAt(0)->getDims()[2 + i];
+
+                krn = (krn - 1)*(dilation[i] + 1) + 1;
+                int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
+                paddingR[i] = (dst - calc_dst) * stride[i];
+            }
         }
     }
 
@@ -480,48 +479,44 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
     setPostOps(attr);
 
     for (auto& desc : descs) {
-        try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
-            do {
-                InferenceEngine::LayerConfig config;
-                config.dynBatchSupport = true;
-                for (size_t i = 0; i < desc.inputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, i);
-                    if (!isGrouped)
-                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
-                    config.inConfs.push_back(dataConfig);
-                }
-
-                std::vector<memory::format> outFormats;
-                for (size_t i = 0; i < desc.outputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    if (withSum) {
-                        dataConfig.inPlace = 1;
-                    }
+        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig config;
+            config.dynBatchSupport = true;
+            for (size_t i = 0; i < desc.inputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(itpd, i);
+                if (!isGrouped)
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                config.inConfs.push_back(dataConfig);
+            }
 
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, i);
-                    if (!isGrouped)
-                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
-                    config.outConfs.push_back(dataConfig);
+            std::vector<memory::format> outFormats;
+            for (size_t i = 0; i < desc.outputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                if (withSum) {
+                    dataConfig.inPlace = 1;
+                }
 
-                    if (withSum) {
-                        dataConfig.inPlace = -1;
-                        config.inConfs.push_back(dataConfig);
-                    }
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(itpd, i);
+                if (!isGrouped)
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                config.outConfs.push_back(dataConfig);
 
-                    outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
+                if (withSum) {
+                    dataConfig.inPlace = -1;
+                    config.inConfs.push_back(dataConfig);
                 }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
 
-                supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
-            } while (itpd.next());
-        } catch (std::exception& e) {
-            // it throw exception in case of no implementation found
-            continue;
+                outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+
+            supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
+            itpd++;
         }
     }
 }
@@ -599,20 +594,24 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::
     MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};
 
     for (auto alg : {algorithm::convolution_winograd, algorithm::convolution_direct}) {
-        std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
-        if (withBiases) {
-            MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::any};
+        try {
+            std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
+            if (withBiases) {
+                MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::any};
 
-            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
-                                                          in_candidate, wgh_candidate, bias_candidate, out_candidate,
-                                                          stride, dilation, paddingL, paddingR, padding_kind::zero));
-        } else {
-            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
-                                                          in_candidate, wgh_candidate, out_candidate, stride, dilation,
-                                                          paddingL, paddingR, padding_kind::zero));
-        }
+                conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
+                            in_candidate, wgh_candidate, bias_candidate, out_candidate,
+                            stride, dilation, paddingL, paddingR, padding_kind::zero));
+            } else {
+                conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg,
+                            in_candidate, wgh_candidate, out_candidate, stride, dilation,
+                            paddingL, paddingR, padding_kind::zero));
+            }
 
-        descs.emplace_back(conv_desc);
+            descs.emplace_back(conv_desc);
+        } catch (...) {
+            THROW_IE_EXCEPTION << "Cannot create convolution forward descriptor for layer: " << getName();
+        }
     }
 }
 
@@ -659,48 +658,45 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c
     size_t selected_count = 0;
     for (size_t i = 0; i < descs.size(); i++) {
         const auto& desc = descs[i];
-        try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
-            do {
-                InferenceEngine::LayerConfig cfg;
-                cfg.dynBatchSupport = true;
-                for (size_t j = 0; j < desc.inputNumbers(); j++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, j);
+        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig cfg;
+            cfg.dynBatchSupport = true;
+            for (size_t j = 0; j < desc.inputNumbers(); j++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(itpd, j);
+                cfg.inConfs.push_back(dataConfig);
+            }
+
+            for (size_t j = 0; j < desc.outputNumbers(); j++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(itpd, j);
+                if (withSum) {
                     cfg.inConfs.push_back(dataConfig);
+                    dataConfig.inPlace = 1;
                 }
 
-                for (size_t j = 0; j < desc.outputNumbers(); j++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    if (withSum) {
-                        cfg.inConfs.push_back(dataConfig);
-                        dataConfig.inPlace = 1;
-                    }
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, j);
-
-                    cfg.outConfs.push_back(dataConfig);
-                }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+                cfg.outConfs.push_back(dataConfig);
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
 
-                if (selected_count == selectedPrimitiveDescriptorIndex) {
-                    if (impl_type != selectedPD->getImplementationType()) {
-                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
-                    }
-                    rightConfig = cfg;
+            if (selected_count == selectedPrimitiveDescriptorIndex) {
+                if (impl_type != selectedPD->getImplementationType()) {
+                    THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                 }
-                if (i == descs.size() - 1 && addedNewDesc) {
-                    if (impl_type == selectedPD->getImplementationType()) {
-                        rightConfig = config;
-                    }
+                rightConfig = cfg;
+            }
+            if (i == descs.size() - 1 && addedNewDesc) {
+                if (impl_type == selectedPD->getImplementationType()) {
+                    rightConfig = config;
                 }
-                selected_count++;
-            } while (itpd.next());
-        } catch (std::exception& e) {
-            continue;
+            }
+            selected_count++;
+            itpd++;
         }
     }
     selectedPD->getConfig() = rightConfig;
index 01a8172..5d40016 100644 (file)
@@ -123,40 +123,36 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
     mkldnn::primitive_attr attr;
 
     for (auto& desc : descs) {
-        try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
-            do {
-                InferenceEngine::LayerConfig config;
-                config.dynBatchSupport = true;
-                for (size_t i = 0; i < desc.inputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, i);
-                    if (!isGrouped)
-                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
-                    config.inConfs.push_back(dataConfig);
-                }
-
-                std::vector<memory::format> outFormats;
-                for (size_t i = 0; i < desc.outputNumbers(); i++) {
-                    InferenceEngine::DataConfig dataConfig;
-
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, i);
-                    if (!isGrouped)
-                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
-                    config.outConfs.push_back(dataConfig);
-
-                    outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
-                }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
-
-                supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
-            } while (itpd.next());
-        } catch (std::exception& e) {
-            // it throw exception in case of no implementation found
-            continue;
+        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig config;
+            config.dynBatchSupport = true;
+            for (size_t i = 0; i < desc.inputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(itpd, i);
+                if (!isGrouped)
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                config.inConfs.push_back(dataConfig);
+            }
+
+            std::vector<memory::format> outFormats;
+            for (size_t i = 0; i < desc.outputNumbers(); i++) {
+                InferenceEngine::DataConfig dataConfig;
+
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(itpd, i);
+                if (!isGrouped)
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                config.outConfs.push_back(dataConfig);
+
+                outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+
+            supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
+            itpd++;
         }
     }
 }
@@ -246,44 +242,41 @@ void MKLDNNDeformableConvolutionNode::initDescriptor(const InferenceEngine::Laye
     size_t selected_count = 0;
     for (size_t i = 0; i < descs.size(); i++) {
         const auto& desc = descs[i];
-        try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
-            do {
-                InferenceEngine::LayerConfig cfg;
-                cfg.dynBatchSupport = true;
-                for (size_t j = 0; j < desc.inputNumbers(); j++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, j);
-                    cfg.inConfs.push_back(dataConfig);
-                }
-
-                for (size_t j = 0; j < desc.outputNumbers(); j++) {
-                    InferenceEngine::DataConfig dataConfig;
-                    dataConfig.inPlace = -1;
-                    dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, j);
-
-                    cfg.outConfs.push_back(dataConfig);
-                }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
-
-                if (selected_count == selectedPrimitiveDescriptorIndex) {
-                    if (impl_type != selectedPD->getImplementationType()) {
-                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
-                    }
-                    rightConfig = cfg;
+        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+        while (itpd.is_not_end()) {
+            InferenceEngine::LayerConfig cfg;
+            cfg.dynBatchSupport = true;
+            for (size_t j = 0; j < desc.inputNumbers(); j++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getSrcMemDesc(itpd, j);
+                cfg.inConfs.push_back(dataConfig);
+            }
+
+            for (size_t j = 0; j < desc.outputNumbers(); j++) {
+                InferenceEngine::DataConfig dataConfig;
+                dataConfig.inPlace = -1;
+                dataConfig.constant = false;
+                dataConfig.desc = getDstMemDesc(itpd, j);
+
+                cfg.outConfs.push_back(dataConfig);
+            }
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+
+            if (selected_count == selectedPrimitiveDescriptorIndex) {
+                if (impl_type != selectedPD->getImplementationType()) {
+                    THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
                 }
-                if (i == descs.size() - 1 && addedNewDesc) {
-                    if (impl_type == selectedPD->getImplementationType()) {
-                        rightConfig = config;
-                    }
+                rightConfig = cfg;
+            }
+            if (i == descs.size() - 1 && addedNewDesc) {
+                if (impl_type == selectedPD->getImplementationType()) {
+                    rightConfig = config;
                 }
-                selected_count++;
-            } while (itpd.next());
-        } catch (std::exception& e) {
-            continue;
+            }
+            selected_count++;
+            itpd++;
         }
     }
     selectedPD->getConfig() = rightConfig;
index ab8e2ac..7609e05 100644 (file)
@@ -66,6 +66,8 @@ bool MKLDNNEltwiseNode::isWithBroadcast() {
             if (inDims.size() < outDims.size())
                 withBroadcast = true;
         }
+        if (inDims.size() == 0 && outDims.size())
+            withBroadcast = true;
     }
 
     return withBroadcast;
@@ -235,7 +237,7 @@ void MKLDNNEltwiseNode::dims_calc(int *dims, const MKLDNNDims &edge_dims) {
     for (int i = 0; i < ndims; i++) {
         dims[4 - i] = edge_dims[ndims - 1 - i];
     }
-    if (!(broadcast && edge_dims[0] == getChildEdgeAt(0)->getDims()[0]))
+    if (edge_dims.ndims() && !(broadcast && edge_dims[0] == getChildEdgeAt(0)->getDims()[0]))
         dims[batch_dim] = std::min(dims[batch_dim], batchToProcess());
 }
 
index 326f9e4..030dc7f 100644 (file)
@@ -120,7 +120,8 @@ void MKLDNNGenericNode::execLayer() {
         } else {
             // TODO: Ask the right dims using getShape() from previous node
             inputDescs.push_back(inputs[inputs.size() - 1]->getTensorDesc());
-            inputDescs[inputDescs.size() - 1].getDims()[0] = static_cast<size_t>(batchToProcess());
+            if (inputDescs[inputDescs.size() - 1].getDims().size() > 0)
+                inputDescs[inputDescs.size() - 1].getDims()[0] = static_cast<size_t>(batchToProcess());
         }
     }
 
index 3c17bfd..7d9e704 100644 (file)
@@ -60,16 +60,6 @@ void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm)  {
     memcpy(dst_ptr, src_ptr, srcMemory.GetSize());
 }
 
-std::string MKLDNNMemoryInputNode::nameFromCombinedName(std::string name) {
-    auto idSplitter = name.find("/id=");
-    return name.substr(0, idSplitter);
-}
-
-std::string MKLDNNMemoryInputNode::idFromCombinedName(std::string name) {
-    auto idSplitter = name.find("/id=");
-    return name.substr(idSplitter == std::string::npos ? 0 : idSplitter + 4);
-}
-
 MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, int socket)
         : MKLDNNInputNode(layer, eng, socket), MKLDNNMemoryNode(layer) {
     if (created()) {
@@ -91,7 +81,6 @@ void MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) {
     } else {
         getExisted()[node->getId()] = node;
     }
-    // std::cout <<"[register] " << node << ", size="<< getExisted().size() <<"\n" << std::flush;
 }
 
 void MKLDNNMemoryNodeVirtualEdge::registerOutput(MKLDNNMemoryOutputNode * node) {
@@ -104,5 +93,4 @@ void MKLDNNMemoryNodeVirtualEdge::registerOutput(MKLDNNMemoryOutputNode * node)
     } else {
         getExisted()[node->getId()] = node;
     }
-    // std::cout <<"[register] " << node << ", size="<< getExisted().size() <<"\n" << std::flush;
 }
index d389d16..6ea799d 100644 (file)
@@ -58,7 +58,6 @@ class MKLDNNMemoryNodeVirtualEdge {
         InferenceEngine::details::erase_if(getExisted(), [&](const Holder::value_type & it){
             return it.second == node;
         });
-        // std::cout <<"[remove]   " << node << ", size="<< getExisted().size() <<"\n" << std::flush;
     }
 };
 
@@ -86,12 +85,8 @@ class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode {
     static Register<MKLDNNMemoryOutputNode> reg;
 };
 
-
 class MKLDNNMemoryInputNode : public MKLDNNInputNode, public MKLDNNMemoryNode {
- protected:
-    static std::string nameFromCombinedName(std::string name);
-    static std::string idFromCombinedName(std::string name);
- public:
+public:
     MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, int socket);
     ~MKLDNNMemoryInputNode() override;
 
@@ -104,7 +99,5 @@ class MKLDNNMemoryInputNode : public MKLDNNInputNode, public MKLDNNMemoryNode {
     static Register<MKLDNNMemoryInputNode> reg;
 };
 
-
-
 }  // namespace MKLDNNPlugin
 
index 8d82728..a8013ad 100644 (file)
@@ -504,6 +504,64 @@ static void permute_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr&
     });
 }
 
+static void permute_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
+    auto src_data = reinterpret_cast<const float *>(srcMemPtr->GetData());
+    auto dst_data = reinterpret_cast<float *>(dstMemPtr->GetData());
+    src_data += srcMemPtr->GetDescriptor().data.layout_desc.blocking.offset_padding;
+    dst_data += dstMemPtr->GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+    const int DIM1 = srcMemPtr->GetDims()[1];
+    const int DIM2 = srcMemPtr->GetDims()[2];
+    const int DIM3 = srcMemPtr->GetDims()[3];
+    const int DIM4 = srcMemPtr->GetDims()[4];
+
+    parallel_for4d(MB, DIM2, DIM3, DIM4, [&](int n, int dim2, int dim3, int dim4) {
+        for (int dim1 = 0; dim1 < DIM1; dim1++) {
+            int src_off = n * DIM1 * DIM2 * DIM3 * DIM4 +
+                          dim1 * DIM2 * DIM3 * DIM4 +
+                          dim2 * DIM3 * DIM4 +
+                          dim3 * DIM4 +
+                          dim4;
+            int dst_off = n * DIM2 * DIM3 * DIM4 * DIM1 +
+                          dim2 * DIM3 * DIM4 * DIM1 +
+                          dim3 * DIM4 * DIM1 +
+                          dim4 * DIM1 +
+                          dim1;
+
+            dst_data[dst_off] = src_data[src_off];
+        }
+    });
+}
+
+static void permute_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
+    auto src_data = reinterpret_cast<const float *>(srcMemPtr->GetData());
+    auto dst_data = reinterpret_cast<float *>(dstMemPtr->GetData());
+    src_data += srcMemPtr->GetDescriptor().data.layout_desc.blocking.offset_padding;
+    dst_data += dstMemPtr->GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+    const int DIM1 = srcMemPtr->GetDims()[1];
+    const int DIM2 = srcMemPtr->GetDims()[2];
+    const int DIM3 = srcMemPtr->GetDims()[3];
+    const int DIM4 = srcMemPtr->GetDims()[4];
+
+    parallel_for4d(MB, DIM4, DIM1, DIM2, [&](int n, int dim4, int dim1, int dim2) {
+        for (int dim3 = 0; dim3 < DIM3; dim3++) {
+            int src_off = n * DIM1 * DIM2 * DIM3 * DIM4 +
+                          dim1 * DIM2 * DIM3 * DIM4 +
+                          dim2 * DIM3 * DIM4 +
+                          dim3 * DIM4 +
+                          dim4;
+            int dst_off = n * DIM4 * DIM1 * DIM2 * DIM3 +
+                          dim4 * DIM1 * DIM2 * DIM3 +
+                          dim1 * DIM2 * DIM3 +
+                          dim2 * DIM3 +
+                          dim3;
+
+            dst_data[dst_off] = src_data[src_off];
+        }
+    });
+}
+
 std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
         {{0, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
             return true;
@@ -549,7 +607,13 @@ std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDN
         })},
         {{1, 0, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_102, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
             return MKLDNNMemory::IsPlainFormat(srcMemPtr->GetFormat()) && MB == srcMemPtr->GetDims()[0];
-        })}
+        })},
+        {{0, 2, 3, 4, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_02341, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
+            return MKLDNNMemory::IsPlainFormat(srcMemPtr->GetFormat());
+        })},
+        {{0, 4, 1, 2, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_04123, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
+            return MKLDNNMemory::IsPlainFormat(srcMemPtr->GetFormat());
+        })},
 };
 
 void MKLDNNPermuteNode::execute(mkldnn::stream strm) {
index 1cacf76..959a4e3 100644 (file)
@@ -173,15 +173,17 @@ void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
         return {config, impl, fmt};
     };
 
-    supportedPrimitiveDescriptors.push_back(same(memory::nhwc, ref_any));
-
     if (isPackedStore()) {
-        primitive_desc_iterator itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
-        do {
+        auto itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
+        while (itpd.is_not_end()) {
             impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
             supportedPrimitiveDescriptors.push_back(same(memory::nhwc, impl_type));
-        } while (itpd.next());
+            itpd++;
+        }
     }
+
+    // Ref implementation. Not from MKLDNN.
+    supportedPrimitiveDescriptors.push_back(same(memory::nhwc, ref_any));
 }
 
 void MKLDNNQuantizeNode::createPrimitive() {
index 1e6d249..14da20c 100644 (file)
@@ -77,10 +77,10 @@ void MKLDNNReorderNode::createPrimitive() {
 
 void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr) {
     src_blocked = std::make_shared<MKLDNNMemory>(getEngine());
-    src_blocked->Create(srcDesc, srcPtr);
+    src_blocked->Create(srcDesc, srcPtr, false);
 
     dst_blocked = std::make_shared<MKLDNNMemory>(getEngine());
-    dst_blocked->Create(dstDesc, dstPtr);
+    dst_blocked->Create(dstDesc, dstPtr, false);
 
     mkldnn::primitive_attr attr;
 
@@ -126,6 +126,7 @@ bool MKLDNNReorderNode::created() const {
 void MKLDNNReorderNode::execute(mkldnn::stream strm) {
     src_blocked->GetPrimitivePtr()->set_data_handle(getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
     dst_blocked->GetPrimitivePtr()->set_data_handle(getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle());
+
     MKLDNNNode::execute(strm);
 }
 
index a5323a4..184252e 100644 (file)
@@ -20,7 +20,7 @@ public:
     void getSupportedDescriptors() override;
     void createPrimitive() override;
     bool created() const override;
-
+    using MKLDNNNode::createDescriptor;
     void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
                           const std::vector<InferenceEngine::TensorDesc>& outputDesc,
                           const std::vector<mkldnn::memory::format> &outputFormats);
index f8a4cad..88ef229 100644 (file)
@@ -73,14 +73,15 @@ void MKLDNNSoftMaxNode::createPrimitive() {
     auto prim_desc = softmax_forward::primitive_desc(*selected_desc_ptr, getEngine());
     primitive_desc_iterator itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
 
-    do {
+    while (itpd.is_not_end()) {
         impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
         auto primitiveDescriptor = getSelectedPrimitiveDescriptor();
         if ((primitiveDescriptor != nullptr) && (impl_type == primitiveDescriptor->getImplementationType())) {
             itpd.getPrimitiveDescriptor(prim_desc);
             break;
         }
-    } while (itpd.next());
+        itpd++;
+    }
 
     prim.reset(new softmax_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                 getChildEdgeAt(0)->getMemory().GetPrimitive()));
index 2f909bd..cffd6a8 100644 (file)
@@ -15,6 +15,7 @@ namespace MKLDNNPlugin {
 
 class PortMapHelper {
 public:
+    virtual ~PortMapHelper() = default;
     virtual void execute(int n_iter, mkldnn::stream strm) = 0;
 protected:
     std::vector<mkldnn::reorder> reorders;
index 737d98e..875f260 100644 (file)
@@ -37,7 +37,7 @@ struct IEB_HEADER {
 };
 
 static IEB_HEADER prepare_header(const TensorDesc& desc) {
-    IEB_HEADER header = {0};
+    IEB_HEADER header = {};
 
     header.magic[0] = IEB_MAGIC[0];
     header.magic[1] = IEB_MAGIC[1];
index 1cd4978..01b93ff 100644 (file)
@@ -3,22 +3,10 @@
 #
 
 #
-# Locate firmware files
-#
-
-if (ENABLE_MYRIAD)
-    find_file(VPU_FIRMWARE_MA2450_FILE MvNCAPI-ma2450.mvcmd "${VPU_FIRMWARE_MA2450}/mvnc")
-    find_file(VPU_FIRMWARE_MA2X8X_FILE MvNCAPI-ma2x8x.mvcmd "${VPU_FIRMWARE_MA2X8X}/mvnc")
-    
-    if(NOT VPU_FIRMWARE_MA2450_FILE OR NOT VPU_FIRMWARE_MA2X8X_FILE)
-        message(FATAL_ERROR "[VPU] Missing firmware")
-    endif()
-endif()
-
-#
 # Build common part
 #
 
+add_subdirectory(common)
 add_subdirectory(graph_transformer)
 
 add_subdirectory(
@@ -29,19 +17,6 @@ add_subdirectory(
 # Build plugins
 #
 
-set(plugin_target "")
-
 if(ENABLE_MYRIAD)
     add_subdirectory(myriad_plugin)
-    set(plugin_target "myriadPlugin")
-endif()
-
-
-if(ENABLE_MYRIAD)
-    set(firmware_out_dir "$<TARGET_FILE_DIR:${plugin_target}>")
-    add_custom_target(vpu_copy_firmware ALL
-        COMMAND "${CMAKE_COMMAND}" -E copy "${VPU_FIRMWARE_MA2450_FILE}" "${firmware_out_dir}/MvNCAPI-ma2450.mvcmd"
-        COMMAND "${CMAKE_COMMAND}" -E copy "${VPU_FIRMWARE_MA2X8X_FILE}" "${firmware_out_dir}/MvNCAPI-ma2x8x.mvcmd"
-        COMMENT "[VPU] Copy firmware")
-
 endif()
diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt
new file mode 100644 (file)
index 0000000..969670c
--- /dev/null
@@ -0,0 +1,42 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(TARGET_NAME "vpu_common_lib")
+
+file(GLOB_RECURSE SOURCES *.cpp *.hpp *.h)
+
+add_library(${TARGET_NAME} STATIC ${SOURCES})
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    # TODO: enable some day and fix all warnings
+    #     target_compile_options(${TARGET_NAME} PRIVATE "-Wall")
+    target_compile_options(${TARGET_NAME} PRIVATE "-Werror=unused-variable")
+    target_compile_options(${TARGET_NAME} PRIVATE "-Werror=unused-function")
+    target_compile_options(${TARGET_NAME} PRIVATE "-Werror=strict-aliasing")
+endif()
+
+target_include_directories(${TARGET_NAME}
+        PUBLIC
+        "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
+target_include_directories(${TARGET_NAME}
+        SYSTEM PUBLIC
+        "${IE_MAIN_SOURCE_DIR}/include"
+        "${IE_MAIN_SOURCE_DIR}/src/inference_engine")
+
+if(WIN32)
+    target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)
+
+    set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
+endif()
+
+add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
+add_cppcheck(${TARGET_NAME})
+
+#
+# developer package
+#
+
+export(TARGETS ${TARGET_NAME} NAMESPACE IE::
+        APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
diff --git a/inference-engine/src/vpu/common/include/vpu/parsed_config_base.hpp b/inference-engine/src/vpu/common/include/vpu/parsed_config_base.hpp
new file mode 100644 (file)
index 0000000..32d6bbe
--- /dev/null
@@ -0,0 +1,93 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <unordered_map>
+#include <unordered_set>
+#include <string>
+
+#include <vpu/vpu_plugin_config.hpp>
+
+#include <vpu/utils/logger.hpp>
+#include <vpu/utils/enums.hpp>
+
+namespace vpu {
+
+VPU_DECLARE_ENUM(ConfigMode,
+        DEFAULT_MODE = 0,
+        RUNTIME_MODE = 1,
+        COMPILE_MODE = 2,
+)
+
+struct ParsedConfigBase {
+    LogLevel deviceLogLevel = LogLevel::None;
+    LogLevel hostLogLevel = LogLevel::None;
+
+    bool exclusiveAsyncRequests = false;
+
+    virtual std::map<std::string, std::string> getDefaultConfig() const { return {}; }
+
+    ~ParsedConfigBase() = default;
+
+protected:
+    explicit ParsedConfigBase(ConfigMode configMode);
+
+    virtual void checkSupportedValues(const std::unordered_map<std::string, std::unordered_set<std::string>> &supported,
+                                                     const std::map<std::string, std::string> &config) const;
+    virtual void checkUnknownOptions(const std::map<std::string, std::string> &config) const;
+    virtual void checkInvalidValues(const std::map<std::string, std::string> &config) const;
+    virtual void checkOptionsAccordingToMode(const std::map<std::string, std::string> &config) const;
+
+    std::map<std::string, std::string> parse(const std::map<std::string, std::string> &config) {
+        checkInvalidValues(config);
+        checkUnknownOptions(config);
+        checkOptionsAccordingToMode(config);
+
+        auto defaultConfig = getDefaultConfig();
+        for (auto &&entry : config) {
+            defaultConfig[entry.first] = entry.second;
+        }
+
+        return defaultConfig;
+    }
+
+    virtual void configure(const std::map<std::string, std::string> &config);
+
+
+    virtual std::unordered_set<std::string> getKnownOptions() const;
+    virtual std::unordered_set<std::string> getCompileOptions() const { return {}; }
+    virtual std::unordered_set<std::string> getRuntimeOptions() const;
+
+protected:
+    Logger::Ptr _log;
+
+private:
+    ConfigMode _mode = ConfigMode::DEFAULT_MODE;
+};
+
+template<typename T, typename V>
+inline void setOption(T &dst, const V &supported, const std::map<std::string, std::string> &config, const std::string &key) {
+    auto value = config.find(key);
+    if (value != config.end()) {
+        dst = supported.at(value->second);
+    }
+}
+
+inline void setOption(std::string &dst, const std::map<std::string, std::string> &config, const std::string &key) {
+    auto value = config.find(key);
+    if (value != config.end()) {
+        dst = value->second;
+    }
+}
+
+template<typename T, typename C>
+inline void setOption(T &dst, const std::map<std::string, std::string> &config, const std::string &key, const C &preprocess) {
+    auto value = config.find(key);
+    if (value != config.end()) {
+        dst = preprocess(value->second);
+    }
+}
+}  // namespace vpu
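
The setOption() overloads above assign only when the key is present in the config map: the plain string overload copies the raw value, while the map-based overload translates it through a lookup table. A minimal usage sketch under that reading (the "MY_OPTION" key and the switches table are illustrative, not from the source):

    #include <map>
    #include <string>
    #include <unordered_map>
    #include <vpu/parsed_config_base.hpp>

    int main() {
        // "MY_OPTION" and the switches table below are illustrative, not from the source.
        std::map<std::string, std::string> config = {{"MY_OPTION", "YES"}};
        const std::unordered_map<std::string, bool> switches = {{"YES", true}, {"NO", false}};

        bool flag = false;
        vpu::setOption(flag, switches, config, "MY_OPTION");   // key present: flag becomes true

        std::string raw = "default";
        vpu::setOption(raw, config, "ABSENT_KEY");             // key absent: raw stays "default"
        return flag ? 0 : 1;
    }
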
@@ -74,7 +74,15 @@ public:
     }
 
     template <typename T>
-    inline const T& getOrDefault(const std::string& name, const T& def) const {
+    inline T getOrDefault(const std::string& name) const {
+        auto it = _tbl.find(name);
+        if (it != _tbl.end()) {
+            return it->second.get<T>();
+        }
+        return T();
+    }
+    template <typename T>
+    inline T getOrDefault(const std::string& name, const T& def) const {
         auto it = _tbl.find(name);
         if (it != _tbl.end()) {
             return it->second.get<T>();
@@ -83,6 +91,16 @@ public:
     }
 
     template <typename T>
+    inline T& getOrSet(const std::string& name) {
+        auto it = _tbl.find(name);
+        if (it != _tbl.end()) {
+            return it->second.get<T>();
+        }
+        auto res = _tbl.insert({name, Any(T())});
+        assert(res.second);
+        return res.first->second.get<T>();
+    }
+    template <typename T>
     inline T& getOrSet(const std::string& name, const T& def) {
         auto it = _tbl.find(name);
         if (it != _tbl.end()) {
 namespace vpu {
 
 //
-// SmallBufAllocator
+// SmallVector
 //
 
+namespace details {
+
 template <typename T>
 struct SmallBufElemMemory {
     static constexpr const size_t ElemSize = sizeof(T);
@@ -219,14 +221,12 @@ inline bool operator!=(
     return a1.getBuf() != a2.getBuf() || a1.getBaseAllocator() != a2.getBaseAllocator();
 }
 
-//
-// SmallVector
-//
+}  // namespace details
 
 template <typename T, int Capacity = 8, class BaseAllocator = std::allocator<T>>
 class SmallVector {
-    using BufHolder = SmallBufHolder<T, Capacity>;
-    using Alloc = SmallBufAllocator<T, BufHolder, BaseAllocator>;
+    using BufHolder = details::SmallBufHolder<T, Capacity>;
+    using Alloc = details::SmallBufAllocator<T, BufHolder, BaseAllocator>;
     using BaseCont = std::vector<T, Alloc>;
 
 public:
@@ -236,6 +236,8 @@ public:
 
     using iterator = typename BaseCont::iterator;
     using const_iterator = typename BaseCont::const_iterator;
+    using reverse_iterator = typename BaseCont::reverse_iterator;
+    using const_reverse_iterator = typename BaseCont::const_reverse_iterator;
 
     inline SmallVector() : _allocator(_bufs), _base(_allocator) {
         _base.reserve(Capacity);
@@ -243,7 +245,7 @@ public:
 
     inline ~SmallVector() = default;
 
-    inline explicit SmallVector(size_type count) : _allocator(_bufs), _base(count, _allocator) {}
+    inline explicit SmallVector(size_type count) : _allocator(_bufs), _base(count, T(), _allocator) {}
     inline SmallVector(size_type count, const T& value) : _allocator(_bufs), _base(count, value, _allocator) {}
     inline SmallVector(std::initializer_list<T> init) : _allocator(_bufs), _base(init, _allocator) {}
 
@@ -311,6 +313,13 @@ public:
     inline const_iterator cbegin() const noexcept { return _base.cbegin(); }
     inline const_iterator cend() const noexcept { return _base.cend(); }
 
+    inline reverse_iterator rbegin() noexcept { return _base.rbegin(); }
+    inline reverse_iterator rend() noexcept { return _base.rend(); }
+    inline const_reverse_iterator rbegin() const noexcept { return _base.rbegin(); }
+    inline const_reverse_iterator rend() const noexcept { return _base.rend(); }
+    inline const_reverse_iterator crbegin() const noexcept { return _base.crbegin(); }
+    inline const_reverse_iterator crend() const noexcept { return _base.crend(); }
+
     inline bool empty() const noexcept { return _base.empty(); }
     inline size_type size() const noexcept { return _base.size(); }
 
@@ -337,6 +346,11 @@ public:
     template <class... Args>
     inline iterator emplace(iterator pos, Args&&... args) { return _base.emplace(pos, std::forward<Args>(args)...); }
 
+    inline void assign(size_type count, const T& value) { _base.assign(count, value); }
+    template <class InputIt>
+    inline void assign(InputIt first, InputIt last) { _base.assign(first, last); }
+    inline void assign(std::initializer_list<T> ilist) { _base.assign(ilist); }
+
     inline void pop_back() { _base.pop_back(); }
 
     inline iterator erase(iterator pos) { return _base.erase(pos); }
@@ -26,7 +26,7 @@ namespace vpu {
     THROW_IE_EXCEPTION << "[VPU] "
 
 #define VPU_THROW_UNLESS(EXPRESSION) \
-    if (!(EXPRESSION)) VPU_THROW_EXCEPTION << "AssertionFailed: " << #EXPRESSION  // NOLINT
+    if (!(EXPRESSION)) VPU_THROW_EXCEPTION << "AssertionFailed: " << #EXPRESSION << " "  // NOLINT
 
 //
 // Packed structure declaration
@@ -28,6 +28,12 @@ public:
             "Mismatch between Func and FuncRef prototype");
     }
 
+    FuncRef(const FuncRef&) = delete;
+    FuncRef& operator=(const FuncRef&) = delete;
+
+    FuncRef(FuncRef&&) = delete;
+    FuncRef& operator=(FuncRef&&) = delete;
+
     R operator()(Args... args) const {
         return _impl(_realFuncPtr, std::forward<Args>(args)...);
     }
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include <utility>
+#include <type_traits>
 
 #include <details/ie_exception.hpp>
 
@@ -20,12 +21,26 @@ public:
 
     inline Handle(std::nullptr_t) {}  // NOLINT
 
-    template <typename U>
+    template <
+        typename U,
+        typename = typename std::enable_if<
+            std::is_constructible<
+                std::weak_ptr<T>,
+                std::shared_ptr<U>
+            >::value
+        >::type>
     inline Handle(const std::shared_ptr<U>& ptr) : _weak(ptr), _plain(ptr.get()) {  // NOLINT
         IE_ASSERT(_plain != nullptr);
     }
 
-    template <typename U>
+    template <
+        typename U,
+        typename = typename std::enable_if<
+            std::is_constructible<
+                std::weak_ptr<T>,
+                std::weak_ptr<U>
+            >::value
+        >::type>
     inline Handle(const Handle<U>& other) : _weak(other._weak), _plain(other._plain) {}  // NOLINT
 
     inline Handle(const Handle&) = default;
@@ -75,6 +90,14 @@ public:
     }
 
     template <typename U>
+    inline Handle<U> staticCast() const {
+        if (auto newPtr = std::static_pointer_cast<U>(_weak.lock())) {
+            return Handle<U>(newPtr);
+        }
+        return nullptr;
+    }
+
+    template <typename U>
     inline Handle<U> dynamicCast() const {
         if (auto newPtr = std::dynamic_pointer_cast<U>(_weak.lock())) {
             return Handle<U>(newPtr);
diff --git a/inference-engine/src/vpu/common/include/vpu/utils/heap.hpp b/inference-engine/src/vpu/common/include/vpu/utils/heap.hpp
new file mode 100644 (file)
index 0000000..1adb2da
--- /dev/null
@@ -0,0 +1,104 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+#include <ostream>
+#include <utility>
+
+namespace vpu {
+
+// Max-heap. Collects all elements until the capacity is reached; after that, it keeps only elements less than the current maximum.
+// Its size never exceeds the capacity specified in the constructor.
+template <typename T>
+class FixedMaxHeap {
+private:
+    size_t _capacity;
+    std::vector<T> v;
+
+public:
+    explicit FixedMaxHeap(size_t capacity): _capacity(capacity) {
+        v.reserve(_capacity);
+    }
+
+    FixedMaxHeap(const FixedMaxHeap&) = delete;
+
+    FixedMaxHeap(FixedMaxHeap &&other): _capacity(other._capacity), v(std::move(other.v)) {
+    }
+
+    FixedMaxHeap& operator=(FixedMaxHeap &&other) {
+        _capacity = other._capacity;
+        v = std::move(other.v);
+        return *this;
+    }
+
+    auto begin() -> decltype(v.begin()) {
+        return v.begin();
+    }
+
+    auto end() -> decltype(v.begin()) {
+        return v.end();
+    }
+
+    auto begin() const -> decltype(v.begin()) const {
+        return v.begin();
+    }
+
+    auto end() const -> decltype(v.begin()) const {
+        return v.end();
+    }
+
+    bool empty() const {
+        return v.empty();
+    }
+
+    size_t size() const {
+        return v.size();
+    }
+
+    // keep max-heap of constant size: insert only values smaller than max element, discard others
+    bool push(const T& val) {
+        if (_capacity == 0) {
+            return false;
+        }
+
+        if (v.size() < _capacity) {
+            v.push_back(val);
+        } else {
+            if (!(val < v.front())) {
+                return false;
+            }
+            std::pop_heap(v.begin(), v.end());
+            v[_capacity - 1] = val;
+        }
+
+        std::push_heap(v.begin(), v.end());
+
+        return true;
+    }
+
+    std::vector<T> sorted() const {
+        std::vector<T> s = v;
+        std::sort_heap(s.begin(), s.end());
+        return s;
+    }
+
+    void print(std::ostream &o) const {
+        o << "Heap [" << v.size() << "]: ";
+        for (int i : v) {
+            o << i << " ";
+        }
+        o << " is_heap: " << std::is_heap(v.begin(), v.end()) << " ";
+        o << std::endl;
+    }
+
+    friend std::ostream& operator<<(std::ostream& o, const FixedMaxHeap &h) {
+        h.print(o);
+        return o;
+    }
+};
+
+}  // namespace vpu
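
In other words, FixedMaxHeap retains the `capacity` smallest values pushed so far: while under capacity every value is accepted, afterwards a value is admitted only if it is smaller than the current maximum (the heap root), which is then evicted. A minimal sketch of that behavior, assuming the header is reachable as <vpu/utils/heap.hpp>:

    #include <iostream>
    #include <vpu/utils/heap.hpp>   // assumed include path for the FixedMaxHeap above

    int main() {
        vpu::FixedMaxHeap<int> heap(3);      // keep at most the 3 smallest values seen
        for (int v : {5, 1, 9, 4, 7, 2}) {
            heap.push(v);                    // once full, only values below the current max are kept
        }
        for (int v : heap.sorted()) {        // sorted() returns ascending order: 1 2 4
            std::cout << v << " ";
        }
        std::cout << std::endl;
        return 0;
    }
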
@@ -5,14 +5,23 @@
 #pragma once
 
 #include <ie_blob.h>
+#include <vpu/utils/enums.hpp>
 
 namespace vpu {
 
 namespace ie = InferenceEngine;
 
+VPU_DECLARE_ENUM(LayoutPreference,
+    AUTO,
+    ChannelMajor,  // CHW, NCHW, NCDHW
+    ChannelMinor   // HWC, NHWC, NDHWC
+)
+
+InferenceEngine::Layout deviceLayout(InferenceEngine::Layout const& layout,
+                                       vpu::LayoutPreference const& layoutPreference);
+
 ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in);
 
-ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& in);
 ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& in, ie::Layout outLayout);
 void copyBlob(const ie::Blob::Ptr& in, const ie::Blob::Ptr& out);
 
@@ -90,16 +90,35 @@ std::string toString(const T& val) noexcept;
 // Implementation
 //
 
+namespace details {
+
 template <typename T>
-void printTo(std::ostream& os, const T& val) noexcept {
+auto printToDefault(std::ostream& os, const T& val, int) noexcept -> decltype(os << val) {
     try {
-        os << val;
+        return os << val;
     } catch (...) {
         std::cerr << "[VPU] Unknown error while printing\n";
         std::abort();
     }
 }
 
+template <typename T>
+void printToDefault(std::ostream& os, const T& val, ...) noexcept {
+    try {
+        os << "<value at " << &val << ">";
+    } catch (...) {
+        std::cerr << "[VPU] Unknown error while printing\n";
+        std::abort();
+    }
+}
+
+}  // namespace details
+
+template <typename T>
+inline void printTo(std::ostream& os, const T& val) noexcept {
+    details::printToDefault(os, val, 0);
+}
+
 template <typename T1, typename T2>
 void printTo(std::ostream& os, const std::pair<T1, T2>& p) noexcept {
     try {
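
The reworked printTo() dispatches at compile time: the overload returning decltype(os << val) is selected for streamable types, while the variadic fallback prints only the value's address, so printing never fails to compile for types without operator<<. A minimal sketch (Opaque is an illustrative type; the namespace and header path are assumptions):

    #include <iostream>
    #include <sstream>
    #include <vpu/utils/io.hpp>      // assumed header path for the printTo() helpers above

    struct Opaque {};                // illustrative type with no operator<<

    int main() {
        std::ostringstream os;
        vpu::printTo(os, 42);        // streamable type: writes "42"
        vpu::printTo(os, Opaque{});  // fallback overload: writes "<value at 0x...>"
        std::cout << os.str() << std::endl;
        return 0;
    }
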
@@ -6,9 +6,13 @@
 
 #include <string>
 #include <map>
+#include <set>
 #include <vector>
+#include <memory>
 
 #include <ie_common.h>
+#include <ie_precision.hpp>
+#include <ie_layouts.h>
 
 #include <vpu/utils/enums.hpp>
 
@@ -18,12 +22,34 @@ namespace ie = InferenceEngine;
 
 struct StageMetaInfo final {
     ie::InferenceEngineProfileInfo::LayerStatus status = ie::InferenceEngineProfileInfo::LayerStatus::NOT_RUN;
+    std::vector<ie::Precision> outPrecisions;
+    std::vector<ie::Layout> outLayouts;
+
+    int inputsNum = 0;
 
     std::string layerName;
     std::string layerType;
 
+    std::string displayStageName;
+
     std::string stageName;
     std::string stageType;
+
+    int execOrder = -1;
+    float execTime = 0;
+};
+
+struct DataMetaInfo final {
+    std::string name;
+    ie::TensorDesc desc;
+    size_t parentIndex;
+    std::vector<size_t> childrenIndices;
+};
+
+struct GraphMetaInfo final {
+    std::string graphName;
+    std::vector<StageMetaInfo> stagesMeta;
+    std::vector<DataMetaInfo> datasMeta;
 };
 
 VPU_DECLARE_ENUM(PerfReport,
@@ -19,7 +19,7 @@ namespace vpu {
 
 namespace ie = InferenceEngine;
 
-namespace impl {
+namespace details {
 
 inline void insertToContainer(std::vector<std::string>& cont, std::string&& val) {
     cont.emplace_back(val);
@@ -42,7 +42,7 @@ inline void insertToContainer(ie::details::caseless_set<std::string>& cont, std:
     cont.emplace(val);
 }
 
-}  // namespace impl
+}  // namespace details
 
 template <class Cont>
 void splitStringList(const std::string& str, Cont& out, char delim) {
@@ -59,7 +59,7 @@ void splitStringList(const std::string& str, Cont& out, char delim) {
             continue;
         }
 
-        impl::insertToContainer(out, std::move(elem));
+        details::insertToContainer(out, std::move(elem));
     }
 }
 
diff --git a/inference-engine/src/vpu/common/src/parsed_config_base.cpp b/inference-engine/src/vpu/common/src/parsed_config_base.cpp
new file mode 100644 (file)
index 0000000..4039677
--- /dev/null
@@ -0,0 +1,126 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/parsed_config_base.hpp>
+
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <sstream>
+#include <string>
+#include <memory>
+#include <map>
+
+#include <cpp_interfaces/exception2status.hpp>
+#include <details/caseless.hpp>
+#include <ie_plugin_config.hpp>
+
+namespace vpu {
+namespace  {
+template<typename I, typename T, typename C>
+void check_input(const I &input, const T &options, const C &check) {
+    for (const auto& option : options) {
+        auto input_entry = input.find(option.first);
+        if (input_entry == input.end()) {
+            continue;
+        }
+
+        auto input_key = input_entry->first;
+        auto input_val = input_entry->second;
+        auto values = option.second;
+
+        if (!check(values, input_val)) {
+            THROW_IE_EXCEPTION << "Incorrect value " << "\"" << input_val << "\"" << " for key " << input_key;
+        }
+    }
+}
+
+}  // namespace
+
+ParsedConfigBase::ParsedConfigBase(ConfigMode configMode): _mode(configMode) {
+        _log = std::make_shared<Logger>("Config", LogLevel::Warning, consoleOutput());
+}
+
+void ParsedConfigBase::checkSupportedValues(
+        const std::unordered_map<std::string, std::unordered_set<std::string>> &supported,
+        const std::map<std::string, std::string> &config) const {
+    auto contains = [](const std::unordered_set<std::string> &supported, const std::string &option) {
+        return supported.find(option) != supported.end();
+    };
+
+    check_input(config, supported, contains);
+}
+
+void ParsedConfigBase::checkUnknownOptions(const std::map<std::string, std::string> &config) const {
+    auto knownOptions = getKnownOptions();
+    for (auto &&entry : config) {
+        if (knownOptions.find(entry.first) == knownOptions.end()) {
+            THROW_IE_EXCEPTION << NOT_FOUND_str << entry.first << " key is not supported for VPU";
+        }
+    }
+}
+
+void ParsedConfigBase::checkOptionsAccordingToMode(const std::map<std::string, std::string> &config) const {
+    auto compileOptions = getCompileOptions();
+    for (auto &&entry : config) {
+        std::stringstream errorMsgStream;
+        if (compileOptions.find(entry.first) != compileOptions.end() && _mode == ConfigMode::RUNTIME_MODE) {
+            _log->warning("%s option will be ignored. Seems you are using compiled graph", entry.first);
+        }
+    }
+}
+
+void ParsedConfigBase::checkInvalidValues(const std::map<std::string, std::string> &config) const {
+    const std::unordered_map<std::string, std::unordered_set<std::string>> supported_values = {
+        { CONFIG_KEY(LOG_LEVEL),
+          { CONFIG_VALUE(LOG_NONE), CONFIG_VALUE(LOG_WARNING), CONFIG_VALUE(LOG_INFO), CONFIG_VALUE(LOG_DEBUG) }},
+        { VPU_CONFIG_KEY(LOG_LEVEL),
+          { CONFIG_VALUE(LOG_NONE), CONFIG_VALUE(LOG_WARNING), CONFIG_VALUE(LOG_INFO), CONFIG_VALUE(LOG_DEBUG) }},
+        { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),   { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }}
+    };
+
+    checkSupportedValues(supported_values, config);
+}
+
+void ParsedConfigBase::configure(const std::map<std::string, std::string> &config) {
+    static const std::unordered_map<std::string, LogLevel> logLevels = {
+        { CONFIG_VALUE(LOG_NONE), LogLevel::None },
+        { CONFIG_VALUE(LOG_WARNING), LogLevel::Warning },
+        { CONFIG_VALUE(LOG_INFO), LogLevel::Info },
+        { CONFIG_VALUE(LOG_DEBUG), LogLevel::Debug }
+    };
+
+    setOption(hostLogLevel,   logLevels, config, CONFIG_KEY(LOG_LEVEL));
+    setOption(deviceLogLevel, logLevels, config, VPU_CONFIG_KEY(LOG_LEVEL));
+
+#ifndef NDEBUG
+    if (auto envVar = std::getenv("IE_VPU_LOG_LEVEL")) {
+        hostLogLevel = logLevels.at(envVar);
+    }
+#endif
+
+    static const std::unordered_map<std::string, bool> switches = {
+            { CONFIG_VALUE(YES), true },
+            { CONFIG_VALUE(NO), false }
+    };
+
+    setOption(exclusiveAsyncRequests, switches, config, CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS));
+}
+
+std::unordered_set<std::string> ParsedConfigBase::getRuntimeOptions() const {
+        return { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
+                 CONFIG_KEY(LOG_LEVEL),
+                 VPU_CONFIG_KEY(LOG_LEVEL)}; }
+
+std::unordered_set<std::string> ParsedConfigBase::getKnownOptions() const {
+    std::unordered_set<std::string> knownOptions;
+    auto compileOptions = getCompileOptions();
+    knownOptions.insert(compileOptions.begin(), compileOptions.end());
+
+    auto runtimeOptions = getRuntimeOptions();
+    knownOptions.insert(runtimeOptions.begin(), runtimeOptions.end());
+
+    return knownOptions;
+}
+}  // namespace vpu
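
ParsedConfigBase is intended to be subclassed by the plugin-specific configs: a derived class supplies its compile/runtime option sets and defaults, then feeds user options through parse() (validation plus merge over getDefaultConfig()) and configure() (application of the merged map). A hypothetical sketch of that flow; MyParsedConfig is illustrative and not a class from the source:

    #include <map>
    #include <string>
    #include <ie_plugin_config.hpp>
    #include <vpu/parsed_config_base.hpp>

    // Hypothetical subclass, shown only to illustrate the intended call sequence.
    struct MyParsedConfig : public vpu::ParsedConfigBase {
        MyParsedConfig() : vpu::ParsedConfigBase(vpu::ConfigMode::RUNTIME_MODE) {}

        void update(const std::map<std::string, std::string>& rawConfig) {
            // parse() checks keys/values and merges them over getDefaultConfig();
            // configure() then applies the merged result (log levels, async mode).
            configure(parse(rawConfig));
        }
    };

    int main() {
        MyParsedConfig cfg;
        cfg.update({{CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_WARNING)}});
        return cfg.exclusiveAsyncRequests ? 1 : 0;
    }
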
@@ -2,23 +2,46 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include <vector>
+#include <functional>
 #include <vpu/utils/ie_helpers.hpp>
 
 #include <precision_utils.h>
 #include <details/ie_exception.hpp>
 #include <blob_transform.hpp>
 #include <blob_factory.hpp>
+#include <ie_profiling.hpp>
 
 #include <vpu/utils/extra.hpp>
 #include <vpu/utils/numeric.hpp>
-#include <vpu/compile_env.hpp>
 
 namespace vpu {
 
-ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in) {
-    VPU_PROFILE(getBlobFP16);
+InferenceEngine::Layout deviceLayout(InferenceEngine::Layout const& layout,
+                                       vpu::LayoutPreference const& layoutPreference) {
+    using namespace InferenceEngine;
+    auto ChannelMajor = vpu::LayoutPreference::ChannelMajor;
+    auto ChannelMinor = vpu::LayoutPreference::ChannelMinor;
+
+    if (layoutPreference == ChannelMajor) {
+        if (layout == NHWC)
+            return NCHW;
+        if (layout == NDHWC)
+            return NCDHW;
+    }
+
+    if (layoutPreference == ChannelMinor) {
+        if (layout == NCHW)
+            return NHWC;
+        if (layout == NCDHW)
+            return NDHWC;
+    }
+
+    return layout;
+}
 
-    const auto& env = CompileEnv::get();
+ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in) {
+    IE_PROFILING_AUTO_SCOPE(getBlobFP16);
 
     auto inDesc = in->getTensorDesc();
 
@@ -27,7 +50,7 @@ ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in) {
     if (precision == ie::Precision::FP16)
         return in;
 
-    if (precision != ie::Precision::FP32 || !env.config.allowFP32Models) {
+    if (precision != ie::Precision::FP32) {
         VPU_THROW_EXCEPTION << "Unsupported precision " << precision.name();
     }
 
@@ -41,10 +64,6 @@ ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in) {
     return out;
 }
 
-ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& in) {
-    return copyBlob(in, in->getTensorDesc().getLayout());
-}
-
 ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& in, ie::Layout outLayout) {
     auto inDesc = in->getTensorDesc();
 
@@ -59,18 +78,33 @@ ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& in, ie::Layout outLayout) {
 }
 
 void copyBlob(const ie::Blob::Ptr& in, const ie::Blob::Ptr& out) {
-    auto inLayout = in->getTensorDesc().getLayout();
-    auto outLayout = out->getTensorDesc().getLayout();
+    const auto inLayout = in->getTensorDesc().getLayout();
+    const auto outLayout = out->getTensorDesc().getLayout();
+
+    const auto& inDims = in->getTensorDesc().getDims();
+    const auto& outDims = out->getTensorDesc().getDims();
+
+    IE_ASSERT(inDims == outDims);
 
     if (inLayout != outLayout) {
-        IE_ASSERT(inLayout == ie::Layout::NCHW || inLayout == ie::Layout::NHWC);
-        IE_ASSERT(outLayout == ie::Layout::NCHW || outLayout == ie::Layout::NHWC);
+        if (outDims.size() == 4) {
+            IE_ASSERT(inLayout == ie::Layout::NCHW || inLayout == ie::Layout::NHWC);
+            IE_ASSERT(outLayout == ie::Layout::NCHW || outLayout == ie::Layout::NHWC);
+
+            if (outDims[1] != 1 && (outDims[2] != 1 || outDims[3] != 1)) {
+                ie::blob_copy(in, out);
+                return;
+            }
+        }
 
-        const auto& dims = out->getTensorDesc().getDims();
+        if (outDims.size() == 5) {
+            IE_ASSERT(inLayout == ie::Layout::NCDHW || inLayout == ie::Layout::NDHWC);
+            IE_ASSERT(outLayout == ie::Layout::NCDHW || outLayout == ie::Layout::NDHWC);
 
-        if ((dims[0] != 1 || dims[1] != 1) && (dims[2] != 1 || dims[3] != 1)) {
-            ie::blob_copy(in, out);
-            return;
+            if (outDims[1] != 1 && (outDims[2] != 1 || outDims[3] != 1 || outDims[4] != 1)) {
+                ie::blob_copy(in, out);
+                return;
+            }
         }
     }
 
@@ -55,7 +55,7 @@ std::map<std::string, ie::InferenceEngineProfileInfo> parsePerformanceReport(
         }
 
         if (perfReport == PerfReport::PerStage) {
-            outPerfMap[stageMeta.stageName] = profInfo;
+            outPerfMap[stageMeta.displayStageName] = profInfo;
         } else if (perfReport == PerfReport::PerLayer) {
             auto it = outPerfMap.find(stageMeta.layerName);
             if (it == outPerfMap.end()) {
diff --git a/inference-engine/src/vpu/custom_kernels/customLayerBindings.xml b/inference-engine/src/vpu/custom_kernels/customLayerBindings.xml
new file mode 100644 (file)
index 0000000..69ba1ef
--- /dev/null
@@ -0,0 +1,227 @@
+<!-- Should be enabled once support for detecting the required kernel based on the tensor format is added -->
+<!-- <CustomLayer name="ReorgYolo" type="MVCL" version="1">
+    <Kernel entry="reorg">
+        <Source filename="reorg_hwc.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src"    type="input"  port-index="0"                format="BYXF"/>
+        <Tensor arg-name="out"    type="output" port-index="0"                format="BYXF"/>
+        <Scalar arg-name="w"      type="int"    port-index="0" source="I.X"                />
+        <Scalar arg-name="h"      type="int"    port-index="0" source="I.Y"                />
+        <Scalar arg-name="stride" type="int"                   source="stride"             />
+    </Parameters>
+    <WorkSizes dim="input,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
+</CustomLayer> -->
+
+<CustomLayer name="ReorgYolo" type="MVCL" version="1">
+    <Kernel entry="reorg_NCHW">
+        <Source filename="reorg_chw.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src"    type="input"  port-index="0"                 format="BFYX"/>
+        <Tensor arg-name="out"    type="output" port-index="0"                 format="BFYX"/>
+        <Scalar arg-name="W"      type="int"    port-index="0" source="I.X"                 />
+        <Scalar arg-name="H"      type="int"    port-index="0" source="I.Y"                 />
+        <Scalar arg-name="stride" type="int"                   source="stride"              />
+    </Parameters>
+    <WorkSizes dim="input,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
+</CustomLayer>
+
+<!-- The latest version of the code is not backward compatible with Yolo-v2; this is to be checked by benchmark -->
+<!-- Make the work-group config understand the parameter from the IR -->
+<!-- <WorkSizes global="Y*X,num,1" local="X,num,1" dim="input,0"/> -->
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+  <Where do_softmax="1" num="5"/>
+  <Kernel entry="region_ocl">
+        <Source filename="region_chw_m7_branch0.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src_data"    type="input"  port-index="0"                format="BFYX"/>
+        <Tensor arg-name="dst_data"    type="output" port-index="0"                format="ANY"/>
+        <Scalar arg-name="W"           type="int"    port-index="0" source="I.X"                />
+        <Scalar arg-name="H"           type="int"    port-index="0" source="I.Y"                />
+        <Scalar arg-name="classes"     type="int"                   source="classes"            />
+        <Scalar arg-name="coords"      type="int"                   source="coords"             />
+    </Parameters>
+    <WorkSizes global="Y*X,5,1" local="X,5,1" dim="input,0"/>
+</CustomLayer>
+
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+    <Where mask="0,1,2" do_softmax="0"/>
+    <Kernel entry="region_ocl">
+        <Source filename="region_chw_m7_branch1.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src_data"    type="input"  port-index="0"                format="BFYX"/>
+        <Tensor arg-name="dst_data"    type="output" port-index="0"                format="ANY"/>
+        <Scalar arg-name="W"           type="int"    port-index="0" source="I.X"                />
+        <Scalar arg-name="H"           type="int"    port-index="0" source="I.Y"                />
+        <Scalar arg-name="classes"     type="int"                   source="classes"            />
+        <Scalar arg-name="coords"      type="int"                   source="coords"             />
+    </Parameters>
+    <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
+</CustomLayer>
+
+<!-- Pixel-wise kernel binding; the local work group is configured per line of the input tensor -->
+<CustomLayer name="GRN" type="MVCL" version="1">
+    <Kernel entry="grn_NCHW">
+        <Source filename="grn.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src_data" type="input"  port-index="0"               format="BFYX"/>
+        <Tensor arg-name="dst_data" type="output" port-index="0"               format="BFYX"/>
+        <Scalar arg-name="C"        type="int"    port-index="0" source="I.F"               />
+        <Scalar arg-name="bias"     type="float"                 source="bias"              />
+    </Parameters>
+    <WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
+</CustomLayer>
+
+<!-- Two-stage layer binding: the first kernel computes the mean and variance, the second one normalizes the input tensor -->
+<CustomLayer name="MVN" stage="0" type="MVCL" version="1">
+    <Kernel entry="reduction_mean">
+        <Source filename="mvn.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src"                type="input"         port-index="0"                          format="BFYX"/>
+        <Tensor arg-name="mean"               type="output_buffer" port-index="0" dim="output,0"           size="Y*F*4" />
+        <Tensor arg-name="variance"           type="output_buffer" port-index="1" dim="output,0"           size="Y*F*4" />
+        <Scalar arg-name="W"                  type="int"           port-index="0" source="I.X"                          />
+        <Scalar arg-name="H"                  type="int"           port-index="0" source="I.Y"                          />
+        <Scalar arg-name="across_channels"    type="int"                          source="across_channels"              />
+    </Parameters>
+    <WorkSizes dim="output,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
+</CustomLayer>
+<CustomLayer name="MVN" stage="1" type="MVCL" version="1">
+    <Kernel entry="mvn_scale">
+        <Source filename="mvn.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src_data"           type="input"        port-index="0"                            format="BFYX"/>
+        <Tensor arg-name="dst_data"           type="output"       port-index="0"                            format="BFYX"/>
+        <Tensor arg-name="mean_part"          type="input_buffer" port-index="0" dim="output,0"             size="Y*F*4" />
+        <Tensor arg-name="power_mean"         type="input_buffer" port-index="1" dim="output,0"             size="Y*F*4" />
+        <Scalar arg-name="W"                  type="int"          port-index="0" source="I.X"                            />
+        <Scalar arg-name="H"                  type="int"          port-index="0" source="I.Y"                            />
+        <Scalar arg-name="across_channels"    type="int"                         source="across_channels"                />
+        <Scalar arg-name="normalize_variance" type="int"                         source="normalize_variance"             />
+    </Parameters>
+    <WorkSizes dim="output,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
+</CustomLayer>
+
+<!-- Single work-group kernel for a use case that is not embarrassingly parallel -->
+<CustomLayer name="CTCGreedyDecoder" type="MVCL" version="1" max-shaves="1">
+    <Kernel entry="ctc_ref_fp16">
+        <Source filename="ctc.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="probabilities"    type="input"  port-index="0" format="FYX"  />
+        <Tensor arg-name="seq_ind"          type="input"  port-index="1" format="ANY"  />
+        <Tensor arg-name="output_sequences" type="output" port-index="0" format="BFYX" />
+        <Scalar arg-name="H"                type="int"    port-index="0" source="I.Y"  />
+        <Scalar arg-name="W"                type="int"    port-index="0" source="I.X"  />
+        <Scalar arg-name="C"                type="int"    port-index="0" source="I.F"  />
+    </Parameters>
+    <WorkSizes dim="output,0" global="1,1,1" local="1,1,1"/>
+</CustomLayer>
+
+<CustomLayer name="ShuffleChannel" type="MVCL" version="1">
+    <!-- Where clause added artificially for testing purposes; the kernel itself supports arbitrary grouping -->
+    <!-- <Where group="2"/> -->
+    <Kernel entry="ShuffleChannel">
+        <Source filename="shuffle_channels.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src_data" type="input"  port-index="0"               format="BFYX"/>
+        <Tensor arg-name="dst_data" type="output" port-index="0"               format="BFYX"/>
+        <Scalar arg-name="C"        type="int"    port-index="0" source="I.F"               />
+        <Scalar arg-name="H"        type="int"    port-index="0" source="I.Y"               />
+        <Scalar arg-name="W"        type="int"    port-index="0" source="I.X"               />
+        <Scalar arg-name="G"        type="int"                   source="group"             />
+    </Parameters>
+    <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
+</CustomLayer>
+
+<!-- Reference version of the generic Quantize layer; should be changed to FakeQuantize -->
+<CustomLayer name="Quantize" type="MVCL" version="1">
+    <!-- <Where levels="2"/> -->
+    <Kernel entry="quantize">
+        <Source filename="binary_layers.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src"              type="input"  port-index="0"                  format="BFYX"/>
+        <Tensor arg-name="input_low"        type="input"  port-index="1"                  format="ANY" />
+        <Tensor arg-name="input_high"       type="input"  port-index="2"                  format="ANY" />
+        <Tensor arg-name="output_low"       type="input"  port-index="3"                  format="ANY" />
+        <Tensor arg-name="output_high"      type="input"  port-index="4"                  format="ANY" />
+        <Tensor arg-name="dst"              type="output" port-index="0"                  format="BFYX"/>
+        <Scalar arg-name="levels"           type="int"                   source="levels"               />
+        <Scalar arg-name="input_low_size"   type="int"    port-index="1" source="I.F"                  />
+        <Scalar arg-name="input_high_size"  type="int"    port-index="2" source="I.F"                  />
+        <Scalar arg-name="output_low_size"  type="int"    port-index="3" source="I.F"                  />
+        <Scalar arg-name="output_high_size" type="int"    port-index="4" source="I.F"                  />
+    </Parameters>
+    <WorkSizes dim="input,0" global="X,Y,F" local="1,1,1"/>
+</CustomLayer>
+
+<!-- Reference version of the generic quantized binary convolution -->
+<!-- An example of a kernel binding that uses a data blob from the IR -->
+<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
+    <Kernel entry="binary_convolution">
+        <Source filename="binary_layers.bin"/>
+    </Kernel>
+    <Parameters>
+        <Tensor arg-name="src_data"      type="input"   port-index="0"                      format="BFYX"/>
+        <Data   arg-name="weights_data"  type="data"                     source="weights"   format="ANY" />
+        <Tensor arg-name="dst_data"      type="output"  port-index="0"                      format="BFYX"/>
+        <Scalar arg-name="pad_value"     type="float"                    source="pad_value"              />
+        <Scalar arg-name="IW"            type="int"     port-index="0"   source="I.X"                    />
+        <Scalar arg-name="IH"            type="int"     port-index="0"   source="I.Y"                    />
+        <Scalar arg-name="IC"            type="int"     port-index="0"   source="I.F"                    />
+        <Scalar arg-name="DW"            type="int"     port-index="0"   source="dilations"              />
+        <Scalar arg-name="DH"            type="int"     port-index="1"   source="dilations"              />
+        <Scalar arg-name="GC"            type="int"                      source="group"                  />
+        <Scalar arg-name="KW"            type="int"     port-index="0"   source="kernel"                 />
+        <Scalar arg-name="KH"            type="int"     port-index="1"   source="kernel"                 />
+        <Scalar arg-name="PW"            type="int"     port-index="0"   source="pads_begin"             />
+        <Scalar arg-name="PH"            type="int"     port-index="1"   source="pads_begin"             />
+        <Scalar arg-name="SW"            type="int"     port-index="0"   source="strides"                />
+        <Scalar arg-name="SH"            type="int"     port-index="1"   source="strides"                />
+    </Parameters>
+    <WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
+</CustomLayer>
+
+<CustomLayer name="Resample" type="MVCL" version="1">
+   <Where antialias="0" />
+ <Kernel entry="resample_nearest">
+   <Source filename="resample_nn.bin" />
+ </Kernel>
+ <Parameters>
+   <Tensor arg-name="src"      type="input"  port-index="0" format="BFYX"   />
+   <Tensor arg-name="dst"      type="output" port-index="0" format="BFYX"   />
+   <Scalar arg-name="iw"       type="int"    port-index="0" source="I.X"    />
+   <Scalar arg-name="ih"       type="int"    port-index="0" source="I.Y"    />
+   <Scalar arg-name="factor"   type="float"  port-index="0" source="factor" />
+   <Scalar arg-name="ow"       type="int"    port-index="0" source="O.X"    />
+   <Scalar arg-name="oh"       type="int"    port-index="0" source="O.Y"    />
+   <Scalar arg-name="channels" type="int"    port-index="0" source="I.F"    />
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+</CustomLayer>
+
+<CustomLayer name="Resample" type="MVCL" version="1">
+   <Where antialias="1"/>
+<Kernel entry="resample_with_antialias">
+  <Source filename="resample_with_antialias.bin" />
+</Kernel>
+<Parameters>
+  <Tensor arg-name="src"      type="input"  port-index="0" format="BFYX"/>
+  <Tensor arg-name="dst"      type="output" port-index="0" format="BFYX"/>
+  <Scalar arg-name="iw"       type="int"    port-index="0" source="I.X" />
+  <Scalar arg-name="ih"       type="int"    port-index="0" source="I.Y" />
+  <Scalar arg-name="factor"   type="float"  port-index="0" source="factor"  />
+  <Scalar arg-name="ow"       type="int"    port-index="0" source="O.X" />
+  <Scalar arg-name="oh"       type="int"    port-index="0" source="O.Y" />
+  <Scalar arg-name="channels" type="int"    port-index="0" source="I.F" />
+</Parameters>
+<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+</CustomLayer>
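Several of the bindings above pad the global work size so that it divides evenly by the chosen local work size. For example, one binding uses global="((Y+7)/8)*8,F,1" with local="8,1,1"; with integer division this rounds Y up to the next multiple of 8:

    \left\lfloor \frac{Y + 7}{8} \right\rfloor \cdot 8 = \left\lceil \frac{Y}{8} \right\rceil \cdot 8,
    \qquad \text{e.g. } Y = 13 \;\Rightarrow\; \left\lfloor \frac{20}{8} \right\rfloor \cdot 8 = 2 \cdot 8 = 16.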
diff --git a/inference-engine/src/vpu/custom_kernels/region_chw.cl b/inference-engine/src/vpu/custom_kernels/region_chw.cl
new file mode 100644 (file)
index 0000000..2aae3a6
--- /dev/null
@@ -0,0 +1,85 @@
+// Copyright (C) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define NUM_CLASSES 80
+
+static void logistic_activate(__global const half* restrict src_data,
+                              __global       half* restrict dst_data,
+                              int offset)
+{
+    half val = src_data[offset];
+    val = 1.0f/(1.0f + half_exp(-val));
+    dst_data[offset] = val;
+}
+
+__kernel void region_ocl(__global const half* restrict src_data,
+                         __global       half* restrict dst_data,
+                         int W,
+                         int H,
+                         int classes,
+                         int coords,
+                         int num,
+                         int maskSize,
+                         int doSoftmax)
+{
+    int box_sz = H * W * (classes + coords + 1);
+    int pixel_pos = min((int)get_global_id(0), H*W);
+    int box = get_global_id(1);
+
+    //if (pixel_pos >= H*W) return;
+
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 0*H*W);
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 1*H*W);
+
+    //copy plane 2 and 3
+    dst_data[box * box_sz + pixel_pos + 2*H*W] = src_data[box * box_sz + pixel_pos + 2*H*W];
+    dst_data[box * box_sz + pixel_pos + 3*H*W] = src_data[box * box_sz + pixel_pos + 3*H*W];
+
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 4*H*W);
+
+    int data_offset =  box * box_sz + (coords + 1) * W * H;
+
+    __private half data[NUM_CLASSES];
+
+    if (doSoftmax) {
+        half max_val = src_data[data_offset + 0*H*W + pixel_pos];
+        for (int c = 0; c < classes; c++) {
+            half tmp = src_data[data_offset + c*H*W + pixel_pos];
+            data[c] = tmp;
+            max_val = max( max_val, tmp);
+        }
+
+        half expSum = 0.0f;
+
+        for (int c = 0; c < classes; c++) {
+            half tmp = half_exp(data[c] - max_val);
+            data[c] = tmp;
+            expSum += tmp;
+        }
+        for (int c = 0; c < classes; c++) {
+            data[c] = data[c] / expSum;
+        }
+
+        for (int c = 0; c < classes; c++) {
+            dst_data[data_offset + c*H*W + pixel_pos + 0] = data[c];
+        }
+    }
+    else {
+        for (int i = 0;  i < classes; i++) {
+            logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + (5 + i)*H*W);
+        }
+    }
+}
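The doSoftmax branch above normalizes each pixel's class scores with the usual max-subtraction trick so that the half-precision exponentials cannot overflow. A minimal host-side C++ sketch of the same per-pixel computation (an illustrative helper, not part of the kernel sources):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Numerically stable softmax over one pixel's class scores (assumes a
    // non-empty vector): subtract the maximum before exponentiation, then
    // normalize by the accumulated sum, mirroring the doSoftmax branch above.
    std::vector<float> softmaxClasses(const std::vector<float>& scores) {
        const float maxVal = *std::max_element(scores.begin(), scores.end());
        std::vector<float> probs(scores.size());
        float expSum = 0.0f;
        for (std::size_t c = 0; c < scores.size(); ++c) {
            probs[c] = std::exp(scores[c] - maxVal);
            expSum += probs[c];
        }
        for (float& p : probs) {
            p /= expSum;
        }
        return probs;
    }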
diff --git a/inference-engine/src/vpu/custom_kernels/region_chw_m7_branch0.cl b/inference-engine/src/vpu/custom_kernels/region_chw_m7_branch0.cl
new file mode 100644 (file)
index 0000000..4a8b3f0
--- /dev/null
@@ -0,0 +1,68 @@
+// Copyright (C) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define NUM_CLASSES 80
+
+static void logistic_activate(__global const half* restrict src_data,
+                              __global       half* restrict dst_data,
+                              int offset)
+{
+    half val = src_data[offset];
+    val = 1.0f/(1.0f + native_exp(-val));
+    dst_data[offset] = val;
+}
+
+__kernel void region_ocl(__global const half* restrict src_data,
+                         __global       half* restrict dst_data,
+                         int W,
+                         int H,
+                         int classes,
+                         int coords)
+{
+    const int box_sz = H * W * (classes + coords + 1);
+    const int pixel_pos = min((int)get_global_id(0), ((H*W) - 1));
+    const int box = get_global_id(1);
+
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 0*H*W);
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 1*H*W);
+
+    //copy plane 2 and 3
+    dst_data[box * box_sz + pixel_pos + 2*H*W] = src_data[box * box_sz + pixel_pos + 2*H*W];
+    dst_data[box * box_sz + pixel_pos + 3*H*W] = src_data[box * box_sz + pixel_pos + 3*H*W];
+
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 4*H*W);
+    int data_offset =  box * box_sz + (coords + 1) * W * H;
+
+    __private half data[NUM_CLASSES];
+
+    half max_val = src_data[data_offset + 0*H*W + pixel_pos];
+    for (int c = 0; c < classes; c++) {
+        half tmp = src_data[data_offset + c*H*W + pixel_pos];
+        data[c] = tmp;
+        max_val = max( max_val, tmp);
+    }
+
+    half expSum = 0.0f;
+
+    for (int c = 0; c < classes; c++) {
+        half tmp = half_exp(data[c] - max_val);
+        data[c] = tmp;
+        expSum += tmp;
+    }
+    for (int c = 0; c < classes; c++) {
+        dst_data[data_offset + c*H*W + pixel_pos + 0] = data[c] / expSum;
+    }
+}
diff --git a/inference-engine/src/vpu/custom_kernels/region_chw_m7_branch1.cl b/inference-engine/src/vpu/custom_kernels/region_chw_m7_branch1.cl
new file mode 100644 (file)
index 0000000..059e3dd
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright (C) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define NUM_CLASSES 80
+
+static void logistic_activate(__global const half* restrict src_data,
+                              __global       half* restrict dst_data,
+                              int offset)
+{
+    half val = src_data[offset];
+    val = 1.0f/(1.0f + native_exp(-val));
+    dst_data[offset] = val;
+}
+
+__kernel void region_ocl(__global const half* restrict src_data,
+                         __global       half* restrict dst_data,
+                         int W,
+                         int H,
+                         int classes,
+                         int coords)
+{
+    int box_sz = H * W * (classes + coords + 1);
+    int pixel_pos = min((int)get_global_id(0), ((H*W) - 1));
+    int box = get_global_id(1);
+
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 0*H*W);
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 1*H*W);
+
+    //copy plane 2 and 3
+    dst_data[box * box_sz + pixel_pos + 2*H*W] = src_data[box * box_sz + pixel_pos + 2*H*W];
+    dst_data[box * box_sz + pixel_pos + 3*H*W] = src_data[box * box_sz + pixel_pos + 3*H*W];
+
+    logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + 4*H*W);
+
+    int data_offset =  box * box_sz + (coords + 1) * W * H;
+
+    for (int i = 0;  i < classes; i++) {
+        logistic_activate(src_data, dst_data, box * box_sz + pixel_pos + (5 + i)*H*W);
+    }
+}
diff --git a/inference-engine/src/vpu/custom_kernels/reorg_hwc.cl b/inference-engine/src/vpu/custom_kernels/reorg_hwc.cl
new file mode 100644 (file)
index 0000000..9d0d475
--- /dev/null
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define MIN(v1, v2) ((v1) < (v2) ? (v1) : (v2))
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+__kernel void reorg(__global half* restrict src,
+                    __global half* restrict out,
+                    int h,
+                    int w,
+                    int stride)
+{
+    int j = MIN(get_global_id(0), h-1);
+
+    int k = get_global_id(1);
+    int c = get_global_size(1);
+
+    int out_c = c / (stride * stride);
+    int oc    = c * (stride * stride);
+    int oh    = h / stride;
+    int ow    = w / stride;
+
+    int in_index = w * (j + h*k);
+
+    int new_z = in_index / (oh*ow);
+    int new_y = (in_index %(oh*ow)) / ow;
+    int new_x = (in_index %(oh*ow)) % ow;
+    int new_index = new_z + new_x * oc + new_y * oc * ow;
+
+    in_index++;
+
+    int c2 = k % out_c;
+    int offset = k / out_c;
+    int w2 = 0 * stride + offset % stride;
+    int h2 = j * stride + offset / stride;
+    int out_index = w2 + w * stride * (h2 + h * stride * c2);
+
+    for (int i = 0; i < w; ++i, out_index+=stride, in_index++)
+    {
+        // repacking coordinates
+        int k0 =  out_index / (h*w);
+        int j0 = (out_index % (h*w)) / w;
+        int i0 = (out_index % (h*w)) % w;
+        int out_index_repack = k0 + c * i0 + c * w * j0;
+        out[new_index] = src[out_index_repack];
+
+        int new_z =  in_index / (oh*ow);
+        int new_y = (in_index %(oh*ow)) / ow;
+        int new_x = (in_index %(oh*ow)) % ow;
+        new_index = new_z + new_x * oc + new_y * oc * ow;
+    }
+}
@@ -18,8 +18,7 @@ kernel void resample_nearest(__global const half* restrict src,
                              __global       half* restrict dst,
                              int iw,
                              int ih,
-                             float fx,
-                             float fy,
+                             float factor,
                              int ow,
                              int oh,
                              int channels)
@@ -28,6 +27,9 @@ kernel void resample_nearest(__global const half* restrict src,
     int c = get_global_id(1);
     int b = get_global_id(2);
 
+    float fx = 1.f / factor;
+    float fy = 1.f / factor;
+
     __global const half* start_src = src + b * iw * ih * channels + iw * ih * c;
     __global       half* start_dst = dst + b * ow * oh * channels + ow * oh * c;
 
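With this change the nearest-neighbour kernel takes the layer's single factor attribute (as the Resample binding in the custom-layer XML above now passes it) and derives both per-axis ratios itself; these map output coordinates back into the input:

    f_x = f_y = \frac{1}{\text{factor}},

so the previously separate fx/fy arguments are no longer needed.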
diff --git a/inference-engine/src/vpu/custom_kernels/resample_with_antialias.cl b/inference-engine/src/vpu/custom_kernels/resample_with_antialias.cl
new file mode 100644 (file)
index 0000000..618c84e
--- /dev/null
@@ -0,0 +1,75 @@
+// Copyright (C) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+static inline float triangleCoeff(float x)
+{
+    return 1.0f - fabs(x);  // fmax(0.0f, 1 - fabs(x));
+}
+__kernel void resample_with_antialias(const __global half* restrict src,
+                                      __global half* restrict dst,
+                                      int iw,
+                                      int ih,
+                                      float factor,
+                                      int ow,
+                                      int oh,
+                                      int channels)
+{
+    int oy = min((int)get_global_id(0), oh-1);
+    int c = get_global_id(1);
+    int b = get_global_id(2);
+
+    float fx = 1.f / factor;
+    float fy = 1.f / factor;
+
+    float ax = 1.0f / fx;
+    float ay = 1.0f / fy;
+
+    int rx = (fx < 1.0f) ? 2 : ceil((1.0f)/ax);
+    int ry = (fy < 1.0f) ? 2 : ceil((1.0f)/ay);
+
+    const __global half* restrict start_src = src + b * iw * ih * channels + iw * ih * c;
+    __global half* restrict start_dst = dst + b * ow * oh * channels + ow * oh * c;
+
+    for (int ox = 0; ox < ow; ox++)
+    {
+        float ix_r0 = ox*fx + fx / 2.0f - 0.5f;
+        float iy_r0 = oy*fy + fy / 2.0f - 0.5f;
+        int ix_r1 = (int)(round(ix_r0));
+        int iy_r1 = (int)(round(iy_r0));
+
+        float wsum = 0.f;
+        float sum = 0.f;
+
+        for (int y = iy_r1 - ry; y <= iy_r1 + ry; y++)
+        {
+            for (int x = ix_r1 - rx; x <= ix_r1 + rx; x++)
+            {
+                if (y < 0 || x < 0) continue;
+                if (y >= (int)ih || x >= (int)iw) continue;
+
+                float dx = ix_r0 - x;
+                float dy = iy_r0 - y;
+
+                float w = ax*triangleCoeff(ax*dx) * ay*triangleCoeff(ay*dy);
+
+                sum += w * start_src[y*iw + x];
+                wsum += w;
+            }
+        }
+
+        start_dst[oy*ow + ox] = (!wsum) ? (half)0.0f : (half)(sum / wsum);
+    }
+}
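For each output pixel the kernel above accumulates a triangle-filter (tent) weighted average of the input pixels inside a window around the back-projected position and normalizes by the total weight. With f_x = f_y = 1/factor, a_x = 1/f_x, a_y = 1/f_y, and \Lambda(t) = 1 - |t| as implemented by triangleCoeff:

    i_x = o_x f_x + \tfrac{f_x}{2} - \tfrac{1}{2}, \qquad
    w(x, y) = a_x \, \Lambda\!\big(a_x (i_x - x)\big) \; a_y \, \Lambda\!\big(a_y (i_y - y)\big),

    \mathrm{dst}(o_x, o_y) = \frac{\sum_{(x, y)} w(x, y)\, \mathrm{src}(x, y)}{\sum_{(x, y)} w(x, y)},

with an analogous i_y and a window radius of 2 when f < 1 and \lceil f \rceil otherwise. Note that triangleCoeff as committed keeps the fmax(0, ...) clamp only in a comment, so samples at the very edge of the window can contribute small negative weights.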
index 80cbcba..879d583 100644 (file)
@@ -21,6 +21,7 @@ endif()
 target_include_directories(${TARGET_NAME}
     PUBLIC
         "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
 target_include_directories(${TARGET_NAME}
     SYSTEM PUBLIC
         "${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src"
@@ -28,7 +29,7 @@ target_include_directories(${TARGET_NAME}
         "${IE_MAIN_SOURCE_DIR}/src/inference_engine"
         "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/mvnc/include")
 
-target_link_libraries(${TARGET_NAME} PUBLIC pugixml)
+target_link_libraries(${TARGET_NAME} PUBLIC pugixml vpu_common_lib)
 
 if(WIN32)
     target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)
@@ -52,6 +53,8 @@ if (WIN32)
     target_include_directories(${TARGET_NAME}_test_static SYSTEM PUBLIC ${target_includes})
 
     set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)
+
+    target_link_libraries(${TARGET_NAME}_test_static PUBLIC vpu_common_lib)
 else()
     add_library(${TARGET_NAME}_test_static ALIAS ${TARGET_NAME})
 endif()
index f7b5cab..123ec2b 100644 (file)
@@ -41,7 +41,7 @@ private:
     void getMetaData(
             const Model::Ptr& model,
             const std::vector<ie::CNNLayerPtr>& allLayers,
-            std::vector<StageMetaInfo>& metaData);
+            GraphMetaInfo& graphMetaData);
 
     void extractDataInfo(
             const Model::Ptr& model,
index 7d13751..28a784d 100644 (file)
@@ -23,10 +23,12 @@ namespace vpu {
 namespace ie = InferenceEngine;
 
 VPU_DECLARE_ENUM(CustomDataFormat,
-    BYXF = 0,  // HWC used in most software layers
-    BFYX = 1,  // CHW used if HW module is enabled
-    Any  = 2,  // doesn't really matter
-    None = 3
+    BYXF = 0,  // NHWC used in most software layers
+    BFYX = 1,  // NCHW used if HW module is enabled
+    YXF  = 2,  // HWC used in most software layers
+    FYX  = 3,  // CHW used if HW module is enabled
+    Any  = 4,  // doesn't really matter
+    None = 5
 )
 
 VPU_DECLARE_ENUM(CustomParamType,
index d3521e7..6cc4eb5 100644 (file)
@@ -118,9 +118,13 @@ public:
     void parseRNN(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseGEMM(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseLog(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
+    void parseExp(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseReverseSequence(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseGather(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseReduce(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
+    void parseFloor(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
+    void parseTopK(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
+    void parseSelect(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
 
     //
     // Special layers
@@ -131,6 +135,7 @@ public:
     void parseReshape(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseConcat(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
     void parseSplit(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
+    void parseStridedSlice(const Model::Ptr& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
 
 //
 // Utility
@@ -162,6 +167,8 @@ private:
 
     ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> _customLayers;
     ie::details::caseless_map<std::string, vpu::Data> kernelNodes;
+    std::unordered_map<ie::Blob::Ptr, vpu::Data> lstmWeights;
+    std::unordered_map<ie::Blob::Ptr, vpu::Data> lstmBiases;
     vpu::IeNetworkParser _ieNetworkParser;
 };
 
index bb0bcc8..bfba1cc 100644 (file)
@@ -150,7 +150,7 @@ public:
             const Data& biases,
             Data output);
 
-    Stage addBroadcastStage(
+    Stage addExpandStage(
             const Model::Ptr& model,
             const std::string& name,
             const ie::CNNLayerPtr& layer,
@@ -191,9 +191,7 @@ public:
             float beta,
             bool transposeA,
             bool transposeB,
-            const Data& inputA,
-            const Data& inputB,
-            const Data& inputC,
+            const DataVector& inputs,
             const Data& output);
 
 
@@ -210,9 +208,9 @@ public:
             const Model::Ptr& model,
             const std::string& name,
             const ie::CNNLayerPtr& layer,
-            const DataVector& input,
-            const DataVector& output,
-            const SmallVector<int, MAX_DIMS_64>& ieOrder);
+            const Data& input,
+            const Data& output,
+            const DimValues_<Dim>& permutation);
 };
 
 }  // namespace vpu
index e345118..ad71f09 100644 (file)
@@ -7,6 +7,7 @@
 #include <cstdint>
 
 #include <string>
+#include <map>
 #include <vector>
 #include <memory>
 #include <unordered_map>
@@ -44,7 +45,9 @@ VPU_DECLARE_ENUM(ExecutionMode,
 VPU_DECLARE_ENUM(ComputeLayout,
     AUTO,
     NCHW,
-    NHWC
+    NHWC,
+    NCDHW,
+    NDHWC
 )
 
 struct CompilationConfig final {
@@ -73,8 +76,6 @@ struct CompilationConfig final {
 
     bool detectBatch = true;
 
-    bool allowFP32Models = false;
-
     std::string hwWhiteList;
     std::string hwBlackList;
 
@@ -96,6 +97,7 @@ struct CompilationConfig final {
     float inputBias = 0.0f;
 
     bool hwDilation = false;
+    std::map<std::string, std::vector<int>> ioStrides;
 };
 
 
@@ -122,7 +124,7 @@ struct CompiledGraph final {
 
     int networkBatch = 0;
 
-    std::vector<StageMetaInfo> stagesMeta;
+    GraphMetaInfo graphMeta;
     int numActiveStages = 0;
 
     DataInfo inputInfo;
index f090e33..a2f292f 100644 (file)
@@ -16,15 +16,16 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override;
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override;
 
-    void propagateDataOrderImpl() const override;
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override;
 
-    void getDataStridesRequirementsImpl() const override;
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override;
 
     void finalizeDataLayoutImpl() override;
 
-    void getBatchSupportInfoImpl() const override;
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override;
 
     void finalCheckImpl() const override;
 
index c12613c..42ee16f 100644 (file)
@@ -38,9 +38,10 @@ const int CNN_MAX_INPUT_CHANNELS = 2048;
 const int CNN_MAX_OUTPUT_CHANNELS = 2048;
 
 const int CNN_MAX_BYTES = 128 * 1024;
-const int CNN_MAX_CHANNELS_PER_BLOCK = 2048;
 const int CNN_MAX_COEFF_PER_BLOCK = 256;
 
+const int CMX_DATA_BYTE_WIDTH = 16;
+
 //
 // Tiling scheme
 //
@@ -222,19 +223,38 @@ SmallVector<HwPlaneTileInfo> splitIntoPlaneTilesWithPool(
         int pad,
         int maxOutputSize);
 
+// Due to possible junk, this may return more tiles than requested (1) (O -> I)
 SmallVector<HwPlaneTileInfo> splitIntoPlaneTiles(
         int inputSize, int outputSize,
         int kernelSize, int kernelStride,
         int padBefore, int padAfter,
+        // max size of output tile with junk included
         int maxOutputSize,
-        bool alignInputTile,
         bool useCeil);
 
 //
+// Check HW-unit memory restrictions for tile.
+//
+
+bool checkPoolingHWRestrictions(
+    int inTileWidth, int inTileHeight,
+    int inTileChannels, int outTileChannels,
+    int kernelSizeX, int kernelSizeY,
+    int kernelStride);
+
+bool checkConvHWRestrictions(
+    int inTileWidth, int inTileHeight,
+    int inTileChannels, int outTileChannels,
+    int kernelSizeX, int kernelSizeY,
+    int kernelStride,
+    HwOpMode mode);
+
+//
 // HW Convolution tiling over output channels.
 //
 
 // This function tries to split the output over channels.
+// Splitting over output channels (OC) is invoked at the very end (3)
 HwConvTileInfo splitHwConvIntoOutChannelsTiles(
         int inTileWidth, int inTileHeight, int inTileChannels,
         int outTileChannels,
index 7253a80..0c26454 100644 (file)
@@ -263,7 +263,10 @@ public:
 
     bool checkStrides(const StridesRequirement& reqs) const;
 
-    inline void resetRequiredStrides() { _requiredStrides = StridesRequirement::empty(); }
+    inline void resetRequiredStrides() {
+        _requiredStrides = StridesRequirement::empty();
+    }
+
     void updateRequiredStrides(const StridesRequirement& newReqs);
 
     //
@@ -306,7 +309,7 @@ public:
             const Stage& stage,
             BlobSerializer& serializer,
             DimsOrder newOrder = DimsOrder(),
-            const EnumMap<Dim, SmallVector<Dim, MAX_DIMS_64>>& dimsReloc = EnumMap<Dim, SmallVector<Dim, MAX_DIMS_64>>());
+            const EnumMap<Dim, DimVector>& dimsReloc = EnumMap<Dim, DimVector>());
 
     void serializeOldBufferNC(
             const Stage& stage,
index 19b6175..86b7102 100644 (file)
@@ -15,7 +15,6 @@
 
 #include <ie_layouts.h>
 
-#include <vpu/model/base.hpp>
 #include <vpu/utils/enums.hpp>
 #include <vpu/utils/io.hpp>
 #include <vpu/utils/dot_io.hpp>
@@ -44,11 +43,29 @@ namespace ie = InferenceEngine;
 VPU_DECLARE_ENUM(DataType,
     FP16 = 0,
     U8 = 1,
-//     S32 = 2,  // TODO: remove from MvTensor
+    S32 = 2,
     FP32 = 3,
     I8 = 4
 )
 
+DataType fromIEPrecision(const InferenceEngine::Precision& precision);
+
+//
+// StorageOrder
+//
+
+//
+// Types that are used to store order permutation in packed format.
+//
+
+using StorageOrder64 = uint64_t;
+using StorageOrder32 = uint32_t;
+
+// High-order digit excluded.
+const int MAX_DIMS_64 = std::numeric_limits<StorageOrder64>::digits / 4 - 1;
+
+const int MAX_DIMS_32 = std::numeric_limits<StorageOrder32>::digits / 4;
+
 //
 // Dim
 //
@@ -63,27 +80,15 @@ VPU_DECLARE_ENUM(Dim,
     H = 1,
     C = 2,
     N = 3,
-    _5 = 4,
-    _6 = 5,
-    _7 = 6,
-    _8 = 7
+    D = 4
 )
 
-//
-// StorageOrder
-//
+// TODO: identify casts like static_cast<int>(Dim)
+//       and replace them all with calls to this function
+// JIRA: #21163
+int dimToIeInd(vpu::Dim const& dim, int numDims);
 
-//
-// Types that are used to store order permutation in packed format.
-//
-
-using StorageOrder64 = uint64_t;
-using StorageOrder32 = uint32_t;
-
-// High-order digit excluded.
-const int MAX_DIMS_64 = std::numeric_limits<StorageOrder64>::digits / 4 - 1;
-
-const int MAX_DIMS_32 = std::numeric_limits<StorageOrder32>::digits / 4;
+using DimVector = SmallVector<Dim, MAX_DIMS_64>;
 
 //
 // DimValues
@@ -252,29 +257,29 @@ public:
         auto ind = static_cast<int32_t>(d);
         IE_ASSERT(ind >= 0 && ind < MAX_DIMS_64);
 
-        return _flags[ind];
+        return _flags[static_cast<size_t>(ind)];
     }
 
     const T& operator[](Dim d) const {
         auto ind = static_cast<int32_t>(d);
         IE_ASSERT(ind >= 0 && ind < MAX_DIMS_64);
-        IE_ASSERT(_flags[ind]);
+        IE_ASSERT(_flags[static_cast<size_t>(ind)]);
 
-        return _values[ind].second;
+        return _values[static_cast<size_t>(ind)].second;
     }
     const T& get(Dim d, const T& def) const {
         auto ind = static_cast<int32_t>(d);
         IE_ASSERT(ind >= 0 && ind < MAX_DIMS_64);
 
-        return _flags[ind] ? _values[ind].second : def;
+        return _flags[static_cast<size_t>(ind)] ? _values[static_cast<size_t>(ind)].second : def;
     }
 
     void set(Dim d, const T& val) {
         auto ind = static_cast<int32_t>(d);
         IE_ASSERT(ind >= 0 && ind < MAX_DIMS_64);
 
-        if (!_flags[ind]) {
-            _flags[ind] = true;
+        if (!_flags[static_cast<size_t>(ind)]) {
+            _flags[static_cast<size_t>(ind)] = true;
             ++_size;
         }
 
@@ -352,6 +357,12 @@ private:
 };
 
 template <typename T>
+std::ostream& operator<<(std::ostream& o, const DimValues_<T>& dimValues) {
+    dimValues.printTo(o);
+    return o;
+}
+
+template <typename T>
 void printTo(std::ostream& os, const DimValues_<T>& dims) {
     dims.printTo(os);
 }
@@ -378,6 +389,8 @@ public:
     static DimsOrder NCHW;
     static DimsOrder NHWC;
     static DimsOrder NHCW;
+    static DimsOrder NCDHW;
+    static DimsOrder NDHWC;
 
     //
     // Constructor
@@ -386,7 +399,8 @@ public:
     DimsOrder() = default;
     static DimsOrder fromCode(StorageOrder64 code);
     static DimsOrder fromNumDims(int numDims);
-    static DimsOrder fromPermutation(const SmallVector<Dim, MAX_DIMS_64>& perm);
+    static DimsOrder fromPermutation(const DimVector& perm);
+    static DimsOrder fromLayout(ie::Layout const& layout);
 
     //
     // Accessors
@@ -518,6 +532,12 @@ public:
 
     void reorder(DimsOrder dimsOrder);
 
+    //
+    // Export
+    //
+
+    ie::TensorDesc toTensorDesc() const;
+
 private:
     DataType _type = DataType::FP16;
     DimsOrder _dimsOrder;
@@ -534,7 +554,8 @@ void printTo(DotLabel& lbl, const DataDesc& desc);
 VPU_DECLARE_ENUM(DimStride,
     Any,
     Compact,
-    Aligned
+    Aligned,
+    Fixed
 )
 
 const int STRIDE_ALIGNMENT = 16;
@@ -553,22 +574,23 @@ public:
 
     static StridesRequirement empty() { return StridesRequirement().add(0, DimStride::Any); }
     static StridesRequirement compact();
+    static StridesRequirement fixed(const std::vector<int>& strides, const DataDesc& desc);
 
     StridesRequirement& add(int index, DimStride stride) {
         IE_ASSERT(index >= 0 && index < MAX_DIMS_64);
-        _map[index] = stride;
+        _map[static_cast<size_t>(index)] = stride;
         return *this;
     }
 
     StridesRequirement& remove(int index) {
         IE_ASSERT(index >= 0 && index < MAX_DIMS_64);
-        _map[index] = DimStride::Any;
+        _map[static_cast<size_t>(index)] = DimStride::Any;
         return *this;
     }
 
     DimStride get(int index) const {
         IE_ASSERT(index >= 0 && index < MAX_DIMS_64);
-        return _map[index];
+        return _map[static_cast<size_t>(index)];
     }
 
     bool operator==(const StridesRequirement& other) const {
@@ -578,8 +600,13 @@ public:
         return (_map != other._map);
     }
 
+    const DimValues& fixedStrides() const { return _fixedStrides; }
+
+    int getFixedStride(Dim d) const { return _fixedStrides[d]; }
+
 private:
     std::array<DimStride, MAX_DIMS_64> _map{{DimStride::Any}};
+    DimValues _fixedStrides;
 };
 
 void printTo(std::ostream& os, const StridesRequirement& reqs);
@@ -591,7 +618,7 @@ bool checkStride(
         const DimValues& strides,
         const DataDesc& desc,
         int ind,
-        DimStride req);
+        const StridesRequirement& req);
 bool checkStrides(
         const DataDesc& desc,
         const DimValues& strides,
index 9ce39fb..3e2d03b 100644 (file)
@@ -123,8 +123,8 @@ public:
             const DataVector& outputs);
 
     Stage duplicateStage(
-            const std::string& name,
             const Stage& origStage,
+            const std::string& postfix,
             const DataVector& inputs,
             const DataVector& outputs);
 
@@ -238,13 +238,13 @@ public:
     // Nodes removal
     //
 
-    void disconnectStageDatas(const Stage& stage);
+    void disconnectStage(const Stage& stage);
 
     void removeStage(const Stage& stage);
 
     void removeUnusedData(const Data& data);
 
-    void cleanUpDatas();
+    void cleanUp();
 
     //
     // Stage order
index 44b48b7..683ec4d 100644 (file)
@@ -48,8 +48,9 @@ VPU_DECLARE_ENUM(StageType,
     Concat,
     Split,
     Reshape,
-    Broadcast,
+    Expand,
     Shrink,
+    StridedSlice,
 
     Empty = -1,
 
@@ -136,6 +137,10 @@ VPU_DECLARE_ENUM(StageType,
     ReduceAnd = 93,
     ReverseSequence = 94,
     Gather = 100,
+    Exp = 101,
+    Floor = 102,
+    TopK = 104,
+    ReduceMin = 105,
 )
 
 //
@@ -172,7 +177,7 @@ VPU_DECLARE_ENUM(StageSHAVEsRequirements,
 );
 
 //
-// StageNode
+// ScalePropagationStep
 //
 
 VPU_DECLARE_ENUM(ScalePropagationStep,
@@ -181,6 +186,29 @@ VPU_DECLARE_ENUM(ScalePropagationStep,
     Propagate
 );
 
+//
+// TopKMode
+//
+
+// Firmware implementations must be aligned with these values
+VPU_DECLARE_ENUM(TopKMode,
+    Max = 0,
+    Min = 1)
+
+//
+// TopKSort
+//
+
+// Firmware implementations must be aligned with these values
+VPU_DECLARE_ENUM(TopKSort,
+    None = 0,
+    Value = 1,
+    Index = 2)
+
+//
+// StageDataInfo
+//
+
 template <typename Val>
 class StageDataInfo final {
 public:
@@ -258,6 +286,10 @@ private:
     SmallVector<Optional<Val>> _outputVals;
 };
 
+//
+// StageNode
+//
+
 class StageNode :
         public EnableHandleFromThis<StageNode>,
         public EnableCustomAttributes {
@@ -297,6 +329,8 @@ class StageNode :
     // Edges wrappers
     //
 
+    VPU_MODEL_ATTRIBUTE(Handle<Model>, model, nullptr)
+
 public:
     struct StageNameCmp final {
         inline bool operator()(const Stage& left, const Stage& right) const {
@@ -445,7 +479,9 @@ public:
     // Bindings with IE
     //
 
-    inline std::string origLayerName() const { return _origLayer != nullptr ? _origLayer->name : std::string(); }
+    inline std::string origLayerName() const {
+        return _origLayer != nullptr ? _origLayer->name : std::string();
+    }
 
     //
     // SHAVEs allocation
@@ -463,25 +499,27 @@ public:
             ScalePropagationStep step);
 
     // Data order propagation from inputs to outputs.
-    const StageDataInfo<DimsOrder>& propagateDataOrder() const;
+    const StageDataInfo<DimsOrder>& propagateDataOrder();
 
     // Get Data strides requirements
-    const StageDataInfo<StridesRequirement>& getDataStridesRequirements() const;
+    const StageDataInfo<StridesRequirement>& getDataStridesRequirements();
 
     // Finalize internal parameter to final Data layout.
     void finalizeDataLayout();
 
     // Information about batch support.
-    const StageDataInfo<BatchSupport>& getBatchSupportInfo() const;
+    const StageDataInfo<BatchSupport>& getBatchSupportInfo();
 
     // Resources requirements.
     StageSHAVEsRequirements getSHAVEsRequirements() const;
 
-    // Final check.
+    void initialCheck() const;
     void finalCheck() const;
 
     // Name postfix for modified stage
-    inline void appendNamePostfix(const std::string& postfix) { _name = _name + postfix; }
+    inline void appendNamePostfix(const std::string& postfix) {
+        _name = _name + postfix;
+    }
 
     //
     // Backend utilities
@@ -498,19 +536,21 @@ protected:
 
     virtual void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step);
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo);
 
-    virtual void propagateDataOrderImpl() const = 0;
+    virtual void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) = 0;
 
-    virtual void getDataStridesRequirementsImpl() const = 0;
+    virtual void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) = 0;
 
     virtual void finalizeDataLayoutImpl() = 0;
 
-    virtual void getBatchSupportInfoImpl() const = 0;
+    virtual void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) = 0;
 
     virtual StageSHAVEsRequirements getSHAVEsRequirementsImpl() const;
 
-    virtual void finalCheckImpl() const = 0;
+    virtual void initialCheckImpl() const {}
+    virtual void finalCheckImpl() const {}
 
     virtual void serializeParamsImpl(BlobSerializer& serializer) const = 0;
 
@@ -535,15 +575,12 @@ protected:
             _posInModel(this) {
     }
 
-protected:
-    Handle<Model> _model;
-
-    mutable StageDataInfo<float> _scaleInfo;
-    mutable StageDataInfo<DimsOrder> _orderInfo;
-    mutable StageDataInfo<StridesRequirement> _stridesInfo;
-    mutable StageDataInfo<BatchSupport> _batchInfo;
-
 private:
+    StageDataInfo<float> _scaleInfo;
+    StageDataInfo<DimsOrder> _orderInfo;
+    StageDataInfo<StridesRequirement> _stridesInfo;
+    StageDataInfo<BatchSupport> _batchInfo;
+
     StagePtrList::iterator _ptrPosInModel;
     IntrusivePtrListNode<StageNode> _posInModel;
 
@@ -552,4 +589,12 @@ private:
 
 void printTo(std::ostream& os, const Stage& stage);
 
+void assertAllInputsOutputsTypes(const StageNode* stage,
+                                 const DataType& expectedInputsType,
+                                 const DataType& expectedOutputsType);
+
+void assertInputsOutputsTypes(const StageNode* stage,
+                              const std::vector<EnumSet<DataType>>& expectedInputsTypes,
+                              const std::vector<EnumSet<DataType>>& expectedOutputsTypes);
+
 }  // namespace vpu
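A hedged sketch of how a stage implementation might call the newly declared type-check helper from its initialCheckImpl; the stage class, its name, and the brace-initialization of EnumSet are illustrative assumptions, not taken from this patch:

    // Hypothetical stage check: exactly one FP16 input and one FP16 output.
    void MyStage::initialCheckImpl() const {
        assertInputsOutputsTypes(
            this,
            {{DataType::FP16}},   // allowed types for input 0
            {{DataType::FP16}});  // allowed types for output 0
    }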
index 97e6338..e81fa48 100644 (file)
@@ -12,6 +12,8 @@
 #include <vpu/vpu_plugin_config.hpp>
 #include <vpu/private_plugin_config.hpp>
 
+#include <vpu/parsed_config_base.hpp>
+
 #include <vpu/graph_transformer.hpp>
 #include <vpu/utils/perf_report.hpp>
 #include <vpu/utils/logger.hpp>
 
 namespace vpu {
 
-VPU_DECLARE_ENUM(ConfigMode,
-    DEFAULT_MODE = 0,
-    RUNTIME_MODE = 1,
-    COMPILE_MODE = 2,
-)
-
-struct ParsedConfig {
+struct ParsedConfig : public ParsedConfigBase {
     CompilationConfig compileConfig;
 
     bool printReceiveTensorTime = false;
-    bool exclusiveAsyncRequests = false;
     bool perfCount              = false;
 
-    LogLevel deviceLogLevel = LogLevel::None;
-    LogLevel hostLogLevel = LogLevel::None;
-
     PerfReport perfReport = PerfReport::PerLayer;
 
-    virtual std::map<std::string, std::string> getDefaultConfig() const;
+    std::map<std::string, std::string> getDefaultConfig() const override;
 
-    virtual ~ParsedConfig() = default;
+    ~ParsedConfig() = default;
 
 protected:
-    explicit ParsedConfig(ConfigMode configMode = ConfigMode::DEFAULT_MODE);
-
-    void checkUnknownOptions(const std::map<std::string, std::string> &config) const;
-    virtual void checkInvalidValues(const std::map<std::string, std::string> &config) const;
-    std::unordered_set<std::string> getKnownOptions() const;
+    explicit ParsedConfig(ConfigMode configMode);
 
-    std::map<std::string, std::string> parse(const std::map<std::string, std::string> &config) {
-        checkInvalidValues(config);
-        checkUnknownOptions(config);
-        checkOptionsAccordingToMode(config);
+    void checkInvalidValues(const std::map<std::string, std::string> &config) const override;
 
-        auto defaultConfig = getDefaultConfig();
-        for (auto &&entry : config) {
-            defaultConfig[entry.first] = entry.second;
-        }
+    void configure(const std::map<std::string, std::string> &config) override;
 
-        return defaultConfig;
-    }
-
-    void configure(const std::map<std::string, std::string> &config);
-    void checkSupportedValues(const std::unordered_map<std::string, std::unordered_set<std::string>> &supported,
-                              const std::map<std::string, std::string> &config) const;
-
-    virtual void checkOptionsAccordingToMode(const std::map<std::string, std::string> &config) const;
-    virtual std::unordered_set<std::string> getCompileOptions() const;
-    virtual std::unordered_set<std::string> getRuntimeOptions() const;
+    std::unordered_set<std::string> getKnownOptions() const override;
+    std::unordered_set<std::string> getCompileOptions() const override;
+    std::unordered_set<std::string> getRuntimeOptions() const override;
 
 private:
     ConfigMode _mode = ConfigMode::DEFAULT_MODE;
-    Logger::Ptr _log;
 };
-
-template<typename T, typename V>
-inline void setOption(T &dst, const V &supported, const std::map<std::string, std::string> &config, const std::string &key) {
-    auto value = config.find(key);
-    if (value != config.end()) {
-        dst = supported.at(value->second);
-    }
-}
-
-inline void setOption(std::string &dst, const std::map<std::string, std::string> &config, const std::string &key) {
-    auto value = config.find(key);
-    if (value != config.end()) {
-        dst = value->second;
-    }
-}
-
-template<typename T, typename C>
-inline void setOption(T &dst, const std::map<std::string, std::string> &config, const std::string &key, const C &preprocess) {
-    auto value = config.find(key);
-    if (value != config.end()) {
-        dst = preprocess(value->second);
-    }
-}
-
 }  // namespace vpu
index 6384576..9644e5a 100644 (file)
@@ -140,6 +140,12 @@ public:
     Pass::Ptr mergeEltwiseAndReLU();
 
     //
+    // StridedSlice processing
+    //
+
+    Pass::Ptr stridedSlice();
+
+    //
     // Data layout adjustment
     //
 
@@ -162,6 +168,7 @@ public:
     //
 
     Pass::Ptr eliminateCopyStages();
+    Pass::Ptr removeUnusedStagesOutputs();
 
     //
     // HW/SW injection
@@ -199,6 +206,9 @@ public:
 
     Pass::Ptr reshapeDilationConv();
 
+    Pass::Ptr addCopyForOutputsInsideNetwork();
+
+    Pass::Ptr initialCheck();
 
 protected:
     StageBuilder::Ptr _stageBuilder;
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp
new file mode 100644 (file)
index 0000000..a379ca4
--- /dev/null
@@ -0,0 +1,319 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <utility>
+#include <memory>
+#include <list>
+#include <string>
+#include <limits>
+#include <algorithm>
+#include <vector>
+#include <unordered_map>
+#include <vpu/model/data_desc.hpp>
+#include <vpu/hw/tiling.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/utils/heap.hpp>
+
+namespace vpu {
+
+namespace HWTilingNS {
+
+struct ConvolutionOptions final {
+    const std::string _stageName;
+
+    const DimValues _inputDims;
+    const DimValues _outputDims;
+    const DimValues _origOutputDims;
+
+    const int _kernelSizeX;
+    const int _kernelSizeY;
+    const int _kernelStride;
+    const int _paddingLeft;
+    const int _paddingRight;
+    const int _paddingTop;
+    const int _paddingBottom;
+
+    const bool _withPool;
+
+public:
+    ConvolutionOptions(const std::string &stageName, const DimValues &inputDims, const DimValues &outputDims,
+                       const DimValues &origOutputDims, const int kernelSizeX, const int kernelSizeY,
+                       const int kernelStride, const int paddingLeft, const int paddingRight,
+                       const int paddingTop, const int paddingBottom, const bool withPool)
+            : _stageName(stageName), _inputDims(inputDims), _outputDims(outputDims),
+              _origOutputDims(origOutputDims), _kernelSizeX(kernelSizeX), _kernelSizeY(kernelSizeY),
+              _kernelStride(kernelStride), _paddingLeft(paddingLeft), _paddingRight(paddingRight),
+              _paddingTop(paddingTop), _paddingBottom(paddingBottom), _withPool(withPool) {}
+};
+
+struct TilingOption final {
+    int numWidthTiles;
+    int numHeightTiles;
+    int numChannelTiles;
+    int totalNumTiles;
+    double cost;
+};
+
+bool operator<(const TilingOption &a, const TilingOption &b);
+
+std::ostream &operator<<(std::ostream &o, const TilingOption &to);
+
+enum class Direction {
+    INPUT_TO_OUTPUT = 0, OUTPUT_TO_INPUT = 1
+};
+
+// Tensors can be split going either from input to output or vice versa
+class GraphDataTiling {
+protected:
+    const ConvolutionOptions &_co;
+    // size of every tile for input tensor in each dimension
+    DimValues _inputTileDims;
+    // size of every tile for output tensor in each dimension
+    DimValues _outputTileDims;
+    bool _useCeil = false;
+    const enum Direction _direction;
+
+public:
+    GraphDataTiling() = delete;
+    virtual ~GraphDataTiling() = default;
+    GraphDataTiling(const GraphDataTiling &other): _co(other._co), _inputTileDims(other._inputTileDims),
+       _outputTileDims(other._outputTileDims), _useCeil(other._useCeil), _direction(other._direction) {
+    }
+
+    explicit GraphDataTiling(const ConvolutionOptions &__co, Direction direction) :
+            _co(__co), _direction(direction) {}
+
+    const DimValues &getInputTileDims() const { return _inputTileDims; }
+
+    const DimValues &getOutputTileDims() const { return _outputTileDims; }
+
+    DimValues &getInputTileDims() { return _inputTileDims; }
+
+    DimValues &getOutputTileDims() { return _outputTileDims; }
+
+    void resetInputTileDims(const DimValues &dimVals) { _inputTileDims = dimVals; }
+
+    void resetOutputTileDims(const DimValues &dimVals) { _outputTileDims = dimVals; }
+
+    virtual void initTileSizes() = 0;
+
+    virtual void applyTilingOption(const TilingOption &tilingOption) = 0;
+
+    virtual void setInputNOutputTileDimensions(const int tileDimW, const int tileDimH, const int tileDimC) = 0;
+
+    virtual void correctPlaneSize() = 0;
+
+    virtual const DimValues &splitOverTensorDims() = 0;
+
+    virtual void patternMatching() = 0;
+
+    bool useCeil() const {
+        return _useCeil;
+    }
+
+    Direction getDirection() const {
+        return _direction;
+    }
+
+    const ConvolutionOptions& co() const { return _co; }
+};
+
+class ConvGraphDataTilingFactory final {
+public:
+    static std::unique_ptr<GraphDataTiling> makeDirTiling(const ConvolutionOptions &co, Direction direction);
+    static std::unique_ptr<GraphDataTiling> makeDirTiling(const GraphDataTiling &o);
+};
+
+class HWConvolutionTileLayoutCut;
+
+// Iterates over all tiling options and keeps the few with the lowest cost
+class HWConvolutionTilingSearcher {
+    const ConvolutionOptions _co;
+    const size_t _maxTilingOptions;
+    const std::unique_ptr<GraphDataTiling> _dirTiling;
+    std::vector<TilingOption> _tilingOptions;
+
+public:
+    HWConvolutionTilingSearcher() = delete;
+    HWConvolutionTilingSearcher(const HWConvolutionTilingSearcher &other): _co(other._co),
+        _maxTilingOptions(other._maxTilingOptions),
+        _dirTiling(ConvGraphDataTilingFactory::makeDirTiling(*other._dirTiling)),
+        _tilingOptions(other._tilingOptions) {
+    }
+
+    HWConvolutionTilingSearcher(const ConvolutionOptions &co,
+                                Direction direction,
+                                size_t maxTilingOptions) : _co(co),
+       _dirTiling(ConvGraphDataTilingFactory::makeDirTiling(_co, direction)),
+       _maxTilingOptions(maxTilingOptions) {
+        IE_ASSERT(maxTilingOptions > 0);
+        _dirTiling->initTileSizes();
+        _tilingOptions = selectBetterTiling();
+    }
+
+    const std::vector<TilingOption> &tilingOptions() const {
+        return _tilingOptions;
+    }
+
+    size_t tilingOptionsCount() const {
+        return _tilingOptions.size();
+    }
+
+    const ConvolutionOptions& co() const { return _co; }
+
+    HWConvolutionTileLayoutCut tileLayoutCut(const TilingOption &option) const;
+
+private:
+    std::vector<TilingOption> selectBetterTiling() const;
+};
+
+// Searches for tiling options and applies them to prepare HW tilings
+class HWConvolutionTiler final {
+private:
+    const ConvolutionOptions _co;
+    std::vector<HwConvTilingPtr> _hwTilings;
+    bool _tilingPossible;
+    const HWConvolutionTilingSearcher _searcher;
+
+public:
+    HWConvolutionTiler() = delete;
+
+    HWConvolutionTiler(const HWConvolutionTiler &other): _co(other._co), _hwTilings(other._hwTilings),
+        _searcher(other._searcher), _tilingPossible(other._tilingPossible) {
+    }
+
+    explicit HWConvolutionTiler(const ConvolutionOptions &co,
+                                Direction direction,
+                                size_t maxTilingOptions);
+
+
+    bool isTilingPossible() const {
+        return _tilingPossible;
+    }
+
+    bool withPool() const {
+        return _co._withPool;
+    }
+
+    const std::vector<HwConvTilingPtr> &getHwTilings() const {
+        return _hwTilings;
+    }
+
+private:
+    bool tileForHW();
+};
+
+SmallVector<HwPlaneTileInfo> calcHeightTiles(const ConvolutionOptions &_co,
+                                             const DimValues &outputTileDims, bool useCeil);
+SmallVector<HwPlaneTileInfo> calcWidthTiles(const ConvolutionOptions &_co,
+                                            const DimValues &outputTileDims, bool useCeil);
+
+// Based on the chosen { inputTileDims, outputTileDims }, constructs the plane tiling structure
+// (same for both input and output; contains only the number of tiles in each dimension)
+class HWConvolutionTileLayoutCut {
+private:
+    const ConvolutionOptions &_co;
+    GraphDataTiling &_dirTiling;
+    HwConvTilingPtr _hwTiling;
+    bool _tileCutPossible;
+
+public:
+    HWConvolutionTileLayoutCut() = delete;
+    HWConvolutionTileLayoutCut(const HWConvolutionTileLayoutCut &other): _co(other._co), _dirTiling(other._dirTiling),
+        _hwTiling(other._hwTiling), _tileCutPossible(other._tileCutPossible) {
+    }
+
+    HWConvolutionTileLayoutCut(HWConvolutionTileLayoutCut &&other): _co(other._co), _dirTiling(other._dirTiling) {
+        _hwTiling = std::move(other._hwTiling);
+        _tileCutPossible = other.tileCutPossible();
+    }
+    HWConvolutionTileLayoutCut(GraphDataTiling &dirTiling, const TilingOption &tilingOption) :
+            _dirTiling(dirTiling),
+            _co(dirTiling.co()), _hwTiling(std::make_shared<HwConvTiling>()) {
+        dirTiling.applyTilingOption(tilingOption);
+
+        dirTiling.patternMatching();
+
+        // Merged Pooling and SoC can't be used together.
+        if (_co._withPool) {
+            IE_ASSERT(!hasSoC(dirTiling));
+        }
+
+        _tileCutPossible = createTiles(calcHeightTiles(_co, dirTiling.getOutputTileDims(), dirTiling.useCeil()),
+                                      calcWidthTiles(_co, dirTiling.getOutputTileDims(), dirTiling.useCeil()),
+                                      dirTiling.getInputTileDims(), dirTiling.getOutputTileDims());
+    }
+
+    bool tileCutPossible() const { return _tileCutPossible; }
+
+    HwConvTilingPtr hwTiling() const {
+        IE_ASSERT(_tileCutPossible);
+        return _hwTiling;
+    }
+
+private:
+    bool createTiles(const SmallVector<HwPlaneTileInfo> &heightTiles,
+                     const SmallVector<HwPlaneTileInfo> &widthTiles,
+                     const DimValues &inputTileDims, const DimValues &outputTileDims) const {
+        IE_ASSERT(!heightTiles.empty());
+        IE_ASSERT(!widthTiles.empty());
+
+        _hwTiling->sohTiles = heightTiles.size();
+        _hwTiling->sowTiles = widthTiles.size();
+        _hwTiling->socTiles = divUp(_co._inputDims[Dim::C], inputTileDims[Dim::C]);
+
+        for (int sohInd = 0; sohInd < _hwTiling->sohTiles; ++sohInd) {
+            const auto &heightTileInfo = heightTiles[sohInd];
+
+            for (int sowInd = 0; sowInd < _hwTiling->sowTiles; ++sowInd) {
+                const auto &widthTileInfo = widthTiles[sowInd];
+
+                auto planeTile = std::make_shared<HwConvPlaneTile>();
+                planeTile->parent = _hwTiling;
+
+                planeTile->sohInd = sohInd;
+                planeTile->sowInd = sowInd;
+
+                planeTile->heightInfo = heightTileInfo;
+                planeTile->widthInfo = widthTileInfo;
+
+                for (int socInd = 0; socInd < _hwTiling->socTiles; ++socInd) {
+                    auto channelTile = std::make_shared<HwConvChannelTile>();
+                    channelTile->parent = planeTile;
+
+                    channelTile->socInd = socInd;
+
+                    channelTile->finalTiles = splitHwConvIntoOutChannelsTiles(
+                            widthTileInfo.inputWithJunk, heightTileInfo.inputWithJunk, inputTileDims[Dim::C],
+                            outputTileDims[Dim::C],
+                            _co._kernelSizeX, _co._kernelSizeY, _co._kernelStride);
+
+                    if (channelTile->finalTiles.numDescr == 0) {
+                        return false;
+                    }
+
+                    channelTile->extendedInputDimC = channelTile->finalTiles.extendedInputDimC;
+                    channelTile->extendedOutputDimC = channelTile->finalTiles.extendedOutputDimC;
+
+                    channelTile->channelStartIndex = socInd * inputTileDims[Dim::C];
+                    channelTile->numInputChannels = inputTileDims[Dim::C];
+
+                    planeTile->channelTiles.emplace_back(channelTile);
+                }
+
+                _hwTiling->planeTiles.emplace_back(planeTile);
+            }
+        }
+        return true;
+    }
+
+    bool hasSoC(const GraphDataTiling &dirTile) const {
+        return dirTile.getInputTileDims()[Dim::C] != _co._inputDims[Dim::C];
+    }
+};
+}  // namespace HWTilingNS
+
+}  // namespace vpu
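A hedged C++ sketch of how a pass might drive the tiler declared above; the include path, dimension values, and convolution attributes are illustrative placeholders, not taken from this patch:

    #include <vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp>

    void exampleTileConvolution() {
        using namespace vpu;
        using namespace vpu::HWTilingNS;

        // Placeholder convolution description (a real pass fills these from the stage).
        DimValues inputDims, outputDims, origOutputDims;
        inputDims.set(Dim::W, 56);  inputDims.set(Dim::H, 56);  inputDims.set(Dim::C, 64);
        outputDims.set(Dim::W, 56); outputDims.set(Dim::H, 56); outputDims.set(Dim::C, 64);
        origOutputDims = outputDims;

        ConvolutionOptions co("conv_example", inputDims, outputDims, origOutputDims,
                              /*kernelSizeX=*/3, /*kernelSizeY=*/3, /*kernelStride=*/1,
                              /*paddingLeft=*/1, /*paddingRight=*/1,
                              /*paddingTop=*/1, /*paddingBottom=*/1,
                              /*withPool=*/false);

        // Search tilings going from input to output and keep the single cheapest option.
        HWConvolutionTiler tiler(co, Direction::INPUT_TO_OUTPUT, /*maxTilingOptions=*/1);
        if (tiler.isTilingPossible()) {
            for (const auto& hwTiling : tiler.getHwTilings()) {
                // Each HwConvTilingPtr carries the SoH/SoW/SoC plane and channel tiles
                // from which the tiled HW stages are built.
                (void)hwTiling;
            }
        }
    }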
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_conv_tiling/hw_stage_tiler.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_conv_tiling/hw_stage_tiler.hpp
new file mode 100644 (file)
index 0000000..3804e8a
--- /dev/null
@@ -0,0 +1,126 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <vpu/model/base.hpp>
+#include <vpu/frontend/stage_builder.hpp>
+#include <vpu/hw/tiling.hpp>
+
+namespace vpu {
+
+struct HWConvStageOptions;
+struct HWConvStageIO;
+
+// Builds the graph that composes a tiled analogue of the single stage 'origStage'
+class HWConvStageTiler {
+private:
+    HWConvStageTiler() = delete;
+    HWConvStageTiler(const HWConvStageTiler&) = delete;
+
+public:
+    DataVector hwInputTiles;
+    std::vector<DimValues> hwInputTilesOffsets;
+
+    DataVector hwOutputTiles;
+    std::vector<DimValues> hwOutputTilesOffsets;
+
+    Data hwInput;
+    Data hwOutput;
+
+    HWConvStageTiler(const HWConvStageOptions &so, const HWConvStageIO &sio,
+                     const Model::Ptr &model, const Handle<StageNode> &origStage,
+                     const StageBuilder::Ptr &stageBuilder, const HwConvTilingPtr &tiling,
+                     const bool makeExplicitPoolStage);
+};
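+// Illustrative usage sketch (the HW convolution tiling pass is the actual call site;
+// 'origStage', 'origOutput', 'model', 'stageBuilder', 'tiling' and 'makeExplicitPoolStage'
+// are assumed locals):
+//
+//   HWConvStageIO io(origStage, origOutput);
+//   HWConvStageOptions opts(origStage);
+//   HWConvStageTiler stageTiler(opts, io, model, origStage, stageBuilder, tiling, makeExplicitPoolStage);
+//   // stageTiler.hwInputTiles / hwOutputTiles now describe the per-tile HW data.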
+
+struct HWConvStageIO {
+private:
+    HWConvStageIO() = delete;
+    HWConvStageIO(const HWConvStageIO&) = delete;
+
+public:
+    Data origInput;
+    Data origWeights;
+    Data origBiases;
+    Data origOutput;
+    DataDesc origOutputDesc;
+
+    explicit HWConvStageIO(const Handle<StageNode> &origStage, const Data &originOutput) {
+        origInput = origStage->input(0);
+        origWeights = origStage->input(1);
+        origBiases = origStage->input(2);
+        origOutput = originOutput;
+        origOutputDesc = origStage->attrs().getOrDefault<DataDesc>("origConvOutput", origOutput->desc());
+    }
+};
+
+// Attributes of the stage collected into a single structure
+struct HWConvStageOptions {
+private:
+    HWConvStageOptions() = delete;
+    HWConvStageOptions(const HWConvStageOptions&) = delete;
+
+public:
+    int kernelSizeX;
+    int kernelSizeY;
+    int kernelStride;
+    int padLeft;
+    int padRight;
+    int padTop;
+    int padBottom;
+
+    bool withReLU;
+    float negativeSlope;
+    uint32_t a0;
+    uint32_t a1;
+    float reluScale;
+
+    bool withClamp;
+    float clampMax;
+
+    bool withPool;
+    int poolKernelSizeX;
+    int poolKernelSizeY;
+    int poolKernelStride;
+    int poolPadLeft;
+    int poolPadRight;
+    int poolPadTop;
+    int poolPadBottom;
+
+    float scaleFactor;
+
+    explicit HWConvStageOptions(const Handle<StageNode> &origStage) {
+        kernelSizeX = origStage->attrs().get<int>("kernelSizeX");
+        kernelSizeY = origStage->attrs().get<int>("kernelSizeY");
+        kernelStride = origStage->attrs().get<int>("kernelStrideX");
+        padLeft = origStage->attrs().get<int>("padLeft");
+        padRight = origStage->attrs().get<int>("padRight");
+        padTop = origStage->attrs().get<int>("padTop");
+        padBottom = origStage->attrs().get<int>("padBottom");
+
+        withReLU = origStage->attrs().getOrDefault<bool>("withReLU", false);
+        negativeSlope = origStage->attrs().getOrDefault<float>("negativeSlope", 0.0f);
+        a0 = origStage->attrs().getOrDefault<uint32_t>("a0", 0);
+        a1 = origStage->attrs().getOrDefault<uint32_t>("a1", 0);
+        reluScale = origStage->attrs().getOrDefault<float>("reluScale", 1.0f);
+
+        withClamp = origStage->attrs().getOrDefault<bool>("withClamp", false);
+        clampMax = origStage->attrs().getOrDefault<float>("clampMax", 6.0f);
+
+        withPool = origStage->attrs().getOrDefault<bool>("withPool", false);
+        poolKernelSizeX = origStage->attrs().getOrDefault<int>("poolKernelSizeX", 0);
+        poolKernelSizeY = origStage->attrs().getOrDefault<int>("poolKernelSizeY", 0);
+        poolKernelStride = origStage->attrs().getOrDefault<int>("poolKernelStride", 0);
+        poolPadLeft = origStage->attrs().getOrDefault<int>("poolPadLeft", 0);
+        poolPadRight = origStage->attrs().getOrDefault<int>("poolPadRight", 0);
+        poolPadTop = origStage->attrs().getOrDefault<int>("poolPadTop", 0);
+        poolPadBottom = origStage->attrs().getOrDefault<int>("poolPadBottom", 0);
+
+        scaleFactor = origStage->attrs().getOrDefault<float>("scaleFactor", 1.0f);
+    }
+};
+
+}  // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_pooling_tiling/hw_pooling_tiler.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_pooling_tiling/hw_pooling_tiler.hpp
new file mode 100644 (file)
index 0000000..b1266ce
--- /dev/null
@@ -0,0 +1,209 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <utility>
+#include <memory>
+#include <list>
+#include <string>
+#include <limits>
+#include <algorithm>
+#include <vector>
+#include <unordered_map>
+#include <vpu/model/data_desc.hpp>
+#include <vpu/hw/tiling.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/utils/heap.hpp>
+#include <vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp>
+
+namespace vpu {
+
+namespace HWTilingNS {
+
+using HWTilingNS::GraphDataTiling;
+using HWTilingNS::ConvolutionOptions;
+using HWTilingNS::Direction;
+using HWTilingNS::TilingOption;
+
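+// Number of channels covered by a single HW descriptor.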
+const int CHANS_PER_DESCR = 16;
+
+HwPoolTileInfo splitPooling(int outZ);
+
+class PoolGraphDataTilingFactory final {
+public:
+    static std::unique_ptr<GraphDataTiling> makeDirTiling(const ConvolutionOptions &co, Direction direction);
+    static std::unique_ptr<GraphDataTiling> makeDirTiling(const GraphDataTiling &o);
+};
+
+class HWPoolingTileLayoutCut;
+
+// Iterates over all the tiling options and chooses a few with the minimal cost
+class HWPoolingTilingSearcher {
+    const ConvolutionOptions _co;
+    const size_t _maxTilingOptions;
+    const std::unique_ptr<GraphDataTiling> _dirTiling;
+    std::vector<TilingOption> _tilingOptions;
+
+public:
+    HWPoolingTilingSearcher() = delete;
+    HWPoolingTilingSearcher(const HWPoolingTilingSearcher &other): _co(other._co),
+       _maxTilingOptions(other._maxTilingOptions),
+       _dirTiling(PoolGraphDataTilingFactory::makeDirTiling(*other._dirTiling)),
+       _tilingOptions(other._tilingOptions) {
+    }
+
+    HWPoolingTilingSearcher(const ConvolutionOptions &co,
+                            Direction direction,
+                            size_t maxTilingOptions) : _co(co),
+                           _maxTilingOptions(maxTilingOptions),
+                           _dirTiling(PoolGraphDataTilingFactory::makeDirTiling(_co, direction)) {
+        IE_ASSERT(maxTilingOptions > 0);
+        _dirTiling->initTileSizes();
+        _tilingOptions = selectBetterTiling();
+    }
+
+    const std::vector<TilingOption> &tilingOptions() const {
+        return _tilingOptions;
+    }
+
+    size_t tilingOptionsCount() const {
+        return _tilingOptions.size();
+    }
+
+    const ConvolutionOptions& co() const { return _co; }
+
+    const HWPoolingTileLayoutCut tileLayoutCut(const TilingOption &option) const;
+
+private:
+    std::vector<TilingOption> selectBetterTiling() const;
+};
+
+// Searches for tiling options and applies them to prepare the HW tilings
+class HWPoolingTiler final {
+private:
+    const ConvolutionOptions _co;
+    std::vector<HwPoolTilingPtr> _hwTilings;
+    bool _tilingPossible;
+    const HWPoolingTilingSearcher _searcher;
+
+public:
+    HWPoolingTiler() = delete;
+
+    HWPoolingTiler(const HWPoolingTiler &other): _co(other._co), _hwTilings(other._hwTilings),
+                                                 _tilingPossible(other._tilingPossible), _searcher(other._searcher) {
+    }
+
+    explicit HWPoolingTiler(const ConvolutionOptions &co,
+                                Direction direction,
+                                size_t maxTilingOptions);
+
+    bool isTilingPossible() const {
+        return _tilingPossible;
+    }
+
+    const std::vector<HwPoolTilingPtr> &getHwTilings() const {
+        return _hwTilings;
+    }
+
+private:
+    bool tileForHW();
+};
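+// Illustrative usage sketch: construct the tiler from the stage's ConvolutionOptions,
+// check isTilingPossible(), and pass each HwPoolTilingPtr from getHwTilings() to the
+// HW pooling stage tiler.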
+
+SmallVector<HwPlaneTileInfo> calcHeightTilesP(const ConvolutionOptions &_co,
+                                              const DimValues &outputTileDims, bool useCeil);
+SmallVector<HwPlaneTileInfo> calcWidthTilesP(const ConvolutionOptions &_co,
+                                             const DimValues &outputTileDims, bool useCeil);
+
+// Based on the chosen { inputTileDims, outputTileDims }, constructs the plane's tiling structure
+// (the same for both input and output; it contains only the number of tiles in each dimension).
+class HWPoolingTileLayoutCut {
+private:
+    const ConvolutionOptions &_co;
+    GraphDataTiling &_dirTiling;
+    HwPoolTilingPtr _hwTiling;
+    bool _tileCutPossible;
+
+public:
+    HWPoolingTileLayoutCut() = delete;
+    HWPoolingTileLayoutCut(const HWPoolingTileLayoutCut &other): _co(other._co), _dirTiling(other._dirTiling),
+                                               _hwTiling(other._hwTiling), _tileCutPossible(other._tileCutPossible) {
+    }
+
+    HWPoolingTileLayoutCut(HWPoolingTileLayoutCut &&other): _co(other._co), _dirTiling(other._dirTiling) {
+        _hwTiling = std::move(other._hwTiling);
+        _tileCutPossible = other.tileCutPossible();
+    }
+    HWPoolingTileLayoutCut(GraphDataTiling &dirTiling, const TilingOption &tilingOption) :
+            _co(dirTiling.co()), _dirTiling(dirTiling),
+            _hwTiling(std::make_shared<HwPoolTiling>()) {
+        dirTiling.applyTilingOption(tilingOption);
+
+        _tileCutPossible = createTiles(calcHeightTilesP(_co, dirTiling.getOutputTileDims(), dirTiling.useCeil()),
+                                       calcWidthTilesP(_co, dirTiling.getOutputTileDims(), dirTiling.useCeil()),
+                                       dirTiling.getInputTileDims(), dirTiling.getOutputTileDims());
+    }
+
+    bool tileCutPossible() const { return _tileCutPossible; }
+
+    HwPoolTilingPtr hwTiling() const {
+        IE_ASSERT(_tileCutPossible);
+        return _hwTiling;
+    }
+
+private:
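+    // Builds the SoH x SoW x SoC hierarchy of plane and channel tiles;
+    // returns false if the pooling split yields no HW descriptors for some tile.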
+    bool createTiles(const SmallVector<HwPlaneTileInfo> &heightTiles,
+                     const SmallVector<HwPlaneTileInfo> &widthTiles,
+                     const DimValues &inputTileDims, const DimValues &outputTileDims) const {
+        IE_ASSERT(!heightTiles.empty());
+        IE_ASSERT(!widthTiles.empty());
+
+        _hwTiling->sohTiles = heightTiles.size();
+        _hwTiling->sowTiles = widthTiles.size();
+        _hwTiling->socTiles = divUp(_co._inputDims.get(Dim::N, 1), inputTileDims[Dim::N]);
+
+        for (int sohInd = 0; sohInd < _hwTiling->sohTiles; ++sohInd) {
+            const auto& heightTileInfo = heightTiles[sohInd];
+
+            for (int sowInd = 0; sowInd < _hwTiling->sowTiles; ++sowInd) {
+                const auto& widthTileInfo = widthTiles[sowInd];
+
+                auto planeTile = std::make_shared<HwPoolPlaneTile>();
+                planeTile->parent = _hwTiling;
+
+                planeTile->sohInd = sohInd;
+                planeTile->sowInd = sowInd;
+
+                planeTile->heightInfo = heightTileInfo;
+                planeTile->widthInfo = widthTileInfo;
+
+                for (int socInd = 0; socInd < _hwTiling->socTiles; ++socInd) {
+                    auto channelTile = std::make_shared<HwPoolChannelTile>();
+                    channelTile->parent = planeTile;
+
+                    channelTile->socInd = socInd;
+
+                    channelTile->finalTiles = splitPooling(inputTileDims[Dim::C] * inputTileDims[Dim::N]);
+
+                    if (channelTile->finalTiles.numDescr == 0) {
+                        return false;
+                    }
+
+                    channelTile->channelStartIndex = socInd * inputTileDims[Dim::N];
+                    channelTile->numInputChannels = inputTileDims[Dim::N];
+
+                    planeTile->channelTiles.emplace_back(channelTile);
+                }
+
+                _hwTiling->planeTiles.emplace_back(planeTile);
+            }
+        }
+
+        return true;
+    }
+};
+
+}  // namespace HWTilingNS
+
+}  // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_pooling_tiling/hw_stage_tiler.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/passes/hw_pooling_tiling/hw_stage_tiler.hpp
new file mode 100644 (file)
index 0000000..bff3178
--- /dev/null
@@ -0,0 +1,83 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <vpu/model/base.hpp>
+#include <vpu/frontend/stage_builder.hpp>
+#include <vpu/hw/tiling.hpp>
+
+namespace vpu {
+
+struct HWPoolStageOptions;
+struct HWPoolStageIO;
+
+// Builds the graph that composes a tiled analogue of the single stage 'origStage'
+class HWPoolStageTiler {
+private:
+    HWPoolStageTiler() = delete;
+    HWPoolStageTiler(const HWPoolStageTiler&) = delete;
+
+public:
+    DataVector hwInputTiles;
+    std::vector<DimValues> hwInputTilesOffsets;
+
+    DataVector hwOutputTiles;
+    std::vector<DimValues> hwOutputTilesOffsets;
+
+    Data hwInput;
+    Data hwOutput;
+
+    HWPoolStageTiler(const HWPoolStageOptions &so, const HWPoolStageIO &sio,
+                     const Model::Ptr &model, const Handle<StageNode> &origStage,
+                     const StageBuilder::Ptr &stageBuilder, const HwPoolTilingPtr &tiling);
+};
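+// Illustrative usage sketch (the HW pooling tiling pass is the actual call site;
+// 'origStage', 'origOutput', 'model', 'stageBuilder' and 'tiling' are assumed locals):
+//
+//   HWPoolStageIO io(origStage, origOutput);
+//   HWPoolStageOptions opts(origStage);
+//   HWPoolStageTiler stageTiler(opts, io, model, origStage, stageBuilder, tiling);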
+
+struct HWPoolStageIO {
+private:
+    HWPoolStageIO() = delete;
+    HWPoolStageIO(const HWPoolStageIO&) = delete;
+
+public:
+    Data origInput;
+    Data origOutput;
+
+    explicit HWPoolStageIO(const Handle<StageNode> &origStage, const Data &originOutput) {
+        origInput = origStage->input(0);
+        origOutput = originOutput;
+    }
+};
+
+// Attributes of the stage collected into a single structure
+struct HWPoolStageOptions {
+private:
+    HWPoolStageOptions() = delete;
+    HWPoolStageOptions(const HWPoolStageOptions&) = delete;
+
+public:
+    int kernelSizeX;
+    int kernelSizeY;
+    int kernelStride;
+    int padLeft;
+    int padRight;
+    int padTop;
+    int padBottom;
+
+    bool withReLU;
+
+    explicit HWPoolStageOptions(const Handle<StageNode> &origStage) {
+        kernelSizeX = origStage->attrs().get<int>("kernelSizeX");
+        kernelSizeY = origStage->attrs().get<int>("kernelSizeY");
+        kernelStride = origStage->attrs().get<int>("kernelStrideX");
+        padLeft = origStage->attrs().get<int>("padLeft");
+        padRight = origStage->attrs().get<int>("padRight");
+        padTop = origStage->attrs().get<int>("padTop");
+        padBottom = origStage->attrs().get<int>("padBottom");
+
+        withReLU = origStage->attrs().getOrDefault<bool>("withReLU", false);
+    }
+};
+
+}  // namespace vpu
index 7cc5dfb..9f2670e 100644 (file)
@@ -19,6 +19,7 @@ namespace VPUConfigParams {
 
 DECLARE_VPU_CONFIG_KEY(NUMBER_OF_SHAVES);
 DECLARE_VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES);
+DECLARE_VPU_CONFIG_KEY(TENSOR_STRIDES);
 
 DECLARE_VPU_CONFIG_KEY(HW_ADAPTIVE_MODE);
 
@@ -39,8 +40,6 @@ DECLARE_VPU_CONFIG_KEY(HW_DILATION);
 
 DECLARE_VPU_CONFIG_KEY(DETECT_NETWORK_BATCH);
 
-DECLARE_VPU_CONFIG_KEY(ALLOW_FP32_MODELS);
-
 DECLARE_VPU_CONFIG_KEY(HW_WHITE_LIST);
 DECLARE_VPU_CONFIG_KEY(HW_BLACK_LIST);
 
@@ -52,6 +51,16 @@ DECLARE_VPU_CONFIG_KEY(IGNORE_UNKNOWN_LAYERS);
 // Myriad plugin options
 //
 
+// Power Manager
+
+DECLARE_VPU_MYRIAD_CONFIG_KEY(POWER_MANAGEMENT);
+
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(POWER_FULL);
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(POWER_INFER);
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE);
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE_SHAVES);
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE_NCES);
+
 DECLARE_VPU_MYRIAD_CONFIG_KEY(WATCHDOG);
 INFERENCE_ENGINE_DEPRECATED
 DECLARE_VPU_CONFIG_KEY(WATCHDOG);
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/special_stage_processor.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/special_stage_processor.hpp
new file mode 100644 (file)
index 0000000..30a445f
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+
+#include <vpu/model/stage.hpp>
+#include <vpu/model/model.hpp>
+#include <vpu/frontend/stage_builder.hpp>
+
+namespace vpu {
+
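+// Handles "special" stages (Split, Concat, Reshape, Expand, Shrink) that are expressed
+// through data-to-data edges rather than real computation; inserts extra Copy stages via
+// the stage builder where a zero-copy layout cannot be preserved (summary inferred from
+// the interface below).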
+class SpecialStageProcessor final {
+public:
+    inline explicit SpecialStageProcessor(const StageBuilder::Ptr& stageBuilder) :
+            _stageBuilder(stageBuilder) {
+    }
+
+    void processSplit(
+            const Model::Ptr& model,
+            const Stage& stage);
+
+    void processConcat(
+            const Model::Ptr& model,
+            const Stage& stage);
+
+    void processReshape(
+            const Model::Ptr& model,
+            const Stage& stage);
+
+    void processExpand(
+            const Model::Ptr& model,
+            const Stage& stage);
+
+    void processShrink(
+            const Model::Ptr& model,
+            const Stage& stage);
+
+private:
+    StageBuilder::Ptr _stageBuilder;
+};
+
+}  // namespace vpu
index 72264c4..383cdb6 100644 (file)
@@ -16,15 +16,18 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override;
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override;
 
-    void propagateDataOrderImpl() const override;
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override;
 
-    void getDataStridesRequirementsImpl() const override;
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override;
 
     void finalizeDataLayoutImpl() override;
 
-    void getBatchSupportInfoImpl() const override;
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override;
+
+    void initialCheckImpl() const override;
 
     void finalCheckImpl() const override;
 
index 27df041..77ec017 100644 (file)
@@ -10,17 +10,17 @@ namespace vpu {
 
 class PostOpStage : public StageNode {
 protected:
-    void propagateDataOrderImpl() const override;
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override;
 
-    void getDataStridesRequirementsImpl() const override;
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override;
 
     void finalizeDataLayoutImpl() override;
 
-    void getBatchSupportInfoImpl() const override;
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override;
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override;
 
-    void finalCheckImpl() const override;
+    void initialCheckImpl() const override;
 
     void serializeDataImpl(BlobSerializer& serializer) const override;
 };
index 8f07987..bc30e8d 100644 (file)
@@ -124,7 +124,6 @@ bool Allocator::allocateData(const Data& data) {
     if (data->usage() == DataUsage::Input) {
         if (_allocatedData.count(data) == 0) {
             IE_ASSERT(data->parentDataEdge() == nullptr);
-            IE_ASSERT(data->checkStrides(StridesRequirement::compact()));
 
             auto finalByteSize = alignVal(data->totalByteSize() * _modelBatchSize, DATA_ALIGNMENT);
 
@@ -146,7 +145,6 @@ bool Allocator::allocateData(const Data& data) {
     if (data->usage() == DataUsage::Output) {
         if (_allocatedData.count(data) == 0) {
             IE_ASSERT(data->parentDataEdge() == nullptr);
-            IE_ASSERT(data->checkStrides(StridesRequirement::compact()));
 
             int finalByteSize = 0;
             if (data->attrs().getOrDefault<bool>("unbatched", false)) {
index 3cc1f29..307a358 100644 (file)
@@ -59,7 +59,7 @@ CompiledGraph::Ptr BackEnd::build(
     extractDataInfo(model, compiledGraph->inputInfo, compiledGraph->outputInfo);
 
     serialize(model, compiledGraph->blob, compiledGraph->blobHeader, compiledGraph->numActiveStages);
-    getMetaData(model, allLayers, compiledGraph->stagesMeta);
+    getMetaData(model, allLayers, compiledGraph->graphMeta);
 
     return compiledGraph;
 }
index 8f251b4..2b3dc5e 100644 (file)
@@ -42,51 +42,111 @@ namespace vpu {
 void BackEnd::getMetaData(
         const Model::Ptr& model,
         const std::vector<ie::CNNLayerPtr>& allLayers,
-        std::vector<StageMetaInfo>& metaData) {
+        GraphMetaInfo& graphMeta) {
     VPU_PROFILE(getMetaData);
 
-    metaData.clear();
-    metaData.reserve(3 * model->numStages() / 2 + 1);
+    std::vector<StageMetaInfo> stagesMeta;
+    std::vector<DataMetaInfo> datasMeta;
 
     std::unordered_set<ie::CNNLayerPtr> visitedLayers;
+    int execOrder{};
+    StageMap<size_t> stageToMetaIndex;
 
-    auto getStageMeta = [&visitedLayers](const Stage& stage) -> StageMetaInfo {
-        StageMetaInfo meta;
+    stagesMeta.reserve(3 * model->numStages() / 2 + 1);
+    datasMeta.reserve(3 * model->numDatas() / 2 + 1);
 
-        meta.stageName = stage->name();
-        meta.stageType = toString(stage->type());
+    graphMeta.graphName = model->name();
+
+    auto getStageMeta = [&](const Stage& stage) -> StageMetaInfo {
+        StageMetaInfo stageMeta;
+
+        stageMeta.displayStageName = stageMeta.stageName = stage->name();
+        stageMeta.stageType = toString(stage->type());
+
+        if (stage->category() != StageCategory::Special) {
+            stageMeta.execOrder = execOrder++;
+        } else {
+            stageMeta.execOrder = -1;
+        }
 
         if (stage->numInjectedStages() > 0) {
-            meta.stageName += " + injected[";
-            meta.stageType += " + injected[";
+            stageMeta.displayStageName += " + injected[";
+            stageMeta.stageType += " + injected[";
 
             int ind = 0;
             for (const auto& injectedStageEdge : stage->injectedStageEdges()) {
                 if (ind != 0) {
-                    meta.stageName += ", ";
-                    meta.stageType += ", ";
+                    stageMeta.displayStageName += ", ";
+                    stageMeta.stageType += ", ";
                 }
 
-                meta.stageName += injectedStageEdge->child()->name();
-                meta.stageType += toString(injectedStageEdge->child()->type());
+                stageMeta.displayStageName += injectedStageEdge->child()->name();
+                stageMeta.stageType += toString(injectedStageEdge->child()->type());
 
                 ++ind;
             }
 
-            meta.stageName += "]";
-            meta.stageType += "]";
+            stageMeta.displayStageName += "]";
+            stageMeta.stageType += "]";
         }
 
         if (stage->origLayer() == nullptr) {
-            meta.layerName = "<Extra>";
-            meta.layerType = "<Extra>";
+            stageMeta.layerName = "<Extra>";
+            stageMeta.layerType = "<Extra>";
         } else {
-            meta.layerName = stage->origLayer()->name;
-            meta.layerType = stage->origLayer()->type;
+            stageMeta.layerName = stage->origLayer()->name;
+            stageMeta.layerType = stage->origLayer()->type;
             visitedLayers.insert(stage->origLayer());
         }
 
-        return meta;
+        return stageMeta;
+    };
+
+    auto getDataMeta = [&](const Data& data) -> DataMetaInfo {
+        DataMetaInfo dataMeta;
+
+        dataMeta.name = data->name();
+        dataMeta.desc = data->desc().toTensorDesc();
+
+        if (data->usage() == DataUsage::Input) {
+            // Create fake input layer
+            StageMetaInfo inputInfo;
+
+            inputInfo.layerType = "Input";
+            inputInfo.layerName = inputInfo.stageName = inputInfo.displayStageName = data->name();
+            inputInfo.stageType = "NONE";
+            inputInfo.outPrecisions.push_back(dataMeta.desc.getPrecision());
+            inputInfo.outLayouts.push_back(dataMeta.desc.getLayout());
+            stagesMeta.push_back(std::move(inputInfo));
+
+            dataMeta.parentIndex = stagesMeta.size() - 1;
+        } else {
+            auto it = stageToMetaIndex.find(data->producer());
+
+            if (it != stageToMetaIndex.end()) {
+                StageMetaInfo& meta = stagesMeta[it->second];
+
+                meta.outPrecisions.push_back(dataMeta.desc.getPrecision());
+                meta.outLayouts.push_back(dataMeta.desc.getLayout());
+
+                dataMeta.parentIndex = it->second;
+            }
+        }
+
+        if (data->usage() != DataUsage::Output) {
+            for (const auto &child : data->consumers()) {
+                auto it = stageToMetaIndex.find(child);
+
+                if (it != stageToMetaIndex.end()) {
+                    StageMetaInfo& meta = stagesMeta[it->second];
+
+                    meta.inputsNum++;
+                    dataMeta.childrenIndices.push_back(it->second);
+                }
+            }
+        }
+
+        return dataMeta;
     };
 
     //
@@ -98,9 +158,11 @@ void BackEnd::getMetaData(
             continue;
         }
 
-        auto meta = getStageMeta(stage);
-        meta.status = ie::InferenceEngineProfileInfo::EXECUTED;
-        metaData.emplace_back(std::move(meta));
+        auto stageMeta = getStageMeta(stage);
+
+        stageMeta.status = ie::InferenceEngineProfileInfo::EXECUTED;
+        stagesMeta.emplace_back(std::move(stageMeta));
+        stageToMetaIndex[stage] = stagesMeta.size() - 1;
     }
 
     //
@@ -109,12 +171,12 @@ void BackEnd::getMetaData(
 
     // TODO : support config to disable timings and not to add this meta if it is not required by user
     StageMetaInfo receiveTensorMeta;
-    receiveTensorMeta.stageName = "<Receive-Tensor>";
+    receiveTensorMeta.displayStageName = receiveTensorMeta.stageName = "<Receive-Tensor>";
     receiveTensorMeta.stageType = "<Receive-Tensor>";
     receiveTensorMeta.layerName = "<Receive-Tensor>";
     receiveTensorMeta.layerType = "<Receive-Tensor>";
     receiveTensorMeta.status = ie::InferenceEngineProfileInfo::EXECUTED;
-    metaData.emplace_back(std::move(receiveTensorMeta));
+    stagesMeta.emplace_back(std::move(receiveTensorMeta));
 
     //
     // Add special stages
@@ -125,9 +187,10 @@ void BackEnd::getMetaData(
             continue;
         }
 
-        auto meta = getStageMeta(stage);
-        meta.status = ie::InferenceEngineProfileInfo::OPTIMIZED_OUT;
-        metaData.emplace_back(std::move(meta));
+        auto stageMeta = getStageMeta(stage);
+        stageMeta.status = ie::InferenceEngineProfileInfo::NOT_RUN;
+        stagesMeta.emplace_back(std::move(stageMeta));
+        stageToMetaIndex[stage] = stagesMeta.size() - 1;
     }
 
     //
@@ -139,14 +202,32 @@ void BackEnd::getMetaData(
             continue;
         }
 
-        StageMetaInfo meta;
-        meta.stageName = "<none>";
-        meta.stageType = "<none>";
-        meta.layerName = layer->name;
-        meta.layerType = layer->type;
-        meta.status = ie::InferenceEngineProfileInfo::LayerStatus::OPTIMIZED_OUT;
-        metaData.emplace_back(std::move(meta));
+        StageMetaInfo stageMeta;
+        stageMeta.stageName = "<none>";
+        stageMeta.stageType = "<none>";
+        stageMeta.layerName = layer->name;
+        stageMeta.layerType = layer->type;
+        stageMeta.status = ie::InferenceEngineProfileInfo::LayerStatus::OPTIMIZED_OUT;
+        stagesMeta.emplace_back(std::move(stageMeta));
     }
+
+    //
+    // Add data info
+    //
+
+    for (const auto& data : model->datas()) {
+        if (data->usage() != DataUsage::Input &&
+            data->usage() != DataUsage::Intermediate &&
+            data->usage() != DataUsage::Output) {
+            continue;
+        }
+
+        auto dataMeta = getDataMeta(data);
+        datasMeta.emplace_back(std::move(dataMeta));
+    }
+
+    graphMeta.stagesMeta = std::move(stagesMeta);
+    graphMeta.datasMeta = std::move(datasMeta);
 }
 
 }  // namespace vpu
index 3ed7516..1179077 100644 (file)
@@ -29,58 +29,6 @@ T readFromBlob(const std::vector<char>& blob, uint32_t& offset) {
     return *reinterpret_cast<const T*>(srcPtr);
 }
 
-ie::Precision vpuDataTypeToIE(DataType dataType) {
-    auto iePrecision = ie::Precision::UNSPECIFIED;
-
-    switch (dataType) {
-    case DataType::U8:
-        iePrecision = ie::Precision::U8;
-        break;
-    case DataType::FP16:
-        iePrecision = ie::Precision::FP16;
-        break;
-    case DataType::FP32:
-        iePrecision = ie::Precision::FP32;
-        break;
-    default:
-        VPU_THROW_EXCEPTION << "BlobReader error: unsupported dataType " << dataType;
-    }
-
-    return iePrecision;
-}
-
-ie::Layout vpuDimsOrderToIE(DimsOrder dimsOrder) {
-    auto ieLayout = ie::Layout::ANY;
-
-    if (DimsOrder::C == dimsOrder) {
-        ieLayout = ie::Layout::C;
-    } else if (DimsOrder::NC == dimsOrder) {
-        ieLayout = ie::Layout::NC;
-    } else if (DimsOrder::CHW == dimsOrder) {
-        ieLayout = ie::Layout::CHW;
-    } else if (DimsOrder::NCHW == dimsOrder) {
-        ieLayout = ie::Layout::NCHW;
-    } else if (DimsOrder::NHWC == dimsOrder) {
-        ieLayout = ie::Layout::NHWC;
-    } else {
-        VPU_THROW_EXCEPTION << "BlobReader error: unsupported dimsOrder " << toString(dimsOrder);
-    }
-
-    return ieLayout;
-}
-
-ie::SizeVector vpuDimsToIE(const DimValues& dimValues) {
-    auto order = DimsOrder::fromNumDims(dimValues.size());
-    auto perm = order.toPermutation();
-
-    ie::SizeVector ieDims(perm.size());
-    for (int i = 0; i < perm.size(); ++i) {
-        ieDims[ieDims.size() - 1 - i] = dimValues[perm[i]];
-    }
-
-    return ieDims;
-}
-
 }  // namespace
 
 void BlobReader::parse(const std::vector<char>& blob) {
@@ -134,11 +82,7 @@ void BlobReader::parse(const std::vector<char>& blob) {
         // Skip strides
         inputInfoSecOffset += perm.size() * sizeof(uint32_t);
 
-        auto iePrecision = vpuDataTypeToIE(dataType);
-        auto ieLayout    = vpuDimsOrderToIE(dimsOrder);
-        auto ieDims = vpuDimsToIE(vpuDims);
-
-        ie::TensorDesc ieDesc(iePrecision, ieDims, ieLayout);
+        ie::TensorDesc ieDesc = DataDesc(dataType, dimsOrder, vpuDims).toTensorDesc();
         ie::Data inputData(inputName, ieDesc);
 
         ie::InputInfo input;
@@ -181,11 +125,7 @@ void BlobReader::parse(const std::vector<char>& blob) {
         // Skip strides
         outputInfoSecOffset += perm.size() * sizeof(uint32_t);
 
-        auto iePrecision = vpuDataTypeToIE(dataType);
-        auto ieLayout    = vpuDimsOrderToIE(dimsOrder);
-        auto ieDims = vpuDimsToIE(vpuDims);
-
-        ie::TensorDesc ieDesc(iePrecision, ieDims, ieLayout);
+        ie::TensorDesc ieDesc = DataDesc(dataType, dimsOrder, vpuDims).toTensorDesc();
         ie::Data outputData(outputName, ieDesc);
 
         _networkOutputs[outputData.getName()]    = std::make_shared<ie::Data>(outputData);
index d95263c..bae11a8 100644 (file)
@@ -15,7 +15,7 @@
 #include <string>
 #include <vector>
 
-#ifdef __linux__
+#if defined(__linux__) || defined(__APPLE__)
 # include <dlfcn.h>
 #endif
 
@@ -293,7 +293,7 @@ ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> CustomLaye
 #ifdef _WIN32
     char path[MAX_PATH];
     auto abs_path_ptr = _fullpath(path, configFile.c_str(), MAX_PATH);
-#elif __linux__
+#elif defined(__linux__) || defined(__APPLE__)
     char path[PATH_MAX];
     auto abs_path_ptr = realpath(configFile.c_str(), path);
 #endif
@@ -427,8 +427,8 @@ void CustomLayer::processKernelNode(const pugi::xml_node& node) {
         contentStream << inputFile.rdbuf();
         _kernelBinary.append(contentStream.str());
 
-        if (_kernelBinary.size() >= 16*1024) {
-            VPU_THROW_EXCEPTION << "Kernel binary exceeds 16KB." << fileName;
+        if (_kernelBinary.size() >= 32*1024) {
+            VPU_THROW_EXCEPTION << "Kernel binary exceeds 32KB." << fileName;
         }
     }
 
@@ -677,6 +677,8 @@ CustomDataFormat CustomLayer::formatFromString(const std::string & str) {
     static const ie::details::caseless_map<std::string, CustomDataFormat> FormatNameToType = {
         { "BFYX" , CustomDataFormat::BFYX },
         { "BYXF" , CustomDataFormat::BYXF },
+        { "FYX" , CustomDataFormat::FYX },
+        { "YXF" , CustomDataFormat::YXF },
         { "ANY"  , CustomDataFormat::Any },
     };
 
index faea63d..e717144 100644 (file)
@@ -35,9 +35,7 @@ ie::CNNNetwork FrontEnd::detectNetworkBatch(
     if (batchSize == 1 || !env.config.detectBatch) {
         env.log->debug("Keep original network");
 
-        IE_SUPPRESS_DEPRECATED_START
-        return ie::CNNNetwork(const_cast<ie::ICNNNetwork*>(&origNetwork));
-        IE_SUPPRESS_DEPRECATED_END
+        return ie::CNNNetwork(ie::ICNNNetwork::Ptr(const_cast<ie::ICNNNetwork*>(&origNetwork), [](void *) {}));
     }
 
     model->setBatchSize(batchSize);
@@ -72,16 +70,16 @@ ie::CNNNetwork FrontEnd::detectNetworkBatch(
         env.log->debug("Input [%s] : %v", p.first, ieShapes);
 
         switch (ieData->getLayout()) {
-            case ie::Layout::NCHW:
-            case ie::Layout::NHWC:
-            case ie::Layout::NC:
-                ieShapes[0] = 1;
-                break;
-            case ie::Layout::CN:
-                ieShapes[1] = 1;
-                break;
-            default:
-                VPU_THROW_EXCEPTION << "Unexpected input layout : " << ieData->getLayout();
+        case ie::Layout::NCDHW:
+        case ie::Layout::NDHWC:
+        case ie::Layout::NCHW:
+        case ie::Layout::NHWC:
+        case ie::Layout::NC:
+        case ie::Layout::CN:
+            ieShapes[0] = 1;
+            break;
+        default:
+            VPU_THROW_EXCEPTION << "Unexpected input layout : " << ieData->getLayout();
         }
 
         inputShapes[ieData->getName()] = ieShapes;
index ea38055..819e08a 100644 (file)
@@ -77,9 +77,15 @@ ie::details::caseless_map<std::string, parser_t> g_parsers = {
     {"LSTMSequence",       &FrontEnd::parseRNN},
     {"GEMM",               &FrontEnd::parseGEMM},
     {"Log",                &FrontEnd::parseLog},
+    {"Exp",                &FrontEnd::parseExp},
     {"ReverseSequence",    &FrontEnd::parseReverseSequence},
     {"Gather",             &FrontEnd::parseGather},
     {"ReduceAnd",          &FrontEnd::parseReduce},
+    {"Floor",              &FrontEnd::parseFloor},
+    {"TopK",               &FrontEnd::parseTopK},
+    {"ReduceMin",          &FrontEnd::parseReduce},
+    {"StridedSlice",       &FrontEnd::parseStridedSlice},
+    {"Select",             &FrontEnd::parseSelect},
 };
 
 std::atomic<int> g_counter(0);
@@ -211,7 +217,7 @@ Model::Ptr FrontEnd::buildInitialModel(const ie::ICNNNetwork& network) {
 
     eliminatePriorBoxData(model);
 
-    model->cleanUpDatas();
+    model->cleanUp();
 
     return model;
 }
@@ -368,7 +374,6 @@ void FrontEnd::getInputAndOutputData(
 
         inputs[i] = getVpuData(layerInput);
         IE_ASSERT(inputs[i] != nullptr);
-        IE_ASSERT(inputs[i]->desc().type() == DataType::FP16);
     }
 
     outputs.resize(layer->outData.size());
@@ -377,11 +382,13 @@ void FrontEnd::getInputAndOutputData(
         IE_ASSERT(layerOutput != nullptr);
 
         if (auto data = getVpuData(layerOutput)) {
-            IE_ASSERT(data->desc().type() == DataType::FP16);
             outputs[i] = data;
         } else {
             DataDesc dataDesc(layerOutput->getTensorDesc());
-            dataDesc.setType(DataType::FP16);
+            if (dataDesc.type() == DataType::FP32) {
+                // Convert to FP16 so that the same FP32 model can be inferred on different devices (CPU, GPU, VPU, and so on)
+                dataDesc.setType(DataType::FP16);
+            }
 
             outputs[i] = model->addNewData(
                 layerOutput->getName(),
index 71a73b3..f7065ac 100644 (file)
@@ -8,6 +8,7 @@
 #include <string>
 #include <set>
 #include <vector>
+#include <utility>
 
 #include <vpu/compile_env.hpp>
 
@@ -23,12 +24,10 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto inputScale = inputScales[0];
 
@@ -36,12 +35,12 @@ protected:
             IE_ASSERT(output->usage() == DataUsage::Output);
             IE_ASSERT(step == ScalePropagationStep::Propagate);
 
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         } else {
             IE_ASSERT(input->usage() == DataUsage::Input);
 
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
 
             if (step == ScalePropagationStep::ScaleInput) {
                 attrs().get<float>("scale") *= inputScale;
@@ -50,40 +49,34 @@ protected:
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         if (_type == StageType::Convert_f16f32) {
-            IE_ASSERT(output->usage() == DataUsage::Output);
+            IE_ASSERT(output->usage() == DataUsage::Output || output->usage() == DataUsage::Intermediate);
 
             auto outDimsOrder = output->desc().dimsOrder();
 
             // HCW is not supported
             IE_ASSERT(outDimsOrder.dimInd(Dim::C) != 1);
 
-            _orderInfo.setInput(_inputEdges[0], outDimsOrder);
+            orderInfo.setInput(inputEdge(0), outDimsOrder);
         } else {
-            IE_ASSERT(input->usage() == DataUsage::Input);
+            IE_ASSERT(input->usage() == DataUsage::Input || input->usage() == DataUsage::Intermediate);
 
             auto inDimsOrder = input->desc().dimsOrder();
 
             // HCW is not supported
             IE_ASSERT(inDimsOrder.dimInd(Dim::C) != 1);
 
-            _orderInfo.setOutput(_outputEdges[0], inDimsOrder);
+            orderInfo.setOutput(outputEdge(0), inDimsOrder);
         }
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto inDimsOrder = input->desc().dimsOrder();
 
@@ -95,22 +88,22 @@ protected:
         }
 
         if (_type == StageType::Convert_f16f32) {
-            IE_ASSERT(output->usage() == DataUsage::Output);
+            IE_ASSERT(output->usage() == DataUsage::Output || output->usage() == DataUsage::Intermediate);
 
-            _stridesInfo.setInput(_inputEdges[0], reqs);
-            _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+            stridesInfo.setInput(inputEdge(0), reqs);
+            stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
         } else {
-            IE_ASSERT(input->usage() == DataUsage::Input);
+            IE_ASSERT(input->usage() == DataUsage::Input || input->usage() == DataUsage::Intermediate);
 
-            _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-            _stridesInfo.setOutput(_outputEdges[0], reqs);
+            stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+            stridesInfo.setOutput(outputEdge(0), reqs);
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
         // Convert will support batch by merging it with previous dimension.
     }
 
@@ -119,7 +112,22 @@ protected:
         return StageSHAVEsRequirements::TwoOrOne;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
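+        // Each Convert_* stage type maps to its expected {source, destination} precision pair.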
+        const auto expectedTypes = EnumMap<StageType, std::pair<DataType, DataType>>{
+            {StageType::Convert_u8f16, {DataType::U8, DataType::FP16}},
+            {StageType::Convert_f16f32, {DataType::FP16, DataType::FP32}},
+            {StageType::Convert_f32f16, {DataType::FP32, DataType::FP16}},
+        };
+
+        auto match = expectedTypes.find(_type);
+        if (match == expectedTypes.end()) {
+            VPU_THROW_EXCEPTION << "unknown type";
+        }
+        const auto& types = match->second;
+
+        const auto& srcType = types.first;
+        const auto& dstType = types.second;
+        assertInputsOutputsTypes(this, {{srcType}}, {{dstType}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -135,12 +143,8 @@ protected:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         if (input->desc().dimsOrder() == DimsOrder::NC) {
             input->serializeOldBuffer(
@@ -230,27 +234,61 @@ void FrontEnd::addDataTypeConvertStages(const Model::Ptr& model) {
         env.log->warning("[VPU] GraphTransformer : INPUT_BIAS option is deprecated");
     }
 
+    const bool hasScaleBias = env.config.inputScale != 1.0f || env.config.inputBias != 0.0f;
     for (const auto& input : model->datas()) {
         if (input->usage() != DataUsage::Input)
             continue;
 
-        if (input->desc().type() != DataType::FP16) {
-            env.log->debug("convert input %s to FP16", input->name());
+        const auto& type = input->desc().type();
+
+        if (type == DataType::FP16 && hasScaleBias) {
+            std::ostringstream postfixOstr;
+            if (env.config.inputScale != 1.0f) {
+                postfixOstr << "@SCALE=" << std::to_string(env.config.inputScale);
+            }
+            if (env.config.inputBias != 0.0f) {
+                postfixOstr << "@BIAS=" << std::to_string(env.config.inputBias);
+            }
+
+            auto postfix = postfixOstr.str();
+
+            auto scaledInput = model->duplicateData(
+                    input,
+                    postfix);
+
+            bindData(scaledInput, input->origData());
+
+            _stageBuilder->addPowerStage(
+                    model,
+                    scaledInput->name(),
+                    nullptr,
+                    env.config.inputScale,
+                    1.0f,
+                    env.config.inputBias,
+                    input,
+                    scaledInput);
+        }
+
+        if (type != DataType::FP32 && type != DataType::U8) {
+            continue;
+        }
+
+        env.log->debug("convert input %s to FP16", input->name());
 
-            auto fp16Desc = input->desc();
-            fp16Desc.setType(DataType::FP16);
+        auto fp16Desc = input->desc();
+        fp16Desc.setType(DataType::FP16);
 
-            auto inputFP16 = model->duplicateData(
+        auto inputFP16 = model->duplicateData(
                 input,
                 "@FP16",
                 fp16Desc);
 
-            input->attrs().set<Data>("fp16_copy", inputFP16);
+        input->attrs().set<Data>("fp16_copy", inputFP16);
 
-            bindData(inputFP16, input->origData());
+        bindData(inputFP16, input->origData());
 
-            auto stageType = StageType::None;
-            switch (input->desc().type()) {
+        auto stageType = StageType::None;
+        switch (input->desc().type()) {
             case DataType::U8:
                 stageType = StageType::Convert_u8f16;
                 break;
@@ -259,9 +297,9 @@ void FrontEnd::addDataTypeConvertStages(const Model::Ptr& model) {
                 break;
             default:
                 VPU_THROW_EXCEPTION << "Unsupported input data type : " << input->desc().type();
-            }
+        }
 
-            _stageBuilder->createConvertStage(
+        _stageBuilder->createConvertStage(
                 model,
                 inputFP16->name(),
                 input,
@@ -269,69 +307,45 @@ void FrontEnd::addDataTypeConvertStages(const Model::Ptr& model) {
                 stageType,
                 env.config.inputScale,
                 env.config.inputBias);
-        } else if (env.config.inputScale != 1.0f || env.config.inputBias != 0.0f) {
-            std::ostringstream postfixOstr;
-            if (env.config.inputScale != 1.0f) {
-                postfixOstr << "@SCALE=" << std::to_string(env.config.inputScale);
-            }
-            if (env.config.inputBias != 0.0f) {
-                postfixOstr << "@BIAS=" << std::to_string(env.config.inputBias);
-            }
-
-            auto postfix = postfixOstr.str();
-
-            auto scaledInput = model->duplicateData(
-                input,
-                postfix);
-
-            bindData(scaledInput, input->origData());
-
-            _stageBuilder->addPowerStage(
-                model,
-                scaledInput->name(),
-                nullptr,
-                env.config.inputScale,
-                1.0f,
-                env.config.inputBias,
-                input,
-                scaledInput);
-        }
     }
 
     for (const auto& output : model->datas()) {
         if (output->usage() != DataUsage::Output)
             continue;
 
-        if (output->desc().type() != DataType::FP16) {
-            env.log->debug("convert output %s from FP16", output->name());
+        const auto& actualType = output->desc().type();
+        if (actualType != DataType::FP32) {
+            // Output data keep their precision (intermediate data have been forced to FP16 in the case of FP32 from IR).
+            // If an FP32 output has been requested, the VPU executes in FP16 with a subsequent FP16 -> FP32 convert.
+            continue;
+        }
 
-            IE_ASSERT(output->desc().type() == DataType::FP32);
+        env.log->debug("convert output %s from FP16", output->name());
 
-            auto fp16Desc = output->desc();
-            fp16Desc.setType(DataType::FP16);
+        auto fp16Desc = output->desc();
+        fp16Desc.setType(DataType::FP16);
 
-            auto outputFP16 = model->duplicateData(
-                output,
-                "@FP16",
-                fp16Desc);
+        auto outputFP16 = model->duplicateData(
+            output,
+            "@FP16",
+            fp16Desc);
 
-            output->attrs().set<Data>("fp16_copy", outputFP16);
+        output->attrs().set<Data>("fp16_copy", outputFP16);
 
-            bindData(outputFP16, output->origData());
+        bindData(outputFP16, output->origData());
 
-            auto stage = _stageBuilder->createConvertStage(
-                model,
-                outputFP16->name(),
-                outputFP16,
-                output,
-                StageType::Convert_f16f32);
+        auto stage = _stageBuilder->createConvertStage(
+            model,
+            outputFP16->name(),
+            outputFP16,
+            output,
+            StageType::Convert_f16f32);
 
-            auto withDetectionOutput = model->attrs().getOrDefault<bool>("withDetectionOutput", false);
-            stage->attrs().set<bool>("convertFromDetOutput", withDetectionOutput);
+        auto withDetectionOutput = model->attrs().getOrDefault<bool>("withDetectionOutput", false);
+        stage->attrs().set<bool>("convertFromDetOutput", withDetectionOutput);
 
-            auto haveBatch = _unbatchedOutputs.count(output->origData()) == 0;
-            stage->attrs().set<bool>("haveBatch", haveBatch);
-        }
+        auto haveBatch = _unbatchedOutputs.count(output->origData()) == 0;
+        stage->attrs().set<bool>("haveBatch", haveBatch);
     }
 }
 
index 4aea3d2..67205d7 100644 (file)
@@ -7,8 +7,11 @@
 #include <memory>
 #include <algorithm>
 #include <set>
+#include <map>
+#include <string>
 
 #include <vpu/compile_env.hpp>
+#include <vpu/utils/ie_helpers.hpp>
 
 namespace vpu {
 
@@ -17,6 +20,27 @@ void FrontEnd::parseInputAndOutputData(const Model::Ptr& model) {
 
     const auto& env = CompileEnv::get();
 
+    auto layoutPreference = LayoutPreference::AUTO;
+    if (env.config.hwOptimization ||
+        env.config.forceLayout == ComputeLayout::NCHW ||
+        env.config.forceLayout == ComputeLayout::NCDHW) {
+        layoutPreference = LayoutPreference::ChannelMajor;  // CHW, NCHW, NCDHW
+    } else {
+        layoutPreference = LayoutPreference::ChannelMinor;  // HWC, NHWC, NDHWC
+    }
+
+    // TODO: InferenceEngine doesn't support 3D HWC.
+
+    auto parseIOStrides = [&](const std::string& name, Data& data) {
+        const auto& match = env.config.ioStrides.find(name);
+        if (match == env.config.ioStrides.end()) {
+            return;
+        }
+
+        const auto reqs = StridesRequirement::fixed(match->second, data->desc());
+        data->updateRequiredStrides(reqs);
+    };
+
     //
     // Parse network inputs
     //
@@ -29,15 +53,20 @@ void FrontEnd::parseInputAndOutputData(const Model::Ptr& model) {
         IE_ASSERT(ieData != nullptr);
 
         DataDesc vpuDesc(ieData->getTensorDesc());
-        if (vpuDesc.numDims() >= 3) {
-            if (env.config.hwOptimization || env.config.forceLayout == ComputeLayout::NCHW) {
-                vpuDesc.moveDim(Dim::C, 2);
+        if (vpuDesc.numDims() >= 4) {
+            if (LayoutPreference::ChannelMajor == layoutPreference) {
+                if (vpuDesc.dimsOrder() == DimsOrder::NDHWC)
+                    vpuDesc.moveDim(Dim::C, 3);
+                if (vpuDesc.dimsOrder() == DimsOrder::NHWC)
+                    vpuDesc.moveDim(Dim::C, 2);
             } else {
                 vpuDesc.moveDim(Dim::C, 0);
             }
         }
 
         auto vpuData = model->addInputData(ieData->getName(), vpuDesc);
+        parseIOStrides(inputInfo.first, vpuData);
+
         bindData(vpuData, ieData);
     }
 
@@ -52,15 +81,20 @@ void FrontEnd::parseInputAndOutputData(const Model::Ptr& model) {
         IE_ASSERT(ieData != nullptr);
 
         DataDesc vpuDesc(ieData->getTensorDesc());
-        if (vpuDesc.numDims() >= 3) {
-            if (env.config.hwOptimization || env.config.forceLayout == ComputeLayout::NCHW) {
-                vpuDesc.moveDim(Dim::C, 2);
+        if (vpuDesc.numDims() >= 4) {
+            if (LayoutPreference::ChannelMajor == layoutPreference) {
+                if (vpuDesc.dimsOrder() == DimsOrder::NDHWC)
+                    vpuDesc.moveDim(Dim::C, 3);
+                if (vpuDesc.dimsOrder() == DimsOrder::NHWC)
+                    vpuDesc.moveDim(Dim::C, 2);
             } else {
                 vpuDesc.moveDim(Dim::C, 0);
             }
         }
 
         auto vpuData = model->addOutputData(ieData->getName(), vpuDesc);
+        parseIOStrides(outputInfo.first, vpuData);
+
         bindData(vpuData, ieData);
 
         if (_unbatchedOutputs.count(ieData) > 0) {
@@ -81,16 +115,7 @@ void FrontEnd::parseInputAndOutputData(const Model::Ptr& model) {
         auto ieBlob = constInfo.second;
         IE_ASSERT(ieBlob != nullptr);
 
-        auto ieDesc = ieData->getTensorDesc();
-
-        if (ieDesc.getPrecision() != ie::Precision::FP16) {
-            if (ieDesc.getPrecision() != ie::Precision::FP32 || !env.config.allowFP32Models) {
-                VPU_THROW_EXCEPTION << "Unsupported precision " << ieDesc.getPrecision() << "for data " << ieData->getName();
-            }
-        }
-
-        DataDesc vpuDesc(ieDesc);
-        vpuDesc.setType(DataType::FP16);
+        DataDesc vpuDesc(ieData->getTensorDesc());
 
         auto vpuData = model->addConstData(
             ieData->getName(),
@@ -111,42 +136,6 @@ void FrontEnd::parseInputAndOutputData(const Model::Ptr& model) {
 
         bindData(vpuData, ieData);
     }
-
-    //
-    // Add Copy stages after network outputs, if they are in the middle
-    //
-
-    for (const auto& outputInfo : _ieNetworkParser.networkOutputs) {
-        auto ieData = outputInfo.second;
-        IE_ASSERT(ieData != nullptr);
-
-        auto vpuData = getVpuData(ieData);
-        IE_ASSERT(vpuData != nullptr);
-
-        // It might be Const.
-        if (vpuData->usage() != DataUsage::Output)
-            continue;
-
-        // Convert stage will be added.
-        if (vpuData->desc().type() != DataType::FP16)
-            continue;
-
-        if (!ieData->getInputTo().empty()) {
-            auto vpuTempData = model->duplicateData(
-                vpuData,
-                "@intermediate",
-                vpuData->desc());
-
-            _stageBuilder->addCopyStage(
-                model,
-                formatString("%s@copy-to-output", vpuData->name()),
-                nullptr,
-                vpuTempData,
-                vpuData);
-
-            bindData(vpuTempData, ieData);
-        }
-    }
 }
 
 }  // namespace vpu
index 24e3814..00f1d0a 100644 (file)
@@ -97,26 +97,11 @@ void IeNetworkParser::checkNetwork(const ie::CNNNetwork& network) {
     for (const auto& netInput : networkInputs) {
         auto inputInfo = netInput.second;
         IE_ASSERT(inputInfo != nullptr);
-
-        auto inputPrecision = inputInfo->getPrecision();
-
-        if (inputPrecision != ie::Precision::U8 &&
-            inputPrecision != ie::Precision::FP16 &&
-            inputPrecision != ie::Precision::FP32) {
-            THROW_IE_EXCEPTION << "[PARAMETER_MISMATCH] Unsupported input precision: " << inputPrecision.name() << "!";
-        }
     }
 
     for (const auto& netOutput : networkOutputs) {
         auto outputData = netOutput.second;
         IE_ASSERT(outputData != nullptr);
-
-        auto outputPrecision = outputData->getPrecision();
-
-        if (outputPrecision != ie::Precision::FP16 &&
-            outputPrecision != ie::Precision::FP32) {
-            THROW_IE_EXCEPTION << "[PARAMETER_MISMATCH] Unsupported output precision: " << outputPrecision.name() << "!";
-        }
     }
 }
 
index 42cde67..af5a8a6 100644 (file)
@@ -114,7 +114,6 @@ void FrontEnd::addPreProcessStages(const Model::Ptr& model) {
         if (preProcess.getMeanVariant() != ie::NONE) {
             auto input = getVpuData(ieData);
             IE_ASSERT(input != nullptr);
-            IE_ASSERT(input->desc().type() == DataType::FP16);
 
             int numOfChannel = preProcess.getNumberOfChannels();
 
index dd8bb4e..fc9f332 100644 (file)
@@ -16,7 +16,10 @@ StagePtr MyriadXHwStage::cloneImpl() const {
     return std::make_shared<MyriadXHwStage>(*this);
 }
 
-void MyriadXHwStage::propagateScaleFactorsImpl(const SmallVector<float>&, ScalePropagationStep) {
+void MyriadXHwStage::propagateScaleFactorsImpl(
+        const SmallVector<float>&,
+        ScalePropagationStep,
+        StageDataInfo<float>&) {
     VPU_THROW_EXCEPTION << "Must never be called";
 }
 
@@ -44,81 +47,69 @@ StridesRequirement getHwStridesRequirement(const Stage& stage, const DataDesc& d
 
 }  // namespace
 
-void MyriadXHwStage::propagateDataOrderImpl() const {
-    IE_ASSERT(_inputEdges.size() >= 4);
-    IE_ASSERT(_outputEdges.size() >= 1);
-
+void MyriadXHwStage::propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) {
     if (attrs().get<HwOpType>("hwOpType") != HwOpType::POOL) {
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
-        auto scales = _inputEdges[3]->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
+        auto scales = inputEdge(3)->input();
 
         IE_ASSERT(weights->usage() == DataUsage::Const);
         IE_ASSERT(biases->usage() == DataUsage::Const || biases->usage() == DataUsage::Fake);
         IE_ASSERT(scales->usage() == DataUsage::Const || scales->usage() == DataUsage::Fake);
     }
 
-    auto input = _inputEdges[0]->input();
-    auto output = _outputEdges[0]->output();
+    auto input = inputEdge(0)->input();
+    auto output = outputEdge(0)->output();
 
     // TODO: support HCW
 
     if (input->desc().numDims() >= 3) {
-        _orderInfo.setInput(_inputEdges[0], input->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setInput(inputEdge(0), input->desc().dimsOrder().createMovedDim(Dim::C, 2));
     } else {
         IE_ASSERT(input->desc().dimsOrder() == DimsOrder::NC);
     }
 
     if (output->desc().numDims() >= 3) {
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, 2));
     } else {
         IE_ASSERT(output->desc().dimsOrder() == DimsOrder::NC);
     }
 }
 
-void MyriadXHwStage::getDataStridesRequirementsImpl() const {
-    IE_ASSERT(_inputEdges.size() >= 4);
-    IE_ASSERT(_outputEdges.size() >= 1);
-
+void MyriadXHwStage::getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) {
     if (attrs().get<HwOpType>("hwOpType") != HwOpType::POOL) {
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
-        auto scales = _inputEdges[3]->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
+        auto scales = inputEdge(3)->input();
 
         IE_ASSERT(weights->usage() == DataUsage::Const);
         IE_ASSERT(biases->usage() == DataUsage::Const || biases->usage() == DataUsage::Fake);
         IE_ASSERT(scales->usage() == DataUsage::Const || scales->usage() == DataUsage::Fake);
     }
 
-    auto input = _inputEdges[0]->input();
-    auto output = _outputEdges[0]->output();
+    auto input = inputEdge(0)->input();
+    auto output = outputEdge(0)->output();
 
-    _stridesInfo.setInput(_inputEdges[0], getHwStridesRequirement(handle_from_this(), input->desc()));
-    _stridesInfo.setOutput(_outputEdges[0], getHwStridesRequirement(handle_from_this(), output->desc()));
+    stridesInfo.setInput(inputEdge(0), getHwStridesRequirement(handle_from_this(), input->desc()));
+    stridesInfo.setOutput(outputEdge(0), getHwStridesRequirement(handle_from_this(), output->desc()));
 }
 
 void MyriadXHwStage::finalizeDataLayoutImpl() {
 }
 
-void MyriadXHwStage::getBatchSupportInfoImpl() const {
+void MyriadXHwStage::getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) {
     if (attrs().get<HwOpType>("hwOpType") != HwOpType::POOL) {
-        IE_ASSERT(_inputEdges.size() >= 4);
-        IE_ASSERT(_outputEdges.size() >= 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 }
 
 void MyriadXHwStage::finalCheckImpl() const {
-    IE_ASSERT(_inputEdges.size() >= 4);
-    IE_ASSERT(_outputEdges.size() >= 1);
-
-    auto input = _inputEdges[0]->input();
-    auto weights = _inputEdges[1]->input();
-    auto biases = _inputEdges[2]->input();
-    auto scales = _inputEdges[3]->input();
-    auto output = _outputEdges[0]->output();
+    auto input = inputEdge(0)->input();
+    auto weights = inputEdge(1)->input();
+    auto biases = inputEdge(2)->input();
+    auto scales = inputEdge(3)->input();
+    auto output = outputEdge(0)->output();
 
     IE_ASSERT(input->memoryOffset() % 16 == 0);
     IE_ASSERT(weights->memoryOffset() % 16 == 0);
@@ -189,9 +180,9 @@ void MyriadXHwStage::serializeParamsImpl(BlobSerializer& serializer) const {
         serializer.append(checked_cast<uint32_t>(hwOpParams.reuseCoeff));
     }
 
-    serializer.append(checked_cast<uint32_t>(_injectedStageEdges.size()));
-    for (const auto& injectedStageEdge : _injectedStageEdges) {
-        injectedStageEdge->child()->serialize(serializer);
+    serializer.append(checked_cast<uint32_t>(numInjectedStages()));
+    for (const auto& injectedStage : injectedStages()) {
+        injectedStage->serialize(serializer);
     }
 }
 
@@ -200,7 +191,7 @@ void MyriadXHwStage::serializeDataImpl(BlobSerializer& serializer) const {
 
     uint32_t numBuffers = 0;
 
-    for (const auto& inEdge : _inputEdges) {
+    for (const auto& inEdge : inputEdges()) {
         if (inEdge->childEdge() != nullptr)
             continue;
 
@@ -212,7 +203,7 @@ void MyriadXHwStage::serializeDataImpl(BlobSerializer& serializer) const {
         ++numBuffers;
     }
 
-    for (const auto& outEdge : _outputEdges) {
+    for (const auto& outEdge : outputEdges()) {
         if (outEdge->childEdge() != nullptr)
             continue;
 
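The recurring change in this file (and in stage_node.cpp further down) is that the per-stage hooks no longer write into protected members such as _orderInfo, _stridesInfo or _batchInfo; the base class allocates a StageDataInfo container and passes it to the Impl hook by reference, and edges are reached through inputEdge(i)/outputEdge(i) accessors instead of the raw _inputEdges vector. A minimal standalone sketch of that call pattern, with simplified stand-in types (the StageDataInfo here only echoes the shape of the real vpu class):

#include <cassert>
#include <cstddef>
#include <vector>

// Simplified stand-in for vpu::StageDataInfo; only the call pattern matters here.
template <typename T>
class StageDataInfo {
public:
    void init(std::size_t numInputs, std::size_t numOutputs) {
        _inputs.assign(numInputs, T{});
        _outputs.assign(numOutputs, T{});
    }
    void setInput(std::size_t ind, const T& val)  { _inputs.at(ind) = val; }
    void setOutput(std::size_t ind, const T& val) { _outputs.at(ind) = val; }
    const T& input(std::size_t ind) const  { return _inputs.at(ind); }
    const T& output(std::size_t ind) const { return _outputs.at(ind); }

private:
    std::vector<T> _inputs, _outputs;
};

class StageBase {
public:
    virtual ~StageBase() = default;

    // The base class owns the container, initializes it, and hands it to the
    // Impl hook by reference -- instead of the hook mutating a protected member.
    const StageDataInfo<int>& propagateDataOrder() {
        _orderInfo.init(1, 1);
        propagateDataOrderImpl(_orderInfo);
        return _orderInfo;
    }

protected:
    virtual void propagateDataOrderImpl(StageDataInfo<int>& orderInfo) = 0;

private:
    StageDataInfo<int> _orderInfo;
};

class MyStage final : public StageBase {
protected:
    void propagateDataOrderImpl(StageDataInfo<int>& orderInfo) override {
        orderInfo.setInput(0, 42);   // was: _orderInfo.setInput(_inputEdges[0], ...)
        orderInfo.setOutput(0, 42);  // was: _orderInfo.setOutput(_outputEdges[0], ...)
    }
};

int main() {
    MyStage stage;
    assert(stage.propagateDataOrder().output(0) == 42);
    return 0;
}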
index f545c35..3aad4d2 100644 (file)
@@ -184,8 +184,7 @@ std::tuple<int, int, int, int, int, int, int, int>
         int inputSize,
         int kernelSize, int kernelStride,
         int padBefore, int padAfter,
-        int outputStartIndex, int outputEndIndex,
-        bool alignInputTile) {
+        int outputStartIndex, int outputEndIndex) {
     // Negative value encodes the padding
     int inputStartIndex = outputStartIndex * kernelStride - padBefore;
     int inputEndIndex = (outputEndIndex - 1) * kernelStride + kernelSize - padBefore;
@@ -212,14 +211,6 @@ std::tuple<int, int, int, int, int, int, int, int>
             inputLinesBefore -= kernelStride;
         }
 
-        if (alignInputTile) {
-            const int reqAlignment = 8;
-            while ((inputLinesBefore < inputStartIndex) &&
-                   (inputStartIndex - inputLinesBefore) % reqAlignment != 0) {
-                ++inputLinesBefore;
-            }
-        }
-
         // Compute the junkOutputBefore
         junkOutputBefore = (inputLinesBefore + padBefore) / kernelStride;
     }
@@ -266,14 +257,13 @@ int maximizeOutput(
         int kernelSize, int kernelStride,
         int padBefore, int padAfter,
         int outputStartIndex, int outputEndIndex,
-        bool alignInputTile,
         bool useCeil) {
     int outputSize = calcOutputSize(inputSize, kernelSize, kernelStride, padBefore, padAfter, useCeil);
 
     int _ = 0;
     int junkOutputBefore = 0, junkOutputAfter = 0;
     std::tie(_, _, _, _, _, _, junkOutputBefore, junkOutputAfter) =
-        inputTileForOutputTile(inputSize, kernelSize, kernelStride, padBefore, padAfter, outputStartIndex, outputEndIndex, alignInputTile);
+        inputTileForOutputTile(inputSize, kernelSize, kernelStride, padBefore, padAfter, outputStartIndex, outputEndIndex);
 
     int totalOutputSlice = junkOutputBefore + (outputEndIndex - outputStartIndex) + junkOutputAfter;
 
@@ -286,7 +276,7 @@ int maximizeOutput(
         extraLines -= 1;
 
         std::tie(_, _, _, _, _, _, junkOutputBefore, junkOutputAfter) =
-            inputTileForOutputTile(inputSize, kernelSize, kernelStride, padBefore, padAfter, outputStartIndex, outputEndIndex + extraLines, alignInputTile);
+            inputTileForOutputTile(inputSize, kernelSize, kernelStride, padBefore, padAfter, outputStartIndex, outputEndIndex + extraLines);
 
         totalOutputSlice = junkOutputBefore + (outputEndIndex + extraLines - outputStartIndex) + junkOutputAfter;
     }
@@ -301,7 +291,6 @@ SmallVector<HwPlaneTileInfo> splitIntoPlaneTiles(
         int kernelSize, int kernelStride,
         int padBefore, int padAfter,
         int maxOutputSize,
-        bool alignInputTile,
         bool useCeil) {
     IE_ASSERT(inputSize > 0);
     IE_ASSERT(outputSize > 0);
@@ -320,7 +309,6 @@ SmallVector<HwPlaneTileInfo> splitIntoPlaneTiles(
             kernelSize, kernelStride,
             padBefore, padAfter,
             outputStartIndex, outputEndIndex,
-            alignInputTile,
             useCeil);
         if (newOutputEndIndex <= outputStartIndex) {
             return SmallVector<HwPlaneTileInfo>();
@@ -333,7 +321,7 @@ SmallVector<HwPlaneTileInfo> splitIntoPlaneTiles(
                  inputLinesBefore, inputLinesAfter,
                  outputStartIndex, outputEndIndex,
                  junkOutputBefore, junkOutputAfter) =
-            inputTileForOutputTile(inputSize, kernelSize, kernelStride, padBefore, padAfter, outputStartIndex, newOutputEndIndex, alignInputTile);
+            inputTileForOutputTile(inputSize, kernelSize, kernelStride, padBefore, padAfter, outputStartIndex, newOutputEndIndex);
 
         IE_ASSERT(inputStartIndex >= 0);
         IE_ASSERT(inputEndIndex >= 0);
@@ -376,64 +364,126 @@ SmallVector<HwPlaneTileInfo> splitIntoPlaneTiles(
 }
 
 //
-// HW Convolution tiling over output channels.
+// Check HW-unit memory restrictions for tile.
 //
 
 namespace {
 
-// Returns (status, cost).
-std::tuple<bool, int> checkHwConvMode(
-        int inTileWidth, int inTileHeight, int inTileChannels,
-        int outTileChannels,
+bool checkDimensions(int inTileWidth, int inTileHeight, int inTileChannels, int outTileChannels) {
+    return inTileWidth     <= CNN_MAX_INPUT_WIDTH    &&
+           inTileHeight    <= CNN_MAX_INPUT_HEIGHT   &&
+           inTileChannels  <= CNN_MAX_INPUT_CHANNELS &&
+           outTileChannels <= CNN_MAX_OUTPUT_CHANNELS;
+}
+
+bool checkCoeffPerBlockConv(int kernelSizeX, int kernelSizeY, int noOfBlocks) {
+    auto coeffPerWord = CNN_COEFF_PER_WORD_VALUES[static_cast<int32_t>(CNN_COEFF_TYPE)];
+    auto coeffSetSize = kernelSizeX * kernelSizeY;
+    auto coeffLPB = divUp(noOfBlocks * coeffSetSize, coeffPerWord);
+
+    return coeffLPB <= CNN_MAX_COEFF_PER_BLOCK;
+}
+
+bool checkLinesPerChanRestrictions(
+        int inTileWidth, int inTileHeight,
+        int kernelSizeY, int kernelStride,
+        int noOfBlocks, int chansPerBlock) {
+    const int bytesPerPixel = CNN_BYTES_PER_PIXEL[static_cast<int32_t>(CNN_DATA_TYPE)];
+    const int bytesPerLine  = alignVal(inTileWidth * bytesPerPixel, CMX_DATA_BYTE_WIDTH);
+
+    const int linesPerChan = std::min(CNN_MAX_BYTES / (noOfBlocks * chansPerBlock * bytesPerLine), inTileHeight);
+    const int minLines     = std::min(kernelSizeY + kernelStride + 2 + ((inTileWidth <= 8) ? 1 : 0), inTileHeight);
+
+    return minLines <= linesPerChan;
+}
+
+bool checkLinesPerChanRestrictionsPool(
+        int inTileWidth, int inTileHeight,
+        int kernelSizeY,
+        HwOpMode mode) {
+    const int sizeOfBlock = CNN_MAX_BYTES >> static_cast<int>(mode);
+    const int bytesPerPixel = CNN_BYTES_PER_PIXEL[static_cast<int32_t>(CNN_DATA_TYPE)];
+    const int pixelsPerCMXLine = CMX_DATA_BYTE_WIDTH / bytesPerPixel;
+
+    const int localLineStride = (inTileWidth + (pixelsPerCMXLine - 1)) / pixelsPerCMXLine;
+
+    const int chanPerBlock = 1;
+    const int availableBytesPerChan = sizeOfBlock / chanPerBlock;
+    const int bytesPerLine = localLineStride * pixelsPerCMXLine * bytesPerPixel;
+
+    const int linesPerChan = std::min(availableBytesPerChan / bytesPerLine, inTileHeight);
+
+    return linesPerChan >= kernelSizeY;
+}
+
+bool checkHWRestrictions(
+        int inTileWidth, int inTileHeight,
+        int inTileChannels, int outTileChannels,
         int kernelSizeX, int kernelSizeY,
         int kernelStride,
-        HwOpMode mode) {
-    if (inTileWidth > CNN_MAX_INPUT_WIDTH ||
-        inTileHeight > CNN_MAX_INPUT_HEIGHT ||
-        inTileChannels > CNN_MAX_INPUT_CHANNELS ||
-        outTileChannels > CNN_MAX_OUTPUT_CHANNELS) {
-        return std::make_tuple(false, 0);
-    }
+        HwOpMode mode, HwOpType type) {
+    const int chansPerBlock = 1 << static_cast<int>(mode);
+    int noOfBlocks    = divUp(inTileChannels, chansPerBlock);
 
-    auto noOfBlocks = 1 << static_cast<int>(mode);
-    if (noOfBlocks > inTileChannels) {
-        return std::make_tuple(false, 0);
-    }
+    bool result = true;
 
-    auto inChansPerBlock = inTileChannels / noOfBlocks;
-    if (inChansPerBlock > CNN_MAX_CHANNELS_PER_BLOCK) {
-        return std::make_tuple(false, 0);
-    }
+    result &= checkDimensions(inTileWidth, inTileHeight, inTileChannels, outTileChannels);
 
-    auto coeffPerWord = CNN_COEFF_PER_WORD_VALUES[static_cast<int32_t>(CNN_COEFF_TYPE)];
-    auto coeffSetSize = kernelSizeX * kernelSizeY;
-    auto coeffLPB = (inChansPerBlock * coeffSetSize + coeffPerWord - 1) / coeffPerWord;
-    if (coeffLPB > CNN_MAX_COEFF_PER_BLOCK) {
-        return std::make_tuple(false, 0);
-    }
+    if (type == HwOpType::POOL) {
+        // The number of blocks is 1 because the HW unit does not use data from other blocks
+        // when computing pooling; blocks are loaded into CMX memory one by one.
+        noOfBlocks = 1;
 
-    auto bytesPerPixel = CNN_BYTES_PER_PIXEL[static_cast<int32_t>(CNN_DATA_TYPE)];
-    auto pixelsPerCMXLine = 128 / (bytesPerPixel * 8);
-    auto localLineStride = (inTileWidth + (pixelsPerCMXLine - 1)) / pixelsPerCMXLine;
-    auto bytesPerLine = localLineStride * pixelsPerCMXLine * bytesPerPixel;
-    auto sizeOfBlock = CNN_MAX_BYTES >> static_cast<int>(mode);
-    auto chanPerBlock = inTileChannels / noOfBlocks;
-    if (chanPerBlock == 0) {
-        return std::make_tuple(false, 0);
+        // TODO: verify the code on firmware side.
+        result &= checkLinesPerChanRestrictionsPool(
+            inTileWidth, inTileHeight,
+            kernelSizeY,
+            mode);
+    } else {
+        result &= checkCoeffPerBlockConv(
+            kernelSizeX, kernelSizeY,
+            noOfBlocks);
     }
 
-    auto availableBytesPerChan = sizeOfBlock / chanPerBlock;
-    auto linesPerChan = std::min(availableBytesPerChan / bytesPerLine, inTileHeight);
-    auto minLines = std::min(kernelSizeY / 1 + (kernelStride + 1) + 1 + ((inTileWidth <= 8) ? 1 : 0), inTileHeight);
-    if (minLines > linesPerChan) {
-        return std::make_tuple(false, 0);
-    }
+    result &= checkLinesPerChanRestrictions(
+        inTileWidth, inTileHeight,
+        kernelSizeY, kernelStride,
+        noOfBlocks, chansPerBlock);
 
-    return std::make_tuple(true, (inTileChannels / noOfBlocks) * kernelSizeX * kernelSizeY + CNN_MODES_COST[static_cast<int32_t>(mode)]);
+    return result;
 }
 
 }  // namespace
 
+bool checkPoolingHWRestrictions(
+        int inTileWidth, int inTileHeight,
+        int inTileChannels, int outTileChannels,
+        int kernelSizeX, int kernelSizeY,
+        int kernelStride) {
+    return checkHWRestrictions(inTileWidth, inTileHeight,
+                               inTileChannels, outTileChannels,
+                               kernelSizeX, kernelSizeY,
+                               kernelStride,
+                               HwOpMode::MODE_16_16, HwOpType::POOL);
+}
+
+bool checkConvHWRestrictions(
+        int inTileWidth, int inTileHeight,
+        int inTileChannels, int outTileChannels,
+        int kernelSizeX, int kernelSizeY,
+        int kernelStride,
+        HwOpMode mode) {
+    return checkHWRestrictions(inTileWidth, inTileHeight,
+                               inTileChannels, outTileChannels,
+                               kernelSizeX, kernelSizeY,
+                               kernelStride,
+                               mode, HwOpType::CONV);
+}
+
+//
+// HW Convolution tiling over output channels.
+//
+
 HwConvTileInfo splitHwConvIntoOutChannelsTiles(
         int inTileWidth, int inTileHeight, int inTileChannels,
         int outTileChannels,
@@ -452,26 +502,25 @@ HwConvTileInfo splitHwConvIntoOutChannelsTiles(
     Solution bestSol;
 
     for (auto mode : CNN_MODES) {
-        auto ramBlocks = 1 << static_cast<int>(mode);
+        // inChansPerBlock * outChansPerBlock = 256
+        auto inChansPerBlock  = 1 << static_cast<int>(mode);
+        auto outChansPerBlock = 256 / inChansPerBlock;
 
-        auto extendedInputDimC = alignVal(inTileChannels, ramBlocks);
+        auto extendedInputDimC = alignVal(inTileChannels, inChansPerBlock);
         auto extendedOutputDimC = alignVal(outTileChannels, 8);
 
-        auto outChansPerDescr = std::min(256 / ramBlocks, extendedOutputDimC);
-
-        bool valid = false;
-        int descCost = 0;
-        std::tie(valid, descCost) = checkHwConvMode(
-            inTileWidth, inTileHeight, extendedInputDimC,
-            outChansPerDescr,
-            kernelSizeX, kernelSizeY,
-            kernelStride,
-            mode);
+        auto outChansPerDescr = std::min(outChansPerBlock, extendedOutputDimC);
 
+        bool valid = checkConvHWRestrictions(
+            inTileWidth, inTileHeight, inTileChannels, outChansPerDescr,
+            kernelSizeX, kernelSizeY, kernelStride, mode);
         if (!valid) {
             continue;
         }
 
+        int descCost = (extendedInputDimC / inChansPerBlock) * kernelSizeX * kernelSizeY +
+            CNN_MODES_COST[static_cast<int32_t>(mode)];
+
         auto numDescr = divUp(outTileChannels, outChansPerDescr);
         auto remOutChans = outTileChannels - (numDescr - 1) * outChansPerDescr;
 
index df4c534..9c2ee8a 100644 (file)
@@ -40,7 +40,8 @@ const void* CalculatedDataContent::getRaw() const {
 }
 
 size_t CalculatedDataContent::getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const {
-    return _desc.totalDimSize() * _desc.elemSize();
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
 }
 
 namespace {
@@ -51,35 +52,61 @@ public:
 
 protected:
     const void* getRaw() const override {
-        IE_ASSERT(_desc.type() == DataType::FP16);
+        if (_desc.type() == DataType::FP16) {
+            if (_blobFp16 == nullptr) {
+                _blobFp16 = getBlobFP16(_blob);
+                _blob.reset();
+            }
 
-        if (_blobFp16 == nullptr) {
-            _blobFp16 = getBlobFP16(_blob);
-            _blob.reset();
-        }
+            if (_repeat == 1) {
+                return _blobFp16->cbuffer();
+            } else {
+                if (_tempFp16.empty()) {
+                    VPU_PROFILE(IeBlobContent);
 
-        if (_repeat == 1) {
-            return _blobFp16->cbuffer();
-        } else {
-            if (_temp.empty()) {
-                VPU_PROFILE(IeBlobContent);
+                    IE_ASSERT(_desc.totalDimSize() % _repeat == 0);
 
-                IE_ASSERT(_desc.totalDimSize() % _repeat == 0);
+                    auto origNumElems = _desc.totalDimSize() / _repeat;
+                    IE_ASSERT(checked_cast<size_t>(origNumElems) <= _blobFp16->size());
 
-                auto origNumElems = _desc.totalDimSize() / _repeat;
-                IE_ASSERT(origNumElems <= _blobFp16->size());
+                    auto origPtr = _blobFp16->cbuffer().as<const fp16_t*>();
+                    IE_ASSERT(origPtr != nullptr);
 
-                auto origPtr = _blobFp16->cbuffer().as<const fp16_t*>();
-                IE_ASSERT(origPtr != nullptr);
+                    _tempFp16.resize(checked_cast<size_t>(_desc.totalDimSize()));
 
-                _temp.resize(_desc.totalDimSize());
+                    ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
+                        std::copy_n(origPtr, origNumElems, _tempFp16.data() + i * origNumElems);
+                    });
+                }
 
-                ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
-                    std::copy_n(origPtr, origNumElems, _temp.data() + i * origNumElems);
-                });
+                return _tempFp16.data();
             }
+        } else if (_desc.type() == DataType::S32) {
+            if (_repeat == 1) {
+                return _blob->cbuffer();
+            } else {
+                if (_tempS32.empty()) {
+                    VPU_PROFILE(IeBlobContent);
 
-            return _temp.data();
+                    IE_ASSERT(_desc.totalDimSize() % _repeat == 0);
+
+                    auto origNumElems = _desc.totalDimSize() / _repeat;
+                    IE_ASSERT(checked_cast<size_t>(origNumElems) <= _blob->size());
+
+                    auto origPtr = _blob->cbuffer().as<const int32_t*>();
+                    IE_ASSERT(origPtr != nullptr);
+
+                    _tempS32.resize(checked_cast<size_t>(_desc.totalDimSize()));
+
+                    ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
+                        std::copy_n(origPtr, origNumElems, _tempS32.data() + i * origNumElems);
+                    });
+                }
+
+                return _tempS32.data();
+            }
+        } else {
+            VPU_THROW_EXCEPTION << "Unsupported data type " << _desc.type();
         }
     }
 
@@ -88,7 +115,8 @@ private:
     int _repeat = 0;
 
     mutable ie::Blob::Ptr _blobFp16;
-    mutable std::vector<fp16_t> _temp;
+    mutable std::vector<fp16_t> _tempFp16;
+    mutable std::vector<int32_t> _tempS32;
 };
 
 }  // namespace
@@ -110,12 +138,12 @@ public:
 protected:
     size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>& baseContents) const override {
         if (baseContents.empty()) {
-            return _count * sizeof(fp16_t);
+            return checked_cast<size_t>(_count) * sizeof(fp16_t);
         } else {
             IE_ASSERT(baseContents.size() == 1);
             IE_ASSERT(_desc.totalDimSize() % _count == 0);
 
-            return _desc.totalDimSize() * sizeof(fp16_t);
+            return checked_cast<size_t>(_desc.totalDimSize()) * sizeof(fp16_t);
         }
     }
 
@@ -265,21 +293,26 @@ void DataNode::updateRequiredStrides(const StridesRequirement& newReqs) {
     auto prevReqs = _requiredStrides;
 
     StridesRequirement mergedReqs;
-    for (int i = 0; i < _desc.numDims(); ++i) {
-        auto prevReq = prevReqs.get(i);
-        auto newReq = newReqs.get(i);
+    const auto& fixedRequirements = prevReqs.fixedStrides().empty() ? newReqs : prevReqs;
+    if (!fixedRequirements.fixedStrides().empty()) {
+        mergedReqs = fixedRequirements;
+    } else {
+        for (int i = 0; i < _desc.numDims(); ++i) {
+            auto prevReq = prevReqs.get(i);
+            auto newReq = newReqs.get(i);
 
-        if (prevReq == DimStride::Any &&
-            newReq == DimStride::Any) {
-            continue;
-        }
+            if (prevReq == DimStride::Any &&
+                newReq == DimStride::Any) {
+                continue;
+            }
 
-        // In case if both requirements are defined, use `prevReq`.
-        // We'll check that both requirements are satisfied at the end.
-        if (prevReq != DimStride::Any) {
-            mergedReqs.add(i, prevReq);
-        } else {
-            mergedReqs.add(i, newReq);
+            // If both requirements are defined, use `prevReq`.
+            // We'll check that both requirements are satisfied at the end.
+            if (prevReq != DimStride::Any) {
+                mergedReqs.add(i, prevReq);
+            } else {
+                mergedReqs.add(i, newReq);
+            }
         }
     }
 
@@ -345,8 +378,8 @@ void DataNode::serializeNewBuffer(
         auto origOrder = _desc.dimsOrder();
         auto origPerm = origOrder.toPermutation();
 
-        int origPermInd = 0;
-        for (int i = 0; i < newPerm.size(); i++) {
+        size_t origPermInd = 0;
+        for (size_t i = 0; i < newPerm.size(); i++) {
             auto d = newPerm[i];
 
             if (origPermInd < origPerm.size() && origPerm[origPermInd] == d) {
@@ -383,7 +416,7 @@ void rebaseOrderToOne(DimsOrder& ord, DimValues& dims, DimValues& strides) {
     DimValues newDims;
     DimValues newStrides;
 
-    for (int i = 0; i < perm.size(); ++i) {
+    for (size_t i = 0; i < perm.size(); ++i) {
         auto oldDim = perm[i];
         auto newDim = static_cast<Dim>(static_cast<int>(oldDim) - minDim);
 
@@ -403,7 +436,7 @@ void DataNode::serializeOldBuffer(
         const Stage& stage,
         BlobSerializer& serializer,
         DimsOrder newOrder,
-        const EnumMap<Dim, SmallVector<Dim, MAX_DIMS_64>>& dimsReloc) {
+        const EnumMap<Dim, DimVector>& dimsReloc) {
     const int OLD_FORMAT_NUM_DIMS = 3;
 
     auto newDims = _desc.dims();
@@ -435,7 +468,7 @@ void DataNode::serializeOldBuffer(
         EnumSet<Dim> usedOrigDims;
         int prevOrigDimInd = -1;
 
-        for (int i = 0; i < newPerm.size(); ++i) {
+        for (size_t i = 0; i < newPerm.size(); ++i) {
             auto newDim = newPerm[i];
 
             int newDimVal = 1;
@@ -451,7 +484,7 @@ void DataNode::serializeOldBuffer(
                 auto origDimsToReloc = it->second;
                 IE_ASSERT(!origDimsToReloc.empty());
 
-                for (int j = 0; j < origDimsToReloc.size(); ++j) {
+                for (size_t j = 0; j < origDimsToReloc.size(); ++j) {
                     auto origDim = origDimsToReloc[j];
                     auto origDimInd = origIndeces[origDim];
 
@@ -462,7 +495,7 @@ void DataNode::serializeOldBuffer(
                     usedOrigDims.insert(origDim);
 
                     if (j > 0 && origDims[origDim] > 1) {
-                        IE_ASSERT(checkStride(origStrides, _desc, origDimInd, DimStride::Compact));
+                        IE_ASSERT(checkStride(origStrides, _desc, origDimInd, StridesRequirement::compact()));
                     }
 
                     newDimVal *= origDims[origDim];
@@ -498,7 +531,7 @@ void DataNode::serializeOldBuffer(
     IE_ASSERT(maxDimDigit >= 0);
 
     if (newPerm.size() < OLD_FORMAT_NUM_DIMS) {
-        for (int i = newPerm.size(); i < OLD_FORMAT_NUM_DIMS; i++) {
+        for (size_t i = newPerm.size(); i < OLD_FORMAT_NUM_DIMS; i++) {
             auto lastDim = newPerm.back();
             auto newLastDim = static_cast<Dim>(++maxDimDigit);
 
@@ -512,7 +545,7 @@ void DataNode::serializeOldBuffer(
     }
 
     if (newPerm.size() > OLD_FORMAT_NUM_DIMS) {
-        for (int i = OLD_FORMAT_NUM_DIMS; i < newPerm.size(); i++) {
+        for (size_t i = OLD_FORMAT_NUM_DIMS; i < newPerm.size(); i++) {
             IE_ASSERT(newDims[newPerm[i]] == 1);
             newDims.erase(newPerm[i]);
             newStrides.erase(newPerm[i]);
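The IeBlobContent change earlier in this file's diff adds an S32 branch that reuses the same trick as FP16: when _repeat > 1, the original blob is tiled _repeat times into one temporary buffer. A standalone sketch of that replication step (std::copy_n per repeat index; the real code runs the loop through ie::parallel_for and keeps the result in a mutable member):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Tile `orig` `repeat` times into one contiguous buffer, as the content
// provider does when a constant blob has to be broadcast across repeats.
static std::vector<int32_t> repeatBuffer(const std::vector<int32_t>& orig, int repeat) {
    std::vector<int32_t> out(orig.size() * static_cast<std::size_t>(repeat));
    for (int i = 0; i < repeat; ++i) {   // ie::parallel_for in the real code
        std::copy_n(orig.data(), orig.size(), out.data() + i * orig.size());
    }
    return out;
}

int main() {
    const std::vector<int32_t> orig = {1, 2, 3};
    const auto tiled = repeatBuffer(orig, 3);
    assert(tiled.size() == 9);
    assert(tiled[3] == 1 && tiled[8] == 3);
    return 0;
}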
index d97efe2..9001946 100644 (file)
@@ -44,6 +44,8 @@ DimsOrder DimsOrder::HCW = DimsOrder::fromCode(0x231);
 DimsOrder DimsOrder::NCHW = DimsOrder::fromCode(0x4321);
 DimsOrder DimsOrder::NHWC = DimsOrder::fromCode(0x4213);
 DimsOrder DimsOrder::NHCW = DimsOrder::fromCode(0x4231);
+DimsOrder DimsOrder::NCDHW = DimsOrder::fromCode(0x43521);
+DimsOrder DimsOrder::NDHWC = DimsOrder::fromCode(0x45213);
 
 namespace {
 
@@ -109,12 +111,18 @@ DimsOrder DimsOrder::fromNumDims(int numDims) {
         return DimsOrder::C;
     } else if (numDims == 2) {
         return DimsOrder::NC;
+    } else if (numDims == 3) {
+        return DimsOrder::CHW;
+    } else if (numDims == 4) {
+        return DimsOrder::NCHW;
+    } else if (numDims == 5) {
+        return DimsOrder::NCDHW;
     } else {
         return DimsOrder::fromCode(maskOrder(FULL_ORDER_DEFAULT, numDims));
     }
 }
 
-DimsOrder DimsOrder::fromPermutation(const SmallVector<Dim, MAX_DIMS_64>& perm) {
+DimsOrder DimsOrder::fromPermutation(const DimVector& perm) {
     StorageOrder64 code = 0;
 
     for (int sh = 0, i = 0; i < perm.size(); i++, sh += 4) {
@@ -124,6 +132,20 @@ DimsOrder DimsOrder::fromPermutation(const SmallVector<Dim, MAX_DIMS_64>& perm)
     return DimsOrder::fromCode(code);
 }
 
+DimsOrder DimsOrder::fromLayout(ie::Layout const& layout) {
+    switch (layout) {
+    case ie::Layout::C     : return DimsOrder::C;
+    case ie::Layout::NC    : return DimsOrder::NC;
+    case ie::Layout::CHW   : return DimsOrder::CHW;
+    case ie::Layout::NCHW  : return DimsOrder::NCHW;
+    case ie::Layout::NHWC  : return DimsOrder::NHWC;
+    case ie::Layout::NCDHW : return DimsOrder::NCDHW;
+    case ie::Layout::NDHWC : return DimsOrder::NDHWC;
+    default:
+        VPU_THROW_EXCEPTION << "Unsupported layout " << layout;
+    }
+}
+
 int DimsOrder::numDims() const {
     int out = 0;
 
@@ -262,7 +284,8 @@ void printTo(std::ostream& os, DimsOrder order) {
         {1, 'W'},
         {2, 'H'},
         {3, 'C'},
-        {4, 'N'}
+        {4, 'N'},
+        {5, 'D'}
     });
 
     auto code = order.code();
@@ -289,6 +312,17 @@ void printTo(std::ostream& os, DimsOrder order) {
 }
 
 //
+// Dim
+//
+
+int dimToIeInd(vpu::Dim const& dim, int numDims) {
+    IE_ASSERT(1 <= numDims && numDims <= 8);
+    auto dimsOrder =  DimsOrder::fromNumDims(numDims);
+    int dimInd = dimsOrder.dimInd(dim);
+    return (numDims - 1) - dimInd;
+}
+
+//
 // DataDesc
 //
 
@@ -297,22 +331,7 @@ DataDesc::DataDesc(const ie::TensorDesc& ieDesc) {
     // Parse precision
     //
 
-    switch (ieDesc.getPrecision()) {
-    case ie::Precision::U8:
-        _type = DataType::U8;
-        break;
-    case ie::Precision::I8:
-         _type = DataType::I8;
-        break;
-    case ie::Precision::FP16:
-        _type = DataType::FP16;
-        break;
-    case ie::Precision::FP32:
-        _type = DataType::FP32;
-        break;
-    default:
-        VPU_THROW_EXCEPTION << "Unsupported precision " << ieDesc.getPrecision().name();
-    }
+    _type = fromIEPrecision(ieDesc.getPrecision());
 
     //
     // Parse dimensions and layout
@@ -342,10 +361,14 @@ int DataDesc::elemSize() const {
     switch (_type) {
     case DataType::U8:
         return sizeof(uint8_t);
+    case DataType::I8:
+        return sizeof(int8_t);
     case DataType::FP16:
         return sizeof(fp16_t);
     case DataType::FP32:
         return sizeof(float);
+    case DataType::S32:
+        return sizeof(int32_t);
     default:
         VPU_THROW_EXCEPTION << "Unknown data type " << _type;
     }
@@ -372,6 +395,62 @@ void DataDesc::reorder(DimsOrder dimsOrder) {
     _dimsOrder = dimsOrder;
 }
 
+ie::TensorDesc DataDesc::toTensorDesc() const {
+    ie::TensorDesc desc;
+
+    switch (this->type()) {
+        case DataType::FP16:
+            desc.setPrecision(ie::Precision::FP16);
+            break;
+        case DataType::FP32:
+            desc.setPrecision(ie::Precision::FP32);
+            break;
+        case DataType::I8:
+            desc.setPrecision(ie::Precision::I8);
+            break;
+        case DataType::U8:
+            desc.setPrecision(ie::Precision::U8);
+            break;
+        case DataType::S32:
+            desc.setPrecision(ie::Precision::I32);
+            break;
+        default:
+            desc.setPrecision(ie::Precision::UNSPECIFIED);
+    }
+
+    ie::SizeVector dims{};
+
+    DataDesc descCopy = *this;
+    descCopy.reorder(DimsOrder::fromNumDims(this->numDims()));
+    auto perm = descCopy.dimsOrder().toPermutation();
+    std::reverse(perm.begin(), perm.end());
+    for (auto &p : perm) {
+        dims.push_back(descCopy.dim(p));
+    }
+
+    desc.setDims(dims);
+
+    if (DimsOrder::C == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::C);
+    } else if (DimsOrder::NC == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::NC);
+    } else if (DimsOrder::CHW == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::CHW);
+    } else if (DimsOrder::NCHW == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::NCHW);
+    } else if (DimsOrder::NHWC == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::NHWC);
+    } else if (DimsOrder::NCDHW == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::NCDHW);
+    } else if (DimsOrder::NDHWC == this->dimsOrder()) {
+        desc.setLayout(ie::Layout::NDHWC);
+    } else {
+        desc.setLayout(ie::Layout::BLOCKED);
+    }
+
+    return desc;
+}
+
 void printTo(std::ostream& os, const DataDesc& desc) {
     os << "[" << std::endl;
 
@@ -409,6 +488,35 @@ StridesRequirement StridesRequirement::compact() {
     return reqs;
 }
 
+StridesRequirement StridesRequirement::fixed(const std::vector<int>& strides, const DataDesc& desc) {
+    StridesRequirement reqs;
+
+    const auto dims = desc.dims();
+    const auto dimsOrder = desc.dimsOrder();
+    const auto dimOrderVec = dimsOrder.toPermutation();
+    auto setStride = [&] (Dim d, int val) {
+        IE_ASSERT(dimsOrder.hasDim(d));
+
+        auto perm = dimsOrder.toPermutation();
+        auto idx = dimsOrder.dimInd(d);
+
+        auto minStrideVal = idx == 0 ? desc.elemSize() : reqs._fixedStrides[perm[idx - 1]] * dims[perm[idx - 1]];
+        IE_ASSERT(val >= minStrideVal);
+
+        reqs._fixedStrides.set(d, val);
+    };
+
+    for (const auto& dim : dimOrderVec) {
+        const auto idx = dimToIeInd(dim, dims.size());
+        setStride(dim, strides[idx]);
+    }
+
+    for (int i = 0; i < MAX_DIMS_64; ++i) {
+        reqs.add(i, DimStride::Fixed);
+    }
+    return reqs;
+}
+
 void printTo(std::ostream& os, const StridesRequirement& reqs) {
     os << "[" << std::endl;
 
@@ -457,12 +565,16 @@ DimValues calcStrides(const DataDesc& desc, const StridesRequirement& reqs) {
     auto perm = desc.dimsOrder().toPermutation();
     IE_ASSERT(!perm.empty());
 
-    strides.set(perm[0], desc.elemSize());
-    strides.set(perm[0], applyStrideRequirement(strides[perm[0]], 0, reqs));
+    strides = reqs.fixedStrides();
+
+    if (strides.empty()) {
+        strides.set(perm[0], desc.elemSize());
+        strides.set(perm[0], applyStrideRequirement(strides[perm[0]], 0, reqs));
 
-    for (int i = 1; i < perm.size(); i++) {
-        strides.set(perm[i], strides[perm[i - 1]] * desc.dim(perm[i - 1]));
-        strides.set(perm[i], applyStrideRequirement(strides[perm[i]], i, reqs));
+        for (std::size_t i = 1; i < perm.size(); i++) {
+            strides.set(perm[i], strides[perm[i - 1]] * desc.dim(perm[i - 1]));
+            strides.set(perm[i], applyStrideRequirement(strides[perm[i]], i, reqs));
+        }
     }
 
     return strides;
@@ -472,7 +584,8 @@ bool checkStride(
         const DimValues& strides,
         const DataDesc& desc,
         int ind,
-        DimStride req) {
+        const StridesRequirement& reqs) {
+    const auto req = reqs.get(ind);
     if (req == DimStride::Any) {
         return true;
     }
@@ -496,6 +609,10 @@ bool checkStride(
         if (strideVal % STRIDE_ALIGNMENT != 0) {
             return false;
         }
+    } else if (req == DimStride::Fixed) {
+        if (strideVal != reqs.getFixedStride(perm[ind])) {
+            return false;
+        }
     } else {
         VPU_THROW_EXCEPTION << "Unsupported stride requirement : " << req;
     }
@@ -511,7 +628,7 @@ bool checkStrides(
     IE_ASSERT(!perm.empty());
 
     for (int i = 0; i < perm.size(); i++) {
-        if (!checkStride(strides, desc, i, reqs.get(i))) {
+        if (!checkStride(strides, desc, i, reqs)) {
             return false;
         }
     }
@@ -524,4 +641,15 @@ int calcTotalByteSize(const DataDesc& desc, const DimValues& strides) {
     return strides[perm.back()] * desc.dim(perm.back());
 }
 
+DataType fromIEPrecision(const InferenceEngine::Precision& precision) {
+    switch (precision) {
+        case InferenceEngine::Precision::U8:   return DataType::U8;
+        case InferenceEngine::Precision::I8:   return DataType::I8;
+        case InferenceEngine::Precision::I32:  return DataType::S32;
+        case InferenceEngine::Precision::FP16: return DataType::FP16;
+        case InferenceEngine::Precision::FP32: return DataType::FP32;
+        default: VPU_THROW_EXCEPTION << precision << " isn't supported";
+    }
+}
+
 }  // namespace vpu
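StridesRequirement::fixed above validates each requested stride against the compact minimum implied by the inner dimensions: elemSize() for the innermost dimension, otherwise the previous dimension's stride times its extent, with the user-supplied strides vector indexed in IE order via dimToIeInd. A standalone sketch of that minimum computation for an FP16 NCHW 1x3x224x224 tensor; as the elemSize()-based formula implies, the values are byte strides, and the concrete shape here is only an illustration.

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Compact minimum byte strides for dims given innermost-first (minor-to-major),
// mirroring the minStrideVal check in StridesRequirement::fixed.
static std::vector<int> minByteStrides(const std::vector<int>& dimsMinorToMajor, int elemSize) {
    std::vector<int> strides(dimsMinorToMajor.size());
    for (std::size_t i = 0; i < dimsMinorToMajor.size(); ++i) {
        strides[i] = (i == 0) ? elemSize : strides[i - 1] * dimsMinorToMajor[i - 1];
    }
    return strides;
}

int main() {
    // NCHW 1x3x224x224 in minor-to-major order is {W, H, C, N} = {224, 224, 3, 1}.
    const std::vector<int> dims = {224, 224, 3, 1};
    const int fp16Size = 2;

    const auto strides = minByteStrides(dims, fp16Size);
    assert(strides[0] == 2);        // W stride: one FP16 element
    assert(strides[1] == 448);      // H stride: 224 * 2
    assert(strides[2] == 100352);   // C stride: 224 * 448
    assert(strides[3] == 301056);   // N stride: 3 * 100352
    // Any stride passed through VPU_TENSOR_STRIDES must be >= these minimums,
    // or the IE_ASSERT(val >= minStrideVal) in StridesRequirement::fixed fires.
    std::printf("min strides (bytes): %d %d %d %d\n",
                strides[0], strides[1], strides[2], strides[3]);
    return 0;
}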
index 9c65d7b..52541b3 100644 (file)
@@ -186,8 +186,8 @@ Data Model::duplicateData(
 }
 
 Stage Model::duplicateStage(
-        const std::string& name,
         const Stage& origStage,
+        const std::string& postfix,
         const DataVector& inputs,
         const DataVector& outputs) {
     //
@@ -230,7 +230,7 @@ Stage Model::duplicateStage(
 
     auto stage = origStage->cloneImpl();
 
-    stage->_name = name;
+    stage->_name = origStage->name() + postfix;
     stage->_type = origStage->_type;
     stage->_origLayer = origStage->_origLayer;
     stage->_model = handle_from_this();
@@ -1305,7 +1305,7 @@ SharedAllocation Model::connectDatasImpl(
         //
 
         if (connectionStage->_type == StageType::Concat ||
-            connectionStage->_type == StageType::Broadcast) {
+            connectionStage->_type == StageType::Expand) {
             IE_ASSERT(producer == child);
             IE_ASSERT(consumer == parent);
         } else if (connectionStage->_type == StageType::Split ||
@@ -1415,7 +1415,7 @@ SharedAllocation Model::connectDatasImpl(
     return edge;
 }
 
-void Model::disconnectStageDatas(const Stage& stage) {
+void Model::disconnectStage(const Stage& stage) {
     //
     // Check that objects belong to the same Model.
     //
@@ -1507,7 +1507,7 @@ void Model::removeStage(const Stage& stage) {
 
     _resetStageOrder = true;
 
-    disconnectStageDatas(stage);
+    disconnectStage(stage);
 
     _initialStages.erase(stage);
 
@@ -1515,7 +1515,7 @@ void Model::removeStage(const Stage& stage) {
     _stagePtrList.erase(stage->_ptrPosInModel);
 }
 
-void Model::cleanUpDatas() {
+void Model::cleanUp() {
     bool needAllocatorPreprocess = false;
 
     for (const auto& data : datas()) {
index 98a7059..21d8fff 100644 (file)
@@ -7,6 +7,7 @@
 #include <queue>
 #include <algorithm>
 #include <vector>
+#include <string>
 
 #include <vpu/model/edges.hpp>
 #include <vpu/model/data.hpp>
@@ -66,7 +67,7 @@ const StageDataInfo<float>& StageNode::propagateScaleFactors(
     //
 
     _scaleInfo.init(_inputEdges.size(), _outputEdges.size());
-    propagateScaleFactorsImpl(inputScales, step);
+    propagateScaleFactorsImpl(inputScales, step, _scaleInfo);
 
     //
     // Check that implementation returned valid map.
@@ -81,13 +82,13 @@ const StageDataInfo<float>& StageNode::propagateScaleFactors(
     return _scaleInfo;
 }
 
-const StageDataInfo<DimsOrder>& StageNode::propagateDataOrder() const {
+const StageDataInfo<DimsOrder>& StageNode::propagateDataOrder() {
     //
     // Get result from Stage implementation.
     //
 
     _orderInfo.init(_inputEdges.size(), _outputEdges.size());
-    propagateDataOrderImpl();
+    propagateDataOrderImpl(_orderInfo);
 
     //
     // Merge with the results from injected Stages.
@@ -114,13 +115,13 @@ const StageDataInfo<DimsOrder>& StageNode::propagateDataOrder() const {
     return _orderInfo;
 }
 
-const StageDataInfo<StridesRequirement>& StageNode::getDataStridesRequirements() const {
+const StageDataInfo<StridesRequirement>& StageNode::getDataStridesRequirements() {
     //
     // Get result from Stage implementation.
     //
 
     _stridesInfo.init(_inputEdges.size(), _outputEdges.size());
-    getDataStridesRequirementsImpl();
+    getDataStridesRequirementsImpl(_stridesInfo);
 
     //
     // Merge with the results from injected Stages.
@@ -158,13 +159,13 @@ void StageNode::finalizeDataLayout() {
     finalizeDataLayoutImpl();
 }
 
-const StageDataInfo<BatchSupport>& StageNode::getBatchSupportInfo() const {
+const StageDataInfo<BatchSupport>& StageNode::getBatchSupportInfo() {
     //
     // Get result from Stage implementation.
     //
 
     _batchInfo.init(_inputEdges.size(), _outputEdges.size());
-    getBatchSupportInfoImpl();
+    getBatchSupportInfoImpl(_batchInfo);
 
     //
     // Check that implementation returned valid map.
@@ -262,11 +263,35 @@ StageSHAVEsRequirements StageNode::getSHAVEsRequirements() const {
     return reqs;
 }
 
+void StageNode::initialCheck() const {
+    try {
+        initialCheckImpl();
+    } catch (const InferenceEngine::details::InferenceEngineException& exception) {
+        VPU_THROW_EXCEPTION << name() << " of type " << type() << ": " << exception.what();
+    }
+
+    for (const auto& injectedStageEdge : injectedStageEdges()) {
+        try {
+            injectedStageEdge->child()->initialCheck();
+        } catch (const InferenceEngine::details::InferenceEngineException& exception) {
+            VPU_THROW_EXCEPTION << name() << " of type " << type() << ": " << exception.what();
+        }
+    }
+}
+
 void StageNode::finalCheck() const {
-    finalCheckImpl();
+    try {
+        finalCheckImpl();
+    } catch (const InferenceEngine::details::InferenceEngineException& exception) {
+        VPU_THROW_EXCEPTION << name() << " of type " << type() << ": " << exception.what();
+    }
 
     for (const auto& injectedStageEdge : injectedStageEdges()) {
-        injectedStageEdge->child()->finalCheck();
+        try {
+            injectedStageEdge->child()->finalCheck();
+        } catch (const InferenceEngine::details::InferenceEngineException& exception) {
+            VPU_THROW_EXCEPTION << name() << " of type " << type() << ": " << exception.what();
+        }
     }
 }
 
@@ -296,16 +321,17 @@ void StageNode::serialize(BlobSerializer& serializer) const {
 
 void StageNode::propagateScaleFactorsImpl(
         const SmallVector<float>&,
-        ScalePropagationStep) {
+        ScalePropagationStep,
+        StageDataInfo<float>& scaleInfo) {
     //
     // Default implementation assumes no scaling support.
     //
 
     for (const auto& inEdge : _inputEdges) {
-        _scaleInfo.setInput(inEdge, 1.0f);
+        scaleInfo.setInput(inEdge, 1.0f);
     }
     for (const auto& outEdge : _outputEdges) {
-        _scaleInfo.setOutput(outEdge, 1.0f);
+        scaleInfo.setOutput(outEdge, 1.0f);
     }
 }
 
@@ -321,4 +347,49 @@ void printTo(std::ostream& os, const Stage& stage) {
     os << (stage == nullptr ? "<null>" : stage->name());
 }
 
+void assertAllInputsOutputsTypes(const StageNode* stage,
+                                 const DataType& expectedInputsType,
+                                 const DataType& expectedOutputsType) {
+    auto assertTypes = [](const DataType& expectedType,
+                          const std::vector<Data>& datas, const std::string& token) {
+        for (decltype(datas.size()) idx = 0; idx < datas.size(); ++idx) {
+            if (datas[idx]->usage() == DataUsage::Fake)
+                continue;
+            const auto& actualType = datas[idx]->desc().type();
+
+            IE_ASSERT(actualType == expectedType)
+                << ": " << token << "#" << std::to_string(idx) << " of type " << actualType << " given, but one of "
+                << expectedType << " is expected";
+        }
+    };
+
+    assertTypes(expectedInputsType, toVector(stage->inputs()), "input");
+    assertTypes(expectedOutputsType, toVector(stage->outputs()), "output");
+}
+
+
+void assertInputsOutputsTypes(const StageNode* stage,
+                              const std::vector<EnumSet<DataType>>& expectedInputsTypes,
+                              const std::vector<EnumSet<DataType>>& expectedOutputsTypes) {
+    auto assertTypes = [](const std::vector<EnumSet<DataType>>& expectedTypes,
+                          const std::vector<Data>& datas, const std::string& token) {
+        IE_ASSERT(expectedTypes.size() == datas.size())
+            << ": " << datas.size() << " " << token << "s given, but " << expectedTypes.size() << " is expected";
+
+        for (decltype(datas.size()) idx = 0; idx < datas.size(); ++idx) {
+            if (datas[idx]->usage() == DataUsage::Fake)
+                continue;
+            const auto& possibleTypes = expectedTypes[idx];
+            const auto& actualType = datas[idx]->desc().type();
+
+            IE_ASSERT(possibleTypes.find(actualType) != possibleTypes.end())
+                << ": " << token << "#" << std::to_string(idx) << " of type " << actualType << " given, but one of "
+                << toString(possibleTypes) << " is expected";
+        }
+    };
+
+    assertTypes(expectedInputsTypes, toVector(stage->inputs()), "input");
+    assertTypes(expectedOutputsTypes, toVector(stage->outputs()), "output");
+}
+
 }  // namespace vpu
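assertInputsOutputsTypes and assertAllInputsOutputsTypes give a stage a one-line way to declare which data types each of its inputs and outputs may carry; Fake datas are skipped and violations are reported with the port index. A reduced standalone version of the same check over std::set, only to illustrate the contract (the real helpers work on vpu Data objects and EnumSet, and the stage shown is hypothetical):

#include <cstddef>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

enum class DataType { U8, I8, FP16, FP32, S32 };

// Throw if the actual type of any port is not in its allowed set.
static void assertTypes(const std::vector<std::set<DataType>>& expected,
                        const std::vector<DataType>& actual,
                        const std::string& token) {
    if (expected.size() != actual.size()) {
        std::ostringstream msg;
        msg << actual.size() << " " << token << "s given, but " << expected.size() << " expected";
        throw std::runtime_error(msg.str());
    }
    for (std::size_t idx = 0; idx < actual.size(); ++idx) {
        if (expected[idx].count(actual[idx]) == 0) {
            std::ostringstream msg;
            msg << token << "#" << idx << " has an unexpected type";
            throw std::runtime_error(msg.str());
        }
    }
}

int main() {
    // A convolution-like stage: data input must be FP16, weights FP16 or U8, output FP16.
    const std::vector<std::set<DataType>> expectedInputs  = {{DataType::FP16}, {DataType::FP16, DataType::U8}};
    const std::vector<std::set<DataType>> expectedOutputs = {{DataType::FP16}};

    assertTypes(expectedInputs,  {DataType::FP16, DataType::U8}, "input");   // passes
    assertTypes(expectedOutputs, {DataType::FP16},               "output");  // passes

    try {
        assertTypes(expectedOutputs, {DataType::FP32}, "output");  // throws: FP32 not allowed
    } catch (const std::runtime_error&) {
        return 0;
    }
    return 1;
}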
index 437d013..ca774be 100644 (file)
@@ -12,6 +12,7 @@
 #include <memory>
 #include <map>
 
+#include <debug.h>
 #include <cpp_interfaces/exception2status.hpp>
 #include <details/caseless.hpp>
 #include <ie_plugin_config.hpp>
@@ -38,39 +39,50 @@ void check_input(const I &input, const T &options, const C &check) {
     }
 }
 
-}  // namespace
+void checkStridesConfig(std::string configStrides) {
+    try {
+        configStrides.pop_back();
 
-ParsedConfig::ParsedConfig(ConfigMode configMode): _mode(configMode) {
-    _log = std::make_shared<Logger>("Config", LogLevel::Warning, consoleOutput());
-}
+        auto tensorStrides = InferenceEngine::details::split(configStrides, "],");
 
-void ParsedConfig::checkSupportedValues(
-    const std::unordered_map<std::string, std::unordered_set<std::string>> &supported,
-    const std::map<std::string, std::string> &config) const {
+        for (const auto& stride : tensorStrides) {
+            auto pair = InferenceEngine::details::split(stride, "[");
+            auto message = "Invalid config value '" + stride + "' for VPU_TENSOR_STRIDES, does not match the pattern: tensor_name[strides]";
+            IE_ASSERT(pair.size() == 2) << message;
 
-    auto contains = [](const std::unordered_set<std::string> &supported, const std::string &option) {
-        return supported.find(option) != supported.end();
-    };
+            auto strideValues = InferenceEngine::details::split(pair.at(1), ",");
+            for (auto entry : strideValues) {
+                std::stoi(entry);
+            }
+        }
+    }
+    catch(const std::out_of_range& e) {
+        auto message = "Invalid config value for VPU_TENSOR_STRIDES, values out of range of int";
+        THROW_IE_EXCEPTION << message;
+    }
 
-    check_input(config, supported, contains);
+    catch(const std::invalid_argument& e) {
+        auto message = "Invalid config value for VPU_TENSOR_STRIDES, can't cast values to int";
+        THROW_IE_EXCEPTION << message;
+    }
 }
 
+}  // namespace
+
+ParsedConfig::ParsedConfig(ConfigMode configMode): ParsedConfigBase(configMode) {}
+
 void ParsedConfig::checkInvalidValues(const std::map<std::string, std::string> &config) const {
+    ParsedConfigBase::checkInvalidValues(config);
+
     const std::unordered_map<std::string, std::unordered_set<std::string>> supported_values = {
-        { CONFIG_KEY(LOG_LEVEL),
-          { CONFIG_VALUE(LOG_NONE), CONFIG_VALUE(LOG_WARNING), CONFIG_VALUE(LOG_INFO), CONFIG_VALUE(LOG_DEBUG) }},
-        { VPU_CONFIG_KEY(LOG_LEVEL),
-          { CONFIG_VALUE(LOG_NONE), CONFIG_VALUE(LOG_WARNING), CONFIG_VALUE(LOG_INFO), CONFIG_VALUE(LOG_DEBUG) }},
         { VPU_CONFIG_KEY(COMPUTE_LAYOUT),
-            { VPU_CONFIG_VALUE(AUTO), VPU_CONFIG_VALUE(NCHW), VPU_CONFIG_VALUE(NHWC) }},
+            { VPU_CONFIG_VALUE(AUTO), VPU_CONFIG_VALUE(NCHW), VPU_CONFIG_VALUE(NHWC), VPU_CONFIG_VALUE(NCDHW), VPU_CONFIG_VALUE(NDHWC) }},
         { VPU_CONFIG_KEY(COPY_OPTIMIZATION),      { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(PACK_DATA_IN_CMX),      { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(IGNORE_UNKNOWN_LAYERS),  { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { CONFIG_KEY(PERF_COUNT),                 { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
-        { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),   { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(HW_ADAPTIVE_MODE),       { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
-        { VPU_CONFIG_KEY(ALLOW_FP32_MODELS),      { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(HW_INJECT_STAGES),       { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(HW_POOL_CONV_MERGE),     { CONFIG_VALUE(YES), CONFIG_VALUE(NO) }},
         { VPU_CONFIG_KEY(PERF_REPORT_MODE),
@@ -125,34 +137,21 @@ IE_SUPPRESS_DEPRECATED_END
     if ((number_of_shaves == config.end()) && (number_of_CMX != config.end())) {
         THROW_IE_EXCEPTION << "You should set both options for resource management: VPU_NUMBER_OF_CMX_SLICES and VPU_NUMBER_OF_SHAVES";
     }
-}
 
-void ParsedConfig::checkUnknownOptions(const std::map<std::string, std::string> &config) const {
-    auto knownOptions = getKnownOptions();
-    for (auto &&entry : config) {
-        if (knownOptions.find(entry.first) == knownOptions.end()) {
-            THROW_IE_EXCEPTION << NOT_FOUND_str << entry.first << " key is not supported for VPU";
-        }
-    }
-}
+    auto tensor_strides = config.find(VPU_CONFIG_KEY(TENSOR_STRIDES));
 
-void ParsedConfig::checkOptionsAccordingToMode(const std::map<std::string, std::string> &config) const {
-    auto compileOptions = getCompileOptions();
-    for (auto &&entry : config) {
-        std::stringstream errorMsgStream;
-        if (compileOptions.find(entry.first) != compileOptions.end() && _mode == ConfigMode::RUNTIME_MODE) {
-            _log->warning("%s option will be ignored. Seems you are using compiled graph", entry.first);
-        }
+    if (tensor_strides != config.end()) {
+        checkStridesConfig(tensor_strides->second);
     }
 }
 
 std::unordered_set<std::string> ParsedConfig::getCompileOptions() const {
 IE_SUPPRESS_DEPRECATED_START
     return {
+        VPU_CONFIG_KEY(TENSOR_STRIDES),
         VPU_CONFIG_KEY(COMPUTE_LAYOUT),
         VPU_CONFIG_KEY(NETWORK_CONFIG),
         VPU_CONFIG_KEY(HW_ADAPTIVE_MODE),
-        VPU_CONFIG_KEY(ALLOW_FP32_MODELS),
         VPU_CONFIG_KEY(COPY_OPTIMIZATION),
         VPU_CONFIG_KEY(PACK_DATA_IN_CMX),
         VPU_CONFIG_KEY(DETECT_NETWORK_BATCH),
@@ -176,15 +175,17 @@ IE_SUPPRESS_DEPRECATED_END
 }
 
 std::unordered_set<std::string> ParsedConfig::getRuntimeOptions() const {
-    return {
-        CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
-        CONFIG_KEY(LOG_LEVEL),
-        VPU_CONFIG_KEY(LOG_LEVEL),
+    auto runtimeOptions = ParsedConfigBase::getRuntimeOptions();
+
+    std::unordered_set<std::string> specificOptions = {
         CONFIG_KEY(PERF_COUNT),
         VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME),
         CONFIG_KEY(CONFIG_FILE),
-        VPU_CONFIG_KEY(PERF_REPORT_MODE),
-    };
+        VPU_CONFIG_KEY(PERF_REPORT_MODE) };
+
+    runtimeOptions.insert(specificOptions.begin(), specificOptions.end());
+
+    return runtimeOptions;
 }
 
 std::unordered_set<std::string> ParsedConfig::getKnownOptions() const {
@@ -203,10 +204,14 @@ std::map<std::string, std::string> ParsedConfig::getDefaultConfig() const {
 }
 
 void ParsedConfig::configure(const std::map<std::string, std::string> &config) {
+    ParsedConfigBase::configure(config);
+
     static const std::unordered_map<std::string, ComputeLayout> layouts {
         { VPU_CONFIG_VALUE(AUTO), ComputeLayout::AUTO },
         { VPU_CONFIG_VALUE(NCHW), ComputeLayout::NCHW },
         { VPU_CONFIG_VALUE(NHWC), ComputeLayout::NHWC },
+        { VPU_CONFIG_VALUE(NCDHW), ComputeLayout::NCDHW },
+        { VPU_CONFIG_VALUE(NDHWC), ComputeLayout::NDHWC }
     };
 
     setOption(compileConfig.forceLayout, layouts, config, VPU_CONFIG_KEY(COMPUTE_LAYOUT));
@@ -222,7 +227,6 @@ void ParsedConfig::configure(const std::map<std::string, std::string> &config) {
     setOption(compileConfig.ignoreUnknownLayers, switches, config, VPU_CONFIG_KEY(IGNORE_UNKNOWN_LAYERS));
     setOption(compileConfig.hwOptimization,      switches, config, VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION));
     setOption(compileConfig.hwAdaptiveMode,      switches, config, VPU_CONFIG_KEY(HW_ADAPTIVE_MODE));
-    setOption(compileConfig.allowFP32Models,     switches, config, VPU_CONFIG_KEY(ALLOW_FP32_MODELS));
     setOption(compileConfig.injectSwOps,         switches, config, VPU_CONFIG_KEY(HW_INJECT_STAGES));
     setOption(compileConfig.mergeHwPoolToConv,   switches, config, VPU_CONFIG_KEY(HW_POOL_CONV_MERGE));
     setOption(compileConfig.ignoreIRStatistic,   switches, config, VPU_CONFIG_KEY(IGNORE_IR_STATISTIC));
@@ -245,19 +249,33 @@ void ParsedConfig::configure(const std::map<std::string, std::string> &config) {
     setOption(compileConfig.numCMXSlices, config, VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES),
               [](const std::string &src) { return std::stoi(src); });
 
-    setOption(exclusiveAsyncRequests, switches, config, CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS));
+    setOption(compileConfig.ioStrides, config, VPU_CONFIG_KEY(TENSOR_STRIDES),
+              [](const std::string &src) {
+                  auto configStrides = src;
+                  configStrides.pop_back();
+
+                  auto inputs = InferenceEngine::details::split(configStrides, "],");
+                  std::map<std::string, std::vector<int> > stridesMap;
+
+                  for (const auto& input : inputs) {
+                      std::vector<int> strides;
+
+                      auto pair = InferenceEngine::details::split(input, "[");
+                      auto strideValues = InferenceEngine::details::split(pair.at(1), ",");
+
+                      for (const auto& stride : strideValues) {
+                          strides.insert(strides.begin(), std::stoi(stride));
+                      }
+
+                      stridesMap.insert({pair.at(0), strides});
+                  }
+
+                  return stridesMap;
+                });
+
     setOption(printReceiveTensorTime, switches, config, VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME));
     setOption(perfCount,              switches, config, CONFIG_KEY(PERF_COUNT));
 
-    static const std::unordered_map<std::string, LogLevel> logLevels = {
-        { CONFIG_VALUE(LOG_NONE), LogLevel::None },
-        { CONFIG_VALUE(LOG_WARNING), LogLevel::Warning },
-        { CONFIG_VALUE(LOG_INFO), LogLevel::Info },
-        { CONFIG_VALUE(LOG_DEBUG), LogLevel::Debug }
-    };
-
-    setOption(hostLogLevel,   logLevels, config, CONFIG_KEY(LOG_LEVEL));
-    setOption(deviceLogLevel, logLevels, config, VPU_CONFIG_KEY(LOG_LEVEL));
 
     static const std::unordered_map<std::string, PerfReport> perfReports {
         { VPU_CONFIG_VALUE(PER_LAYER), PerfReport::PerLayer },
@@ -273,12 +291,6 @@ IE_SUPPRESS_DEPRECATED_START
     setOption(compileConfig.inputBias, config, VPU_CONFIG_KEY(INPUT_BIAS),
               [](const std::string &src) { return std::stof(src); });
 IE_SUPPRESS_DEPRECATED_END
-
-#ifndef NDEBUG
-    if (auto envVar = std::getenv("IE_VPU_LOG_LEVEL")) {
-        hostLogLevel = logLevels.at(envVar);
-    }
-#endif
 }
 
 }  // namespace vpu
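For reference, the decomposition of the VPU_TENSOR_STRIDES string performed in configure() above can be reproduced standalone; the sketch below uses a local split helper in place of InferenceEngine::details::split, and the tensor names and stride values are made-up illustrations. The accepted shape is "tensor_name[s1,s2,...],other_name[...]", and because each value is inserted at the front of the vector, the stored order is the reverse of the order written in the config string.

#include <cassert>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Minimal stand-in for InferenceEngine::details::split: splits on a full delimiter string.
static std::vector<std::string> split(const std::string& s, const std::string& delim) {
    std::vector<std::string> out;
    std::size_t start = 0, pos;
    while ((pos = s.find(delim, start)) != std::string::npos) {
        out.push_back(s.substr(start, pos - start));
        start = pos + delim.size();
    }
    out.push_back(s.substr(start));
    return out;
}

static std::map<std::string, std::vector<int>> parseTensorStrides(std::string configStrides) {
    configStrides.pop_back();  // drop the trailing ']'

    std::map<std::string, std::vector<int>> stridesMap;
    for (const auto& input : split(configStrides, "],")) {
        const auto pair = split(input, "[");                    // {tensor_name, "s1,s2,..."}
        std::vector<int> strides;
        for (const auto& value : split(pair.at(1), ",")) {
            strides.insert(strides.begin(), std::stoi(value));  // reverses the written order
        }
        stridesMap.emplace(pair.at(0), strides);
    }
    return stridesMap;
}

int main() {
    const auto strides = parseTensorStrides("data[96,32,8,2],prob[4,2]");
    assert((strides.at("data") == std::vector<int>{2, 8, 32, 96}));
    assert((strides.at("prob") == std::vector<int>{2, 4}));
    return 0;
}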
index 84985d3..8368eeb 100644 (file)
@@ -45,7 +45,8 @@ void PassSet::run(const Model::Ptr& model) const {
 
         auto startTime = std::chrono::high_resolution_clock::now();
 
-        model->cleanUpDatas();
+        model->cleanUp();
+
         p.first->run(model);
 
         auto endTime = std::chrono::high_resolution_clock::now();
@@ -58,7 +59,7 @@ void PassSet::run(const Model::Ptr& model) const {
         ++passInd;
     }
 
-    model->cleanUpDatas();
+    model->cleanUp();
 }
 
 //
@@ -82,6 +83,9 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
 
     _dumpInd = 0;
     ADD_DUMP_PASS("initial");
+    ADD_PASS(addCopyForOutputsInsideNetwork);
+
+    ADD_PASS(initialCheck);
 
     //
     // To overcome fp16 limitations
@@ -105,6 +109,9 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
     // Model common adaptation
     //
 
+    ADD_PASS(removeUnusedStagesOutputs);
+    ADD_DUMP_PASS("removeUnusedStagesOutputs");
+
     ADD_PASS(splitGroupedConv);
     ADD_DUMP_PASS("splitGroupedConv");
 
@@ -148,6 +155,13 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
     ADD_DUMP_PASS("adjustDataBatch");
 
     //
+    // Replace StridedSlice to other stages
+    //
+
+    ADD_PASS(stridedSlice);
+    ADD_DUMP_PASS("stridedSlice");
+
+    //
     // HW stages tiling
     //
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/add_copy_for_outputs_inside_network.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/add_copy_for_outputs_inside_network.cpp
new file mode 100644 (file)
index 0000000..e28123a
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vpu/pass_manager.hpp>
+
+#include <memory>
+
+namespace vpu {
+namespace {
+
+class PassImpl final : public Pass {
+public:
+    explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
+
+    void run(const Model::Ptr& model) override {
+        VPU_PROFILE(addCopyForOutputsInsideNetwork);
+
+        for (const auto& outputData : model->datas()) {
+            if (outputData->usage() != DataUsage::Output || outputData->numConsumers() == 0) {
+                continue;
+            }
+
+            auto newIntermediateData = model->duplicateData(
+                outputData,
+                "@intermediate",
+                outputData->desc());
+
+            auto producer = outputData->producerEdge();
+            model->replaceStageOutput(producer, newIntermediateData);
+            for (auto consumerEdge : outputData->consumerEdges()) {
+                model->replaceStageInput(consumerEdge, newIntermediateData);
+            }
+
+            _stageBuilder->addCopyStage(
+                model,
+                formatString("%s@copy-to-output", outputData->name()),
+                nullptr,
+                newIntermediateData,
+                outputData);
+        }
+    }
+
+private:
+    StageBuilder::Ptr _stageBuilder;
+};
+
+}  // namespace
+
+Pass::Ptr PassManager::addCopyForOutputsInsideNetwork() {
+    return std::make_shared<PassImpl>(_stageBuilder);
+}
+
+}  // namespace vpu
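The new pass targets the case where a network output is also consumed inside the graph: the producer is repointed to a fresh "@intermediate" data, all internal consumers read that intermediate, and a Copy stage forwards the intermediate to the original output. A toy rewiring sketch with plain structs, only to show the before/after topology (the real pass operates on the vpu Model/Data/Stage graph and uses StageBuilder::addCopyStage):

#include <cassert>
#include <string>
#include <vector>

struct Node {
    std::string name;
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
};

int main() {
    // Before: "conv" produces network output "out", and "relu" also consumes "out".
    Node conv{"conv", {"in"}, {"out"}};
    Node relu{"relu", {"out"}, {"relu_out"}};

    // Rewire as the pass does: the producer and the internal consumer switch to the
    // intermediate data, and a copy stage forwards it to the original output data.
    const std::string intermediate = "out@intermediate";
    conv.outputs[0] = intermediate;
    relu.inputs[0]  = intermediate;
    Node copy{"out@copy-to-output", {intermediate}, {"out"}};

    // After: "out" is produced only by the copy stage and has no internal consumers.
    assert(conv.outputs[0] == intermediate);
    assert(relu.inputs[0] == intermediate);
    assert(copy.outputs[0] == "out");
    return 0;
}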
index b704114..b6492f0 100644 (file)
@@ -383,8 +383,8 @@ void PassImpl::replicateStage(
         }
 
         auto tileStage = model->duplicateStage(
-            stage->name() + postfix,
             stage,
+            postfix,
             newInputs,
             newOutputs);
 
index b280994..d9bfd2e 100644 (file)
@@ -25,20 +25,21 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
@@ -46,11 +47,8 @@ private:
     }
 
     void finalCheckImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto inDimsOrder = input->desc().dimsOrder();
         auto outDimsOrder = output->desc().dimsOrder();
@@ -63,12 +61,8 @@ private:
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto inDimsOrder = input->desc().dimsOrder();
         auto outDimsOrder = output->desc().dimsOrder();
@@ -94,12 +88,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
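
The hunks above reflect a Stage interface change: per-data results from propagateScaleFactorsImpl, propagateDataOrderImpl, getDataStridesRequirementsImpl and getBatchSupportInfoImpl are now written into a StageDataInfo<T> passed in by the caller, and edges are reached through inputEdge(0)/outputEdge(0) accessors instead of the protected _inputEdges/_outputEdges members. A small self-contained sketch of the out-parameter pattern follows; ToyStageDataInfo and ToyCopyStage are simplified stand-ins, not the real vpu types.

// The caller owns a per-stage info object keyed by input/output index; the
// stage only fills the entries it has requirements for and stays const.
// ToyStageDataInfo is a simplified stand-in for vpu::StageDataInfo<T>.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

template <typename Val>
class ToyStageDataInfo {
public:
    ToyStageDataInfo(std::size_t numInputs, std::size_t numOutputs)
        : _inputs(numInputs), _outputs(numOutputs),
          _inputSet(numInputs, false), _outputSet(numOutputs, false) {}

    void setInput(std::size_t ind, const Val& v)  { _inputs[ind] = v;  _inputSet[ind] = true; }
    void setOutput(std::size_t ind, const Val& v) { _outputs[ind] = v; _outputSet[ind] = true; }
    bool hasInput(std::size_t ind) const  { return _inputSet[ind]; }
    bool hasOutput(std::size_t ind) const { return _outputSet[ind]; }
    const Val& output(std::size_t ind) const { return _outputs[ind]; }

private:
    std::vector<Val> _inputs, _outputs;
    std::vector<bool> _inputSet, _outputSet;
};

struct ToyCopyStage {
    // Analogue of getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>&):
    // record a requirement for the output only, leave the input untouched.
    void getDataStridesRequirements(ToyStageDataInfo<int>& stridesInfo) const {
        stridesInfo.setOutput(0, /* "compact" marker */ 1);
    }
};

int main() {
    ToyCopyStage stage;
    ToyStageDataInfo<int> info(/*numInputs=*/1, /*numOutputs=*/1);
    stage.getDataStridesRequirements(info);

    assert(!info.hasInput(0));   // no requirement recorded for the input
    assert(info.hasOutput(0));
    std::cout << "output strides requirement: " << info.output(0) << "\n";
    return 0;
}
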
@@ -145,6 +135,12 @@ void PassImpl::run(const Model::Ptr& model) {
         if (data->usage() == DataUsage::Intermediate)
             continue;
 
+        if (data->usage() == DataUsage::Input || data->usage() == DataUsage::Output) {
+            if (!data->requiredStrides().fixedStrides().empty()) {
+                continue;
+            }
+        }
+
         data->updateRequiredStrides(StridesRequirement::compact());
     }
 
@@ -201,6 +197,10 @@ void PassImpl::run(const Model::Ptr& model) {
                 auto output = outEdge->output();
                 auto portInd = outEdge->portInd();
 
+                if (output->usage() == DataUsage::Fake) {
+                    continue;
+                }
+
                 auto requiredOrder = output->desc().dimsOrder();
 
                 if (curStageInfo.hasOutput(outEdge)) {
@@ -310,6 +310,10 @@ void PassImpl::run(const Model::Ptr& model) {
                 auto output = outEdge->output();
                 auto portInd = outEdge->portInd();
 
+                if (output->usage() == DataUsage::Fake) {
+                    continue;
+                }
+
                 auto requiredStrides = StridesRequirement();
 
                 if (curStageInfo.hasOutput(outEdge)) {
index d15e6f2..9c242f7 100644 (file)
@@ -81,6 +81,8 @@ void PassImpl::copyHwNetOutputs(const Model::Ptr& model) {
 
             model->replaceStageOutput(stage->outputEdge(0), newOutput);
 
+            newOutput->updateRequiredStrides(stage->getDataStridesRequirements().getOutput(stage->outputEdge(0)));
+
             _stageBuilder->addCopyStage(
                 model,
                 stage->name() + "@flush-output",
index 5d92644..e9d3613 100644 (file)
@@ -112,7 +112,7 @@ void PassImpl::run(const Model::Ptr& model) {
                 //
 
                 if (connectionStage->type() == StageType::Concat ||
-                    connectionStage->type() == StageType::Broadcast) {
+                    connectionStage->type() == StageType::Expand) {
                     IE_ASSERT(producer == child);
                     IE_ASSERT(consumer == parent);
                 } else if (connectionStage->type() == StageType::Split ||
index d941109..992632d 100644 (file)
 
 #include <vpu/pass_manager.hpp>
 
-#include <tuple>
+#include <precision_utils.h>
 #include <utility>
 #include <memory>
-#include <list>
-#include <string>
-#include <limits>
-#include <algorithm>
-#include <vector>
-#include <unordered_map>
 #include <set>
 
-#include <precision_utils.h>
-
 #include <vpu/compile_env.hpp>
 #include <vpu/stub_stage.hpp>
 #include <vpu/hw/mx_stage.hpp>
 #include <vpu/hw/tiling.hpp>
 #include <vpu/hw/utility.hpp>
+#include <vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp>
+#include <vpu/passes/hw_conv_tiling/hw_stage_tiler.hpp>
 
 namespace vpu {
 
 namespace {
 
-class Optimizer final {
-public:
-    Optimizer(const std::string& stageName,
-              const DimValues& inputDims, const DimValues& outputDims,
-              const DimValues& origOutputDims,
-              bool withPool,
-              int kernelSizeX, int kernelSizeY,
-              int kernelStride,
-              int paddingX, int paddingY)
-        : _stageName(stageName),
-          _inputDims(inputDims), _outputDims(outputDims),
-          _origOutputDims(origOutputDims),
-          _withPool(withPool),
-          _kernelSizeX(kernelSizeX), _kernelSizeY(kernelSizeY),
-          _kernelStride(kernelStride),
-          _paddingX(paddingX), _paddingY(paddingY) {
-    }
-
-    bool optimize() {
-        initTileSizes();
-
-        if (!selectBestTile()) {
-            if (_withPool) {
-                removePool();
-                return optimize();
-            }
-
-            return false;
-        }
-
-        patternMatching();
-
-        // Merged Pooling and SoC can't be used together.
-        if (_withPool) {
-            IE_ASSERT(!hasSoC());
-        }
-
-        if (!createTiles()) {
-            if (_withPool) {
-                removePool();
-                return optimize();
-            }
-
-            return false;
-        }
-
-        return true;
-    }
-
-    bool withPool() const {
-        return _withPool;
-    }
-
-    const HwConvTilingPtr& getTiling() const {
-        return _tiling;
-    }
-
-private:
-    void initTileSizes() {
-        int tempX = _inputDims[Dim::W] + 2 * _paddingX - _kernelSizeX;
-        int tempY = _inputDims[Dim::H] + 2 * _paddingY - _kernelSizeY;
-
-        int outWidthWithOutCeil = (tempX + _kernelStride) / _kernelStride;
-        int outHeightWithOutCeil = (tempY + _kernelStride) / _kernelStride;
-
-        int outWidthWithCeil =  static_cast<int>(std::ceil(static_cast<double>(tempX) / _kernelStride + 1));
-        int outHeightWithCeil = static_cast<int>(std::ceil(static_cast<double>(tempY) / _kernelStride + 1));
-
-        if ((_origOutputDims[Dim::W] != outWidthWithCeil) && (_origOutputDims[Dim::W] != outWidthWithOutCeil)) {
-            VPU_THROW_EXCEPTION
-                    << "Internal error: Output in " << _stageName << " has incorrect width dimension. Expected: "
-                    << outWidthWithCeil << " or " << outWidthWithOutCeil << " Actual: " << _origOutputDims[Dim::W];
-        }
-
-        if ((_origOutputDims[Dim::H] != outHeightWithCeil) && (_origOutputDims[Dim::H] != outHeightWithOutCeil)) {
-            VPU_THROW_EXCEPTION
-                    << "Internal error: Output in " << _stageName << " has incorrect height dimension. Expected: "
-                    << outHeightWithCeil << " or " << outHeightWithOutCeil << " Actual: " << _origOutputDims[Dim::H];
-        }
-
-        if ((_origOutputDims[Dim::W] == outWidthWithCeil) && (_origOutputDims[Dim::H] == outHeightWithCeil)) {
-            _useCeil = true;
-        } else {
-            IE_ASSERT((_origOutputDims[Dim::W] == outWidthWithOutCeil) && (_origOutputDims[Dim::H] == outHeightWithOutCeil));
-        }
-
-        _inputTileDims.set(Dim::W, std::min(CNN_MAX_INPUT_WIDTH, _inputDims[Dim::W]));
-        _inputTileDims.set(Dim::H, std::min(CNN_MAX_INPUT_HEIGHT, _inputDims[Dim::H]));
-        _inputTileDims.set(Dim::C, std::min(CNN_MAX_INPUT_CHANNELS, _inputDims[Dim::C]));
-
-        _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-        _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-        _outputTileDims.set(Dim::C, _outputDims[Dim::C]);
-
-        correctOutputPlaneSize();
-    }
-
-    void patternMatching() {
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 1 && _paddingY == 1 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 512 && _inputDims[Dim::H] == 28 && _inputDims[Dim::W] == 28 &&
-            _outputDims[Dim::C] == 512) {
-            _inputTileDims.set(Dim::H, 28);
-            _inputTileDims.set(Dim::C, 172);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 1 && _paddingY == 1 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 256 && _inputDims[Dim::H] == 56 && _inputDims[Dim::W] == 56 &&
-            _outputDims[Dim::C] == 256) {
-            _inputTileDims.set(Dim::H, 30);
-            _inputTileDims.set(Dim::C, 128);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 1 && _paddingY == 1 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 64 && _inputDims[Dim::H] == 224 && _inputDims[Dim::W] == 224 &&
-            _outputDims[Dim::C] == 64) {
-            _inputTileDims.set(Dim::H, 82);
-            _inputTileDims.set(Dim::W, 82);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (_inputDims[Dim::C] == 512 &&
-                _inputDims[Dim::H] == 7 &&
-                _inputDims[Dim::W] == 7 &&
-                _outputDims[Dim::C] == 4096) {
-            _inputTileDims.set(Dim::C, 64);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 1 && _paddingY == 1 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 128 && _inputDims[Dim::H] == 112 && _inputDims[Dim::W] == 112 &&
-            _outputDims[Dim::C] == 128) {
-            _inputTileDims.set(Dim::H, 32);
-            _inputTileDims.set(Dim::W, 112);
-            _inputTileDims.set(Dim::C, 32);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (_inputDims[Dim::C] == 1088 &&
-            _inputDims[Dim::H] == 17 &&
-            _inputDims[Dim::W] == 17 &&
-            (_outputDims[Dim::C] == 128 || _outputDims[Dim::C] == 192)) {
-            _inputTileDims.set(Dim::H, 17);
-            _inputTileDims.set(Dim::C, 544);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (_inputDims[Dim::C] == 1024 &&
-                _inputDims[Dim::H] == 17 &&
-                _inputDims[Dim::W] == 17 &&
-                _outputDims[Dim::C] == 384) {
-            _inputTileDims.set(Dim::H, 17);
-            _inputTileDims.set(Dim::C, 512);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 0 && _paddingY == 0 && _kernelStride == 2 &&
-            _inputDims[Dim::C] == 384 && _inputDims[Dim::H] == 35 && _inputDims[Dim::W] == 35 &&
-            _outputDims[Dim::C] == 384) {
-            _inputTileDims.set(Dim::C, 194);
-            _inputTileDims.set(Dim::H, 35);
-            _inputTileDims.set(Dim::W, 35);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (_inputDims[Dim::C] == 192 &&
-                _inputDims[Dim::H] == 71 &&
-                _inputDims[Dim::W] == 71 &&
-                _outputDims[Dim::H] == 35) {
-            _inputTileDims.set(Dim::W, 71);
-            _inputTileDims.set(Dim::C, 96);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-                _inputDims[Dim::C] == 256 &&
-                _inputDims[Dim::H] == 128 &&
-                _inputDims[Dim::W] == 128 &&
-                _outputDims[Dim::C] == 256) {
-            _inputTileDims.set(Dim::W, 128);
-            _inputTileDims.set(Dim::H, 15);
-            _inputTileDims.set(Dim::C, 64);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-                _inputDims[Dim::C] == 512 &&
-                _inputDims[Dim::H] == 64 &&
-                _inputDims[Dim::W] == 64 &&
-                _outputDims[Dim::C] == 512) {
-            _inputTileDims.set(Dim::W, 64);
-            _inputTileDims.set(Dim::H, 10);
-            _inputTileDims.set(Dim::C, 128);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 1 && _kernelSizeY == 1 && _paddingX == 0 && _paddingY == 0 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 384 &&
-            _inputDims[Dim::H] == 56 &&
-            _inputDims[Dim::W] == 56 &&
-            _outputDims[Dim::C] == 64) {
-            _inputTileDims.set(Dim::C, 384);
-            _inputTileDims.set(Dim::H, 56);
-            _inputTileDims.set(Dim::W, 20);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 1 && _kernelSizeY == 1 && _paddingX == 0 && _paddingY == 0 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 2112 &&
-            _inputDims[Dim::H] == 14 &&
-            _inputDims[Dim::W] == 14 &&
-            _outputDims[Dim::C] == 1056) {
-            _inputTileDims.set(Dim::C, 556);
-            _inputTileDims.set(Dim::H, 14);
-            _inputTileDims.set(Dim::W, 14);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 1 && _paddingY == 1 && _kernelStride == 2 &&
-            _inputDims[Dim::C] == 256 &&
-            _inputDims[Dim::H] == 52 &&
-            _inputDims[Dim::W] == 52 &&
-            _outputDims[Dim::C] == 512) {
-            _inputTileDims.set(Dim::C, 128);
-            _inputTileDims.set(Dim::H, 52);
-            _inputTileDims.set(Dim::W, 52);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-
-        if (!_withPool &&
-            _kernelSizeX == 3 && _kernelSizeY == 3 && _paddingX == 1 && _paddingY == 1 && _kernelStride == 1 &&
-            _inputDims[Dim::C] == 256 &&
-            _inputDims[Dim::H] == 23 &&
-            _inputDims[Dim::W] == 23 &&
-            _outputDims[Dim::C] == 640) {
-            _inputTileDims.set(Dim::C, 256);
-            _inputTileDims.set(Dim::H, 14);
-            _inputTileDims.set(Dim::W, 23);
-            _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-            _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-            correctOutputPlaneSize();
-            return;
-        }
-    }
-
-    bool selectBestTile() {
-        struct Solution final {
-            int numWidthTiles = 0;
-            int numHeightTiles = 0;
-            int numChannelTiles = 0;
-            int totalNumTiles = 0;
-            double cost = std::numeric_limits<double>::max();
-        };
-
-        const auto& env = CompileEnv::get();
-
-        // TODO: estimate this numbers
-        const int maxNumWidthTiles = 15;
-        const int maxNumHeightTiles = 15;
-        const int maxNumChannelTiles = _withPool ? 1 : 15;
-
-        Solution bestSol;
-
-        auto outputTileCopy = _outputTileDims;
-
-        auto minInputTileDimW = 64;
-        auto minInputTileDimH = _kernelSizeY;
-        if (_withPool) {
-            minInputTileDimW *= 2;
-            minInputTileDimH *= 2;
-        }
-
-        for (int numChannelTiles = 1; numChannelTiles <= maxNumChannelTiles; numChannelTiles++) {
-            int inputTileDimC = divUp(_inputDims[Dim::C], numChannelTiles);
-
-            for (int numWidthTiles = 1; numWidthTiles <= maxNumWidthTiles; numWidthTiles++) {
-                int inputTileDimW = divUp(_inputDims[Dim::W], numWidthTiles);
-
-                //
-                // Filter-out too small SoW tiles.
-                //
-
-                if (numWidthTiles > 1 && inputTileDimW < minInputTileDimW) {
-                    break;
-                }
-
-                for (int numHeightTiles = 1; numHeightTiles <= maxNumHeightTiles; numHeightTiles++) {
-                    int inputTileDimH = divUp(_inputDims[Dim::H], numHeightTiles);
-
-                    //
-                    // Filter-out too small SoH tiles.
-                    //
-
-                    if (numHeightTiles > 1 && inputTileDimH < minInputTileDimH) {
-                        break;
-                    }
-
-                    //
-                    // Try current tile size.
-                    //
-
-                    _inputTileDims.set(Dim::W, inputTileDimW);
-                    _inputTileDims.set(Dim::H, inputTileDimH);
-                    _inputTileDims.set(Dim::C, inputTileDimC);
-
-                    _outputTileDims = outputTileCopy;
-                    correctOutputPlaneSize();
-
-                    //
-                    // Limitations for Conv+Pool case.
-                    //
-
-                    if (_withPool) {
-                        if (_outputTileDims[Dim::W] <= 2 ||
-                            _outputTileDims[Dim::H] <= 2) {
-                            break;
-                        }
-                    }
-
-                    //
-                    // Check that tiling is valid.
-                    //
-
-                    auto heightTiles = calcHeightTiles();
-                    auto widthTiles = calcWidthTiles();
-
-                    if (heightTiles.empty()) {
-                        continue;
-                    }
-                    if (widthTiles.empty()) {
-                        break;
-                    }
-
-                    bool isOK = true;
-                    double solutionCost = 0.0;
-
-                    for (const auto& heightTile : heightTiles) {
-                        for (const auto& widthTile : widthTiles) {
-                            //
-                            // Limitations for Conv+Pool case.
-                            //
-
-                            if (_withPool) {
-                                if (widthTile.inputWithJunk % 2 != 0 ||
-                                    heightTile.inputWithJunk % 2 != 0 ||
-                                    widthTile.outputWithJunk % 2 != 0 ||
-                                    widthTile.outputWithJunk <= 2 ||
-                                    heightTile.outputWithJunk <= 2) {
-                                    isOK = false;
-                                    break;
-                                }
-                            }
-
-                            //
-                            // Can use this tile.
-                            //
-
-                            auto tileInfo = splitHwConvIntoOutChannelsTiles(
-                                widthTile.inputWithJunk, heightTile.inputWithJunk, inputTileDimC,
-                                outputTileCopy[Dim::C],
-                                _kernelSizeX, _kernelSizeY, _kernelStride);
-
-                            if (tileInfo.numDescr == 0) {
-                                isOK = false;
-                                break;
-                            }
-
-                            //
-                            // Output tile fits to CMX limitation.
-                            //
-
-                            DimValues fullOutputTileDims;
-                            fullOutputTileDims.set(Dim::W, widthTile.outputWithJunk);
-                            fullOutputTileDims.set(Dim::H, heightTile.outputWithJunk);
-                            fullOutputTileDims.set(Dim::C, outputTileCopy[Dim::C]);
-
-                            // TODO: support HCW
-                            if (calculateHwBufferSize(fullOutputTileDims) > env.resources.cmxLimit) {
-                                isOK = false;
-                                break;
-                            }
-
-                            //
-                            // Calc tile cost.
-                            //
-
-                            solutionCost += tileInfo.cost * numChannelTiles;
-
-                            // Alignment for output
-                            if ((widthTile.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                                solutionCost += 1.0
-                                      * widthTile.outputWithJunk
-                                      * heightTile.outputWithJunk
-                                      * outputTileCopy[Dim::C];
-                            }
-
-                            // Alignment for input
-                            if ((widthTile.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                                solutionCost += 1.0
-                                      * widthTile.inputWithJunk
-                                      * heightTile.inputWithJunk
-                                      * tileInfo.extendedInputDimC;
-                            }
-
-                            // SoC overhead
-                            solutionCost += 1.0
-                                  * (numChannelTiles - 1)
-                                  * widthTile.outputWithJunk
-                                  * heightTile.outputWithJunk
-                                  * outputTileCopy[Dim::C];
-                        }
-
-                        if (!isOK) {
-                            break;
-                        }
-                    }
-
-                    if (!isOK) {
-                        continue;
-                    }
-
-                    //
-                    // Compare with current best solution.
-                    //
-
-                    Solution curSol;
-                    curSol.numWidthTiles = numWidthTiles;
-                    curSol.numHeightTiles = numHeightTiles;
-                    curSol.numChannelTiles = numChannelTiles;
-                    curSol.totalNumTiles = numWidthTiles * numHeightTiles * numChannelTiles;
-                    curSol.cost = solutionCost;
-
-                    if (curSol.cost < bestSol.cost || (isDoubleEqual(curSol.cost, bestSol.cost) && curSol.totalNumTiles < bestSol.totalNumTiles)) {
-                        bestSol = curSol;
-                    }
-
-                    // Skip smaller SoC tiling.
-                    break;
-                }
-            }
-        }
-
-        if (bestSol.totalNumTiles == 0) {
-            return false;
-        }
-
-        int inputTileDimW = divUp(_inputDims[Dim::W], bestSol.numWidthTiles);
-        int inputTileDimH = divUp(_inputDims[Dim::H], bestSol.numHeightTiles);
-        int inputTileDimC = divUp(_inputDims[Dim::C], bestSol.numChannelTiles);
-
-        _inputTileDims.set(Dim::W, inputTileDimW);
-        _inputTileDims.set(Dim::H, inputTileDimH);
-        _inputTileDims.set(Dim::C, inputTileDimC);
-
-        _outputTileDims = outputTileCopy;
-        correctOutputPlaneSize();
-
-        return true;
-    }
-
-    bool createTiles() {
-        auto heightTiles = calcHeightTiles();
-        IE_ASSERT(!heightTiles.empty());
-
-        auto widthTiles = calcWidthTiles();
-        IE_ASSERT(!widthTiles.empty());
-
-        _tiling = std::make_shared<HwConvTiling>();
-        _tiling->sohTiles = heightTiles.size();
-        _tiling->sowTiles = widthTiles.size();
-        _tiling->socTiles = divUp(_inputDims[Dim::C], _inputTileDims[Dim::C]);
-
-        for (int sohInd = 0; sohInd < _tiling->sohTiles; ++sohInd) {
-            const auto& heightTileInfo = heightTiles[sohInd];
-
-            for (int sowInd = 0; sowInd < _tiling->sowTiles; ++sowInd) {
-                const auto& widthTileInfo = widthTiles[sowInd];
-
-                auto planeTile = std::make_shared<HwConvPlaneTile>();
-                planeTile->parent = _tiling;
-
-                planeTile->sohInd = sohInd;
-                planeTile->sowInd = sowInd;
-
-                planeTile->heightInfo = heightTileInfo;
-                planeTile->widthInfo = widthTileInfo;
-
-                for (int socInd = 0; socInd < _tiling->socTiles; ++socInd) {
-                    auto channelTile = std::make_shared<HwConvChannelTile>();
-                    channelTile->parent = planeTile;
-
-                    channelTile->socInd = socInd;
-
-                    channelTile->finalTiles = splitHwConvIntoOutChannelsTiles(
-                            widthTileInfo.inputWithJunk, heightTileInfo.inputWithJunk, _inputTileDims[Dim::C],
-                            _outputTileDims[Dim::C],
-                            _kernelSizeX, _kernelSizeY, _kernelStride);
-
-                    if (channelTile->finalTiles.numDescr == 0) {
-                        return false;
-                    }
-
-                    channelTile->extendedInputDimC = channelTile->finalTiles.extendedInputDimC;
-                    channelTile->extendedOutputDimC = channelTile->finalTiles.extendedOutputDimC;
-
-                    channelTile->channelStartIndex = socInd * _inputTileDims[Dim::C];
-                    channelTile->numInputChannels = _inputTileDims[Dim::C];
-
-                    planeTile->channelTiles.emplace_back(channelTile);
-                }
-
-                _tiling->planeTiles.emplace_back(planeTile);
-            }
-        }
-
-        return true;
-    }
-
-private:
-    void correctOutputPlaneSize() {
-        int maxOutputWidth = calcOutputSize(_inputTileDims[Dim::W], _kernelSizeX, _kernelStride, _paddingX, _paddingX, _useCeil);
-        if (_withPool) {
-            maxOutputWidth /= 2;
-        }
-        _outputTileDims.set(Dim::W, std::min(_outputTileDims[Dim::W], maxOutputWidth));
-
-        int maxOutputHeight = calcOutputSize(_inputTileDims[Dim::H], _kernelSizeY, _kernelStride, _paddingY, _paddingY, _useCeil);
-        if (_withPool) {
-            maxOutputHeight /= 2;
-        }
-        _outputTileDims.set(Dim::H, std::min(_outputTileDims[Dim::H], maxOutputHeight));
-    }
-
-    bool hasSoC() const {
-        return _inputTileDims[Dim::C] != _inputDims[Dim::C];
-    }
-
-    void removePool() {
-        _withPool = false;
-        _outputDims = _origOutputDims;
-    }
-
-    SmallVector<HwPlaneTileInfo> calcHeightTiles() {
-        SmallVector<HwPlaneTileInfo> heightTiles;
-
-        if (_outputTileDims[Dim::H] == _outputDims[Dim::H]) {
-            HwPlaneTileInfo info;
-            info.inputWithJunk = _inputDims[Dim::H];
-            info.outputWithJunk = _outputDims[Dim::H];
-            info.outputJunkBefore = 0;
-            info.outputJunkAfter = 0;
-            info.inputStartIndex = 0;
-            info.inputEndIndex = _inputDims[Dim::H];
-            info.outputStartIndex = 0;
-            info.outputEndIndex = _outputDims[Dim::H];
-
-            heightTiles.emplace_back(info);
-        } else {
-            if (_withPool) {
-                heightTiles = splitIntoPlaneTilesWithPool(
-                    _inputDims[Dim::H],
-                    _kernelSizeY,
-                    _kernelStride,
-                    _paddingY,
-                    _outputTileDims[Dim::H]);
-            } else {
-                heightTiles = splitIntoPlaneTiles(
-                    _inputDims[Dim::H],
-                    _outputDims[Dim::H],
-                    _kernelSizeY,
-                    _kernelStride,
-                    _paddingY, _paddingY,
-                    _outputTileDims[Dim::H],
-                    false,
-                    _useCeil);
-            }
-        }
-
-        return heightTiles;
-    }
-
-    SmallVector<HwPlaneTileInfo> calcWidthTiles() {
-        SmallVector<HwPlaneTileInfo> widthTiles;
-
-        if (_outputTileDims[Dim::W] == _outputDims[Dim::W]) {
-            HwPlaneTileInfo info;
-            info.inputWithJunk = _inputDims[Dim::W];
-            info.outputWithJunk = _outputDims[Dim::W];
-            info.outputJunkBefore = 0;
-            info.outputJunkAfter = 0;
-            info.inputStartIndex = 0;
-            info.inputEndIndex = _inputDims[Dim::W];
-            info.outputStartIndex = 0;
-            info.outputEndIndex = _outputDims[Dim::W];
-
-            widthTiles.emplace_back(info);
-        } else {
-            if (_withPool) {
-                widthTiles = splitIntoPlaneTilesWithPool(
-                    _inputDims[Dim::W],
-                    _kernelSizeX,
-                    _kernelStride,
-                    _paddingX,
-                    _outputTileDims[Dim::W]);
-            } else {
-                widthTiles = splitIntoPlaneTiles(
-                    _inputDims[Dim::W],
-                    _outputDims[Dim::W],
-                    _kernelSizeX,
-                    _kernelStride,
-                    _paddingX, _paddingX,
-                    _outputTileDims[Dim::W],
-                    true,
-                    _useCeil);
-            }
-        }
-
-        return widthTiles;
-    }
-
-private:
-    std::string _stageName;
-
-    DimValues _inputDims;
-    DimValues _outputDims;
-    DimValues _origOutputDims;
-
-    bool _withPool = false;
-
-    int _kernelSizeX = 0;
-    int _kernelSizeY = 0;
-    int _kernelStride = 0;
-    int _paddingX = 0;
-    int _paddingY = 0;
-
-    DimValues _inputTileDims;
-    DimValues _outputTileDims;
-
-    HwConvTilingPtr _tiling;
-
-    bool _useCeil = false;
-};
-
-using TileWeightsMap = std::unordered_map<int, Data>;
-
-const int BIASES_IND = -1;
-const int SCALES_IND = -2;
-
 class PassImpl final : public Pass {
 public:
     explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
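
The Optimizer class removed above brute-forced the number of width/height/channel tiles, filtered out splits whose tiles became too small or did not fit the hardware, and kept the cheapest valid solution (fewest tiles on a tie); that logic now lives behind hw_convolution_tiler.hpp and hw_stage_tiler.hpp. A compact stand-alone sketch of the same search shape follows; the cost model is a made-up placeholder, not the real CMX/descriptor cost, and divUp/selectBestTile here are local helpers, not the vpu ones.

// Brute-force tile-count search: try every (channel, width, height) split up
// to a limit, skip splits whose tiles get too small, and keep the cheapest
// valid solution (fewer total tiles wins ties). The cost below is a placeholder
// that mostly counts rounding junk, not the real HW cost model.
#include <iostream>
#include <limits>

struct TileSolution {
    int widthTiles = 0, heightTiles = 0, channelTiles = 0;
    int totalTiles = 0;
    double cost = std::numeric_limits<double>::max();
};

inline int divUp(int x, int d) { return (x + d - 1) / d; }

TileSolution selectBestTile(int inW, int inH, int inC,
                            int minTileW, int minTileH, int maxTilesPerDim) {
    TileSolution best;
    for (int c = 1; c <= maxTilesPerDim; ++c) {
        const int tileC = divUp(inC, c);
        for (int w = 1; w <= maxTilesPerDim; ++w) {
            const int tileW = divUp(inW, w);
            if (w > 1 && tileW < minTileW) break;      // too narrow; more splits only shrink it
            for (int h = 1; h <= maxTilesPerDim; ++h) {
                const int tileH = divUp(inH, h);
                if (h > 1 && tileH < minTileH) break;  // too short

                const int total = w * h * c;
                // Placeholder cost: elements actually processed (including the
                // junk from rounding up) plus a penalty per extra channel split.
                const double cost = 1.0 * tileW * tileH * tileC * total + 1000.0 * (c - 1);

                if (cost < best.cost || (cost == best.cost && total < best.totalTiles)) {
                    best.widthTiles = w;
                    best.heightTiles = h;
                    best.channelTiles = c;
                    best.totalTiles = total;
                    best.cost = cost;
                }
            }
        }
    }
    return best;
}

int main() {
    const TileSolution s = selectBestTile(224, 224, 64, /*minTileW=*/64, /*minTileH=*/3, /*maxTilesPerDim=*/15);
    std::cout << "tiles: " << s.widthTiles << "x" << s.heightTiles << "x" << s.channelTiles
              << ", cost " << s.cost << "\n";
    return 0;
}
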
@@ -742,110 +39,93 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        auto tryHW = origStage->attrs().getOrDefault<bool>("tryHW", false);
+        const auto tryHW = origStage->attrs().getOrDefault<bool>("tryHW", false);
         if (!tryHW) {
             continue;
         }
 
-        auto origInput = origStage->input(0);
-        auto origWeights = origStage->input(1);
-        auto origBiases = origStage->input(2);
-        auto origOutput = origStage->output(0);
-
-        auto kernelSizeX = origStage->attrs().get<int>("kernelSizeX");
-        auto kernelSizeY = origStage->attrs().get<int>("kernelSizeY");
-        auto kernelStride = origStage->attrs().get<int>("kernelStrideX");
-        auto padLeft = origStage->attrs().get<int>("padLeft");
-        auto padTop = origStage->attrs().get<int>("padTop");
-
-        auto withReLU = origStage->attrs().getOrDefault<bool>("withReLU", false);
-        auto negativeSlope = origStage->attrs().getOrDefault<float>("negativeSlope", 0.0f);
-        auto a0 = origStage->attrs().getOrDefault<uint32_t>("a0", 0);
-        auto a1 = origStage->attrs().getOrDefault<uint32_t>("a1", 0);
-        auto reluScale = origStage->attrs().getOrDefault<float>("reluScale", 1.0f);
-
-        auto withClamp = origStage->attrs().getOrDefault<bool>("withClamp", false);
-        auto clampMax =  origStage->attrs().getOrDefault<float>("clampMax", 6.0);
-
-        auto withPool = origStage->attrs().getOrDefault<bool>("withPool", false);
-        auto poolKernelSizeX = origStage->attrs().getOrDefault<int>("poolKernelSizeX", 0);
-        auto poolKernelSizeY = origStage->attrs().getOrDefault<int>("poolKernelSizeY", 0);
-        auto poolKernelStride = origStage->attrs().getOrDefault<int>("poolKernelStride", 0);
-        auto poolPadLeft = origStage->attrs().getOrDefault<int>("poolPadLeft", 0);
-        auto poolPadRight = origStage->attrs().getOrDefault<int>("poolPadRight", 0);
-        auto poolPadTop = origStage->attrs().getOrDefault<int>("poolPadTop", 0);
-        auto poolPadBottom = origStage->attrs().getOrDefault<int>("poolPadBottom", 0);
-
-        auto origOutputDesc = origStage->attrs().getOrDefault<DataDesc>("origConvOutput", origOutput->desc());
-
-        auto scaleFactor = origStage->attrs().getOrDefault<float>("scaleFactor", 1.0f);
-
-        auto& tileWeightsMap = origWeights->attrs().getOrSet<TileWeightsMap>("weightsPerTile", TileWeightsMap());
+        const HWConvStageOptions so(origStage);
+        const HWConvStageIO sio(origStage, origStage->output(0));
 
         //
         // Unsupported paddings
         //
 
-        auto hwInput = origInput;
-        auto hwOutput = origOutput;
-
         //
         // Try to find "best" tiling
         //
 
-        Optimizer opt(origStage->name(),
-                      hwInput->desc().dims(), hwOutput->desc().dims(),
-                      origOutputDesc.dims(),
-                      withPool,
-                      kernelSizeX, kernelSizeY,
-                      kernelStride,
-                      padLeft, padTop);
+        const size_t tilingsCount = 1;
+        const HWTilingNS::Direction direction =
+                HWTilingNS::Direction::INPUT_TO_OUTPUT;
+                // HWTilingNS::Direction::OUTPUT_TO_INPUT;
+
+        const HWTilingNS::HWConvolutionTiler tiler1stAttempt(
+                HWTilingNS::ConvolutionOptions(origStage->name(),
+                     sio.origInput->desc().dims(), sio.origOutput->desc().dims(),
+                     sio.origOutputDesc.dims(),
+                     so.kernelSizeX, so.kernelSizeY,
+                     so.kernelStride,
+                     so.padLeft, so.padRight, so.padTop, so.padBottom, so.withPool),
+                direction, tilingsCount);
+
+        const HWTilingNS::HWConvolutionTiler& tiler =
+                (!tiler1stAttempt.isTilingPossible() && tiler1stAttempt.withPool()) ?
+                HWTilingNS::HWConvolutionTiler(
+                        HWTilingNS::ConvolutionOptions(origStage->name(),
+                             sio.origInput->desc().dims(), sio.origOutputDesc.dims(),
+                             sio.origOutputDesc.dims(),
+                             so.kernelSizeX, so.kernelSizeY,
+                             so.kernelStride,
+                             so.padLeft, so.padRight, so.padTop, so.padBottom, false),
+                        direction, tilingsCount) :
+                tiler1stAttempt;
 
         //
         // Use SW stage if tiling optimization failed
         //
 
-        if (!opt.optimize()) {
+        if (!tiler.isTilingPossible()) {
             origStage->attrs().set<bool>("tryHW", false);
 
-            auto swConvOutput = origOutput;
-            if (withReLU || withPool || withClamp) {
+            auto swConvOutput = sio.origOutput;
+            if (so.withReLU || so.withPool || so.withClamp) {
                 swConvOutput = model->addNewData(
                     origStage->name(),
-                    origOutputDesc);
-                swConvOutput->attrs().copyFrom(origOutput->attrs());
+                    sio.origOutputDesc);
+                swConvOutput->attrs().copyFrom(sio.origOutput->attrs());
 
                 model->replaceStageOutput(origStage->outputEdge(0), swConvOutput);
             }
 
             auto hwPoolInput = swConvOutput;
-            if (withReLU) {
-                auto swReluOutput = origOutput;
-                if (withPool) {
+            if (so.withReLU) {
+                auto swReluOutput = sio.origOutput;
+                if (so.withPool) {
                     swReluOutput = model->addNewData(
                         origStage->name() + "@ReLU",
-                        origOutputDesc);
-                    swReluOutput->attrs().copyFrom(origOutput->attrs());
+                        sio.origOutputDesc);
+                    swReluOutput->attrs().copyFrom(sio.origOutput->attrs());
                 }
 
                 _stageBuilder->addReLUStage(
                     model,
                     origStage->name() + "@ReLU",
                     origStage->origLayer(),
-                    negativeSlope,
+                    so.negativeSlope,
                     swConvOutput,
                     swReluOutput);
 
                 hwPoolInput = swReluOutput;
             }
 
-            if (withClamp) {
-                auto swClampOutput = origOutput;
-                if (withPool) {
+            if (so.withClamp) {
+                auto swClampOutput = sio.origOutput;
+                if (so.withPool) {
                     swClampOutput = model->addNewData(
                             origStage->name() + "@Clamp",
-                            origOutputDesc);
-                    swClampOutput->attrs().copyFrom(origOutput->attrs());
+                            sio.origOutputDesc);
+                    swClampOutput->attrs().copyFrom(sio.origOutput->attrs());
                 }
 
                 _stageBuilder->addClampStage(
@@ -853,31 +133,31 @@ void PassImpl::run(const Model::Ptr& model) {
                         origStage->name() + "@Clamp",
                         origStage->origLayer(),
                         0.0,
-                        clampMax,
+                        so.clampMax,
                         swConvOutput,
                         swClampOutput);
 
                 hwPoolInput = swClampOutput;
             }
 
-            if (withPool) {
+            if (so.withPool) {
                 auto hwPoolStage = model->addNewStage<StubStage>(
                     origStage->name() + "@Pool",
                     StageType::StubMaxPool,
                     origStage->origLayer(),
                     {hwPoolInput},
-                    {origOutput});
+                    {sio.origOutput});
 
-                hwPoolStage->attrs().set<int>("kernelSizeX", poolKernelSizeX);
-                hwPoolStage->attrs().set<int>("kernelSizeY", poolKernelSizeY);
+                hwPoolStage->attrs().set<int>("kernelSizeX", so.poolKernelSizeX);
+                hwPoolStage->attrs().set<int>("kernelSizeY", so.poolKernelSizeY);
 
-                hwPoolStage->attrs().set<int>("kernelStrideX", poolKernelStride);
-                hwPoolStage->attrs().set<int>("kernelStrideY", poolKernelStride);
+                hwPoolStage->attrs().set<int>("kernelStrideX", so.poolKernelStride);
+                hwPoolStage->attrs().set<int>("kernelStrideY", so.poolKernelStride);
 
-                hwPoolStage->attrs().set<int>("padLeft", poolPadLeft);
-                hwPoolStage->attrs().set<int>("padRight", poolPadRight);
-                hwPoolStage->attrs().set<int>("padTop", poolPadTop);
-                hwPoolStage->attrs().set<int>("padBottom", poolPadBottom);
+                hwPoolStage->attrs().set<int>("padLeft", so.poolPadLeft);
+                hwPoolStage->attrs().set<int>("padRight", so.poolPadRight);
+                hwPoolStage->attrs().set<int>("padTop", so.poolPadTop);
+                hwPoolStage->attrs().set<int>("padBottom", so.poolPadBottom);
 
                 hwPoolStage->attrs().set<bool>("excludePad", false);
 
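
The stage attributes used throughout this fallback path (attrs().set<T>(name, value), attrs().getOrDefault<T>(name, default)) form a typed, string-keyed dictionary; the new HWConvStageOptions/HWConvStageIO helpers simply bundle those lookups. A minimal sketch of such a container follows; ToyAttributes is an std::any based illustration only, not the real vpu attribute map.

// Typed attribute dictionary sketch: set<T> stores a value under a name,
// getOrDefault<T> returns the stored value or the supplied default.
#include <any>
#include <iostream>
#include <map>
#include <string>

class ToyAttributes {
public:
    template <typename T>
    void set(const std::string& name, const T& value) { _map[name] = value; }

    template <typename T>
    T getOrDefault(const std::string& name, const T& def) const {
        const auto it = _map.find(name);
        return it == _map.end() ? def : std::any_cast<T>(it->second);
    }

private:
    std::map<std::string, std::any> _map;
};

int main() {
    ToyAttributes attrs;
    attrs.set<int>("poolKernelSizeX", 2);
    attrs.set<bool>("tryHW", true);

    std::cout << attrs.getOrDefault<int>("poolKernelSizeX", 0) << "\n";  // 2
    std::cout << attrs.getOrDefault<int>("poolKernelSizeY", 0) << "\n";  // 0 (default)
    return 0;
}
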
@@ -887,479 +167,38 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        //
-        // Remove merged pool if we failed to optimize tiling with it
-        //
-
-        model->disconnectStageDatas(origStage);
-
-        if (withPool && !opt.withPool()) {
-            auto hwPoolInput = model->addNewData(
-                origStage->name(),
-                origOutputDesc);
-            hwPoolInput->attrs().copyFrom(origOutput->attrs());
-
-            auto hwPoolStage = model->addNewStage<StubStage>(
-                origStage->name() + "@Pool",
-                StageType::StubMaxPool,
-                origStage->origLayer(),
-                {hwPoolInput},
-                {hwOutput});
-
-            hwPoolStage->attrs().set<int>("kernelSizeX", poolKernelSizeX);
-            hwPoolStage->attrs().set<int>("kernelSizeY", poolKernelSizeY);
-
-            hwPoolStage->attrs().set<int>("kernelStrideX", poolKernelStride);
-            hwPoolStage->attrs().set<int>("kernelStrideY", poolKernelStride);
-
-            hwPoolStage->attrs().set<int>("padLeft", poolPadLeft);
-            hwPoolStage->attrs().set<int>("padRight", poolPadRight);
-            hwPoolStage->attrs().set<int>("padTop", poolPadTop);
-            hwPoolStage->attrs().set<int>("padBottom", poolPadBottom);
-
-            hwPoolStage->attrs().set<bool>("excludePad", false);
-
-            hwPoolStage->attrs().set<bool>("tryHW", true);
-
-            hwOutput = hwPoolInput;
-
-            withPool = false;
-        }
-
-        //
-        // Broadcast input/output if needed
-        //
-
-        const auto& tiling = opt.getTiling();
-
-        int totalExtendedInputDimC = 0;
-        int maxExtendedOutputDimC = 0;
-        for (const auto& planeTile : tiling->planeTiles) {
-            for (const auto& channelTile : planeTile->channelTiles) {
-                totalExtendedInputDimC = std::max(totalExtendedInputDimC, channelTile->channelStartIndex + channelTile->extendedInputDimC);
-                maxExtendedOutputDimC = std::max(maxExtendedOutputDimC, channelTile->extendedOutputDimC);
-            }
-        }
-
-        auto origOutputDimC = hwOutput->desc().dim(Dim::C);
-
-        if (totalExtendedInputDimC > hwInput->desc().dim(Dim::C)) {
-            auto newDesc = hwInput->desc();
-            newDesc.setDim(Dim::C, totalExtendedInputDimC);
-
-            auto hwInputExtended = model->duplicateData(
-                hwInput,
-                "@extended",
-                newDesc);
-
-            _stageBuilder->addBroadcastStage(
-                model,
-                origStage->name() + "@broadcast-input",
-                origStage->origLayer(),
-                hwInput,
-                hwInputExtended);
-
-            hwInput = hwInputExtended;
-        }
-
-        //
-        // Create HW biases
-        //
-
-        auto hwBiases = tileWeightsMap[BIASES_IND];
-        if (hwBiases == nullptr) {
-            if (origBiases->usage() == DataUsage::Fake) {
-                hwBiases = model->addFakeData();
-            } else {
-                auto origBiasesContent = origBiases->content();
-                IE_ASSERT(origBiasesContent != nullptr);
-
-                auto origBiasesPtr = origBiasesContent->get<fp16_t>();
-                IE_ASSERT(origBiasesPtr != nullptr);
-
-                auto hwTileBiasesBlob = ie::make_shared_blob<fp16_t>(InferenceEngine::TensorDesc(
-                    ie::Precision::FP16,
-                    {static_cast<size_t>(maxExtendedOutputDimC)},
-                    ie::Layout::C));
-                hwTileBiasesBlob->allocate();
-
-                auto hwTileBiasesBlobPtr = hwTileBiasesBlob->buffer().as<fp16_t*>();
-                IE_ASSERT(hwTileBiasesBlobPtr != nullptr);
-
-                std::fill_n(hwTileBiasesBlobPtr, maxExtendedOutputDimC, ie::PrecisionUtils::f32tof16(0.0f));
-                std::copy_n(origBiasesPtr, origOutputDimC, hwTileBiasesBlobPtr);
-
-                hwBiases = model->duplicateData(
-                    origBiases,
-                    "@HW",
-                    DataDesc({maxExtendedOutputDimC}),
-                    ieBlobContent(hwTileBiasesBlob));
-
-                if (scaleFactor != 1.0f) {
-                    auto hwBiasesScaled = model->duplicateData(
-                        hwBiases,
-                        formatString("@SCALE=%f", scaleFactor),
-                        hwBiases->desc(),
-                        scaleContent(hwBiases->content(), scaleFactor));
-                    hwBiasesScaled->attrs().getOrSet<float>("scaleFactor", 1.0f) *= scaleFactor;
-
-                    hwBiases = hwBiasesScaled;
-                }
-            }
-
-            tileWeightsMap[BIASES_IND] = hwBiases;
-        }
-
-        //
-        // Create HW scales
-        //
-
-        auto hwScales = tileWeightsMap[SCALES_IND];
-        if (hwScales == nullptr) {
-            float fullScale = 1.0f / scaleFactor;
-            if (tiling->socTiles == 1 && reluScale != 1.0f) {
-                fullScale *= reluScale;
-            }
-
-            if (fullScale == 1.0f) {
-                hwScales = model->addFakeData();
-            } else {
-                hwScales = model->addConstData(
-                    origStage->name() + "@scales",
-                    DataDesc({maxExtendedOutputDimC}),
-                    replicateContent(fullScale, maxExtendedOutputDimC));
-            }
-
-            tileWeightsMap[SCALES_IND] = hwScales;
-        }
-
-        //
-        // Create HW tiles
-        //
-
-        DataVector hwInputTiles;
-        std::vector<DimValues> hwInputTilesOffsets;
+        model->disconnectStage(origStage);
 
-        DataVector hwOutputTiles;
-        std::vector<DimValues> hwOutputTilesOffsets;
-
-        hwInputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-        hwInputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-        hwOutputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-        hwOutputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-
-        for (const auto& planeTile : tiling->planeTiles) {
-            auto planeTilePostfix = getPlaneTilePostfix(planeTile);
-
-            //
-            // Create output tile
-            //
-
-            Data hwOutputPlaneTile;
-
-            if (tiling->sohTiles == 1 && tiling->sowTiles == 1) {
-                hwOutputPlaneTile = hwOutput;
-            } else {
-                auto newDesc = hwOutput->desc();
-                newDesc.setDim(Dim::W, planeTile->widthInfo.outputEndIndex - planeTile->widthInfo.outputStartIndex);
-                newDesc.setDim(Dim::H, planeTile->heightInfo.outputEndIndex - planeTile->heightInfo.outputStartIndex);
-
-                hwOutputPlaneTile = model->duplicateData(
-                    hwOutput,
-                    planeTilePostfix,
-                    newDesc);
-
-                hwOutputTiles.emplace_back(hwOutputPlaneTile);
-                hwOutputTilesOffsets.emplace_back(
-                    DimValues({
-                        {Dim::W, planeTile->widthInfo.outputStartIndex},
-                        {Dim::H, planeTile->heightInfo.outputStartIndex}
-                    }));
-            }
+        for (const auto &tiling : tiler.getHwTilings()) {
+            HWConvStageTiler hwStageTiler(so, sio, model,
+                    origStage, _stageBuilder, tiling, so.withPool && !tiler.withPool());
 
             //
-            // Add alignment to output tile if needed
+            // Split/concat input/output tiles
             //
 
-            if ((planeTile->widthInfo.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                auto hwOutputPlaneTileAligned = model->duplicateData(
-                    hwOutputPlaneTile,
-                    "@aligned");
-
-                _stageBuilder->addCopyStage(
+            if (!hwStageTiler.hwInputTiles.empty()) {
+                _stageBuilder->addSplitStage(
                     model,
-                    origStage->name() + planeTilePostfix + "@align-output-ptr",
+                    origStage->name() + "@split-input",
                     origStage->origLayer(),
-                    hwOutputPlaneTileAligned,
-                    hwOutputPlaneTile);
-
-                hwOutputPlaneTile = hwOutputPlaneTileAligned;
+                    std::move(hwStageTiler.hwInputTilesOffsets),
+                    hwStageTiler.hwInput,
+                    hwStageTiler.hwInputTiles);
             }
 
-            Data prevPartialSum;
-
-            for (const auto& channelTile : planeTile->channelTiles) {
-                auto channelTilePostfix = getChannelTilePostfix(channelTile);
-
-                auto tilePostfix = planeTilePostfix + channelTilePostfix;
-
-                auto hwOutputTile = hwOutputPlaneTile;
-
-                //
-                // Create input tile
-                //
-
-                Data hwInputTile;
-
-                if (tiling->sohTiles == 1 && tiling->sowTiles == 1 && tiling->socTiles == 1) {
-                    hwInputTile = hwInput;
-                } else {
-                    auto newDesc = hwInput->desc();
-                    newDesc.setDim(Dim::W, planeTile->widthInfo.inputWithJunk);
-                    newDesc.setDim(Dim::H, planeTile->heightInfo.inputWithJunk);
-                    newDesc.setDim(Dim::C, channelTile->extendedInputDimC);
-
-                    hwInputTile = model->duplicateData(
-                        hwInput,
-                        tilePostfix,
-                        newDesc);
-
-                    hwInputTiles.emplace_back(hwInputTile);
-                    hwInputTilesOffsets.emplace_back(
-                        DimValues({
-                            {Dim::W, planeTile->widthInfo.inputStartIndex},
-                            {Dim::H, planeTile->heightInfo.inputStartIndex},
-                            {Dim::C, channelTile->channelStartIndex}
-                        }));
-                }
-
-                //
-                // Add alignment to input tile if needed
-                //
-
-                if ((planeTile->widthInfo.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                    auto hwInputTileAligned = model->duplicateData(
-                        hwInputTile,
-                        "@aligned");
-
-                    _stageBuilder->addCopyStage(
-                        model,
-                        origStage->name() + tilePostfix + "@align-input-ptr",
-                        origStage->origLayer(),
-                        hwInputTile,
-                        hwInputTileAligned);
-
-                    hwInputTile = hwInputTileAligned;
-                }
-
-                //
-                // Process partial output for split-over-channels
-                //
-
-                if (tiling->socTiles > 1) {
-                    auto hwConvPartialOutput = model->duplicateData(
-                        hwOutputTile,
-                        channelTilePostfix + "@partial");
-
-                    if (channelTile->socInd == 0) {
-                        prevPartialSum = hwConvPartialOutput;
-                    } else {
-                        auto sumPartialOutput = hwOutputTile;
-                        if (channelTile->socInd < tiling->socTiles - 1 || withReLU || withClamp) {
-                            sumPartialOutput = model->duplicateData(
-                                hwOutputTile,
-                                channelTilePostfix + "@accum");
-                        }
-
-                        _stageBuilder->addSumStage(
-                            model,
-                            origStage->name() + tilePostfix + "@accum",
-                            origStage->origLayer(),
-                            prevPartialSum, hwConvPartialOutput,
-                            sumPartialOutput);
-
-                        if (channelTile->socInd == tiling->socTiles - 1 && withReLU) {
-                            _stageBuilder->addReLUStage(
-                                model,
-                                origStage->name() + tilePostfix + "@ReLU",
-                                origStage->origLayer(),
-                                negativeSlope,
-                                sumPartialOutput,
-                                hwOutputTile);
-                        }
-
-                        if (channelTile->socInd == tiling->socTiles - 1 && withClamp) {
-                            _stageBuilder->addClampStage(
-                                    model,
-                                    origStage->name() + tilePostfix + "@Clamp",
-                                    origStage->origLayer(),
-                                    0.0,
-                                    clampMax,
-                                    sumPartialOutput,
-                                    hwOutputTile);
-                        }
-
-                        prevPartialSum = sumPartialOutput;
-                    }
-
-                    hwOutputTile = hwConvPartialOutput;
-                }
-
-                //
-                // Process output junk if needed
-                //
-
-                if (planeTile->heightInfo.outputJunkBefore != 0 ||
-                    planeTile->heightInfo.outputJunkAfter != 0 ||
-                    planeTile->widthInfo.outputJunkBefore != 0 ||
-                    planeTile->widthInfo.outputJunkAfter != 0) {
-                    auto newDesc = hwOutputTile->desc();
-                    newDesc.setDim(Dim::W, planeTile->widthInfo.outputWithJunk);
-                    newDesc.setDim(Dim::H, planeTile->heightInfo.outputWithJunk);
-
-                    auto hwOutputTileWithJunk = model->duplicateData(
-                        hwOutputTile,
-                        "@with-junk",
-                        newDesc);
-
-                    DimValues innerOffset;
-                    innerOffset.set(Dim::W, planeTile->widthInfo.outputJunkBefore);
-                    innerOffset.set(Dim::H, planeTile->heightInfo.outputJunkBefore);
-
-                    _stageBuilder->addShrinkStage(
-                        model,
-                        origStage->name() + tilePostfix + "@remove-junk",
-                        origStage->origLayer(),
-                        hwOutputTileWithJunk,
-                        hwOutputTile,
-                        innerOffset);
-
-                    hwOutputTile = hwOutputTileWithJunk;
-                }
-
-                //
-                // Create tile weights
-                //
-
-                auto hwTileWeights = tileWeightsMap[channelTile->socInd];
-
-                if (hwTileWeights == nullptr) {
-                    hwTileWeights = model->duplicateData(
-                        origWeights,
-                        "@HW" + channelTilePostfix,
-                        DataDesc({8, kernelSizeX * kernelSizeY, channelTile->extendedInputDimC, channelTile->extendedOutputDimC / 8}),
-                        std::make_shared<HwWeightsContent>(
-                            origWeights->content(),
-                            origWeights->desc(),
-                            channelTile->numInputChannels,
-                            channelTile->channelStartIndex));
-
-                    if (scaleFactor != 1.0f) {
-                        auto hwTileWeightsScaled = model->duplicateData(
-                            hwTileWeights,
-                            formatString("@SCALE=%f", scaleFactor),
-                            hwTileWeights->desc(),
-                            scaleContent(hwTileWeights->content(), scaleFactor));
-                        hwTileWeightsScaled->attrs().getOrSet<float>("scaleFactor", 1.0f) *= scaleFactor;
-
-                        hwTileWeights = hwTileWeightsScaled;
-                    }
-
-                    tileWeightsMap[channelTile->socInd] = hwTileWeights;
-                }
-
-                //
-                // Create tile biases
-                //
-
-                Data hwTileBiases;
-
-                if (channelTile->socInd > 0) {
-                    hwTileBiases = model->addFakeData();
-                } else {
-                    hwTileBiases = hwBiases;
-                }
-
-                //
-                // Create HW stage for tile
-                //
-
-                auto hwOutputTileDims = hwOutputTile->desc().dims();
-                if (withPool) {
-                    hwOutputTileDims.set(Dim::W, hwOutputTileDims[Dim::W] * poolKernelStride - poolPadLeft - poolPadRight);
-                    hwOutputTileDims.set(Dim::H, hwOutputTileDims[Dim::H] * poolKernelStride - poolPadTop - poolPadBottom);
-                }
-
-                auto hwPad = getHwPaddingInfo(
-                    hwInputTile->desc().dims(), hwOutputTileDims,
-                    kernelSizeX, kernelSizeY,
-                    kernelStride, kernelStride,
-                    padLeft, padTop);
-
-                auto hwStage = model->addNewStage<MyriadXHwStage>(
-                    origStage->name() + tilePostfix,
-                    StageType::MyriadXHwOp,
+            if (!hwStageTiler.hwOutputTiles.empty()) {
+                _stageBuilder->addConcatStage(
+                    model,
+                    origStage->name() + "@concat-output",
                     origStage->origLayer(),
-                    {hwInputTile, hwTileWeights, hwTileBiases, hwScales},
-                    {hwOutputTile});
-
-                hwStage->attrs().set<HwOpType>("hwOpType", withPool ? HwOpType::CONV_POOL : HwOpType::CONV);
-
-                hwStage->attrs().set<int>("kernelSizeX", kernelSizeX);
-                hwStage->attrs().set<int>("kernelSizeY", kernelSizeY);
-                hwStage->attrs().set<int>("kernelStride", kernelStride);
-
-                if (withPool) {
-                    hwStage->attrs().set<int>("poolKernelSizeX", poolKernelSizeX);
-                    hwStage->attrs().set<int>("poolKernelSizeY", poolKernelSizeY);
-                }
-
-                hwStage->attrs().set<HwPaddingInfo>("pad", hwPad);
-
-                hwStage->attrs().set<HwConvTileInfo>("tiling", channelTile->finalTiles);
-
-                if (tiling->socTiles > 1) {
-                    hwStage->attrs().set<bool>("withReLU", false);
-                    hwStage->attrs().set<bool>("withClamp", false);
-                } else {
-                    hwStage->attrs().set<bool>("withReLU", withReLU);
-                    hwStage->attrs().set<uint32_t>("a0", a0);
-                    hwStage->attrs().set<uint32_t>("a1", a1);
-                    hwStage->attrs().set<float>("negativeSlope", negativeSlope);
-
-                    hwStage->attrs().set<bool>("withClamp", withClamp);
-                    hwStage->attrs().set<float>("clampMax", clampMax);
-                }
-
-                hwStage->attrs().set<float>("scaleFactor", scaleFactor);
+                    std::move(hwStageTiler.hwOutputTilesOffsets),
+                    hwStageTiler.hwOutputTiles,
+                    hwStageTiler.hwOutput);
             }
         }
 
         //
-        // Split/concat input/output tiles
-        //
-
-        if (!hwInputTiles.empty()) {
-            _stageBuilder->addSplitStage(
-                model,
-                origStage->name() + "@split-input",
-                origStage->origLayer(),
-                std::move(hwInputTilesOffsets),
-                hwInput,
-                hwInputTiles);
-        }
-
-        if (!hwOutputTiles.empty()) {
-            _stageBuilder->addConcatStage(
-                model,
-                origStage->name() + "@concat-output",
-                origStage->origLayer(),
-                std::move(hwOutputTilesOffsets),
-                hwOutputTiles,
-                hwOutput);
-        }
-
-        //
         // Remove original stage
         //
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling/hw_convolution_tiler.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling/hw_convolution_tiler.cpp
new file mode 100644 (file)
index 0000000..0152984
--- /dev/null
@@ -0,0 +1,757 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+#include <memory>
+#include <utility>
+#include <vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp>
+
+namespace vpu {
+
+namespace HWTilingNS {
+
+bool operator<(const TilingOption& a, const TilingOption& b) {
+    return a.cost < b.cost || (isDoubleEqual(a.cost, b.cost) && a.totalNumTiles < b.totalNumTiles);
+}
+
+class ConvInputToOutputDirection;
+class ConvOutputToInputDirection;
+
+// Input -> Output case
+class ConvInputToOutputDirection: public GraphDataTiling {
+public:
+    explicit ConvInputToOutputDirection(const ConvolutionOptions &co): GraphDataTiling(co, Direction::INPUT_TO_OUTPUT) {}
+    ConvInputToOutputDirection(const ConvInputToOutputDirection &other): GraphDataTiling(other) {}
+    void initTileSizes() override {
+        _useCeil = ceilNeeded();
+
+        _inputTileDims.set(Dim::W, std::min(CNN_MAX_INPUT_WIDTH, _co._inputDims[Dim::W]));
+        _inputTileDims.set(Dim::H, std::min(CNN_MAX_INPUT_HEIGHT, _co._inputDims[Dim::H]));
+        _inputTileDims.set(Dim::C, std::min(CNN_MAX_INPUT_CHANNELS, _co._inputDims[Dim::C]));
+
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::C, _co._outputDims[Dim::C]);
+
+        correctOutputPlaneSize();
+    }
+
+    // Input -> Output case
+    void setInputNOutputTileDimensions(const int tileDimW, const int tileDimH, const int tileDimC) override {
+        _inputTileDims.set(Dim::W, tileDimW);
+        _inputTileDims.set(Dim::H, tileDimH);
+        _inputTileDims.set(Dim::C, tileDimC);
+
+        correctOutputPlaneSize();
+    }
+
+    // Input -> Output case
+    void applyTilingOption(const TilingOption &tilingOption) override {
+        int tileDimW = divUp(_co._inputDims[Dim::W], tilingOption.numWidthTiles);
+        int tileDimH = divUp(_co._inputDims[Dim::H], tilingOption.numHeightTiles);
+        const int tileDimC = divUp(_co._inputDims[Dim::C], tilingOption.numChannelTiles);
+
+        tileDimW = divUp(tileDimW, _co._kernelStride) * _co._kernelStride;
+        tileDimH = divUp(tileDimH, _co._kernelStride) * _co._kernelStride;
+
+        _inputTileDims.set(Dim::W, tileDimW);
+        _inputTileDims.set(Dim::H, tileDimH);
+        _inputTileDims.set(Dim::C, tileDimC);
+
+        correctOutputPlaneSize();
+    }
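    // [editor's illustration, not part of this diff] divUp() is assumed to be the
    // usual ceil-division helper. For example, splitting input W = 224 into
    // 3 width tiles with kernelStride = 2:
    //     divUp(224, 3)    = 75
    //     divUp(75, 2) * 2 = 76   (rounded up to a multiple of the stride)
    // Three 76-column input tiles (228 columns in total) cover the 224-column
    // input, and each tile width maps to a whole number of output positions.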
+
+    void correctPlaneSize() override {
+        correctOutputPlaneSize();
+    }
+
+    void correctOutputPlaneSize() {
+        int maxOutputWidth = calcOutputSize(_inputTileDims[Dim::W], _co._kernelSizeX, _co._kernelStride,
+                _co._paddingLeft, _co._paddingRight, _useCeil);
+        if (_co._withPool) {
+            maxOutputWidth /= 2;
+        }
+        _outputTileDims.set(Dim::W, std::min(_outputTileDims[Dim::W], maxOutputWidth));
+
+        int maxOutputHeight = calcOutputSize(_inputTileDims[Dim::H], _co._kernelSizeY, _co._kernelStride,
+                _co._paddingTop, _co._paddingBottom, _useCeil);
+        if (_co._withPool) {
+            maxOutputHeight /= 2;
+        }
+        _outputTileDims.set(Dim::H, std::min(_outputTileDims[Dim::H], maxOutputHeight));
+    }
+
+    const DimValues &splitOverTensorDims() override {
+        return _co._inputDims;
+    }
+
+    void patternMatching() override;
+
+private:
+    bool ceilNeeded() {
+        int tempX = _co._inputDims[Dim::W] + _co._paddingLeft + _co._paddingRight - _co._kernelSizeX;
+        int tempY = _co._inputDims[Dim::H] + _co._paddingTop + _co._paddingBottom - _co._kernelSizeY;
+
+        int outWidthWithOutCeil = (tempX + _co._kernelStride) / _co._kernelStride;
+        int outHeightWithOutCeil = (tempY + _co._kernelStride) / _co._kernelStride;
+
+        int outWidthWithCeil = static_cast<int>(std::ceil(static_cast<double>(tempX) / _co._kernelStride + 1));
+        int outHeightWithCeil = static_cast<int>(std::ceil(static_cast<double>(tempY) / _co._kernelStride + 1));
+
+        if ((_co._origOutputDims[Dim::W] != outWidthWithCeil) && (_co._origOutputDims[Dim::W] != outWidthWithOutCeil)) {
+            VPU_THROW_EXCEPTION
+                    << "Internal error: Output in " << _co._stageName << " has incorrect width dimension. Expected: "
+                    << outWidthWithCeil << " or " << outWidthWithOutCeil << " Actual: " << _co._origOutputDims[Dim::W];
+        }
+
+        if ((_co._origOutputDims[Dim::H] != outHeightWithCeil) && (_co._origOutputDims[Dim::H] != outHeightWithOutCeil)) {
+            VPU_THROW_EXCEPTION
+                    << "Internal error: Output in " << _co._stageName << " has incorrect height dimension. Expected: "
+                    << outHeightWithCeil << " or " << outHeightWithOutCeil << " Actual: " << _co._origOutputDims[Dim::H];
+        }
+
+        if ((_co._origOutputDims[Dim::W] == outWidthWithOutCeil) && (_co._origOutputDims[Dim::H] == outHeightWithOutCeil)) {
+            return false;
+        } else {
+            return true;
+        }
+    }
+};
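// [editor's illustration, not part of this diff] ceilNeeded() above distinguishes
// floor- from ceil-rounded output sizes recorded by the original framework.
// For example, with input W = 8, a 3x3 kernel, stride 2 and no padding:
//     tempX               = 8 - 3             = 5
//     outWidthWithOutCeil = (5 + 2) / 2       = 3   (floor rounding)
//     outWidthWithCeil    = ceil(5 / 2.0 + 1) = 4   (ceil rounding)
// A recorded output width of 3 or 4 selects the rounding mode; any other value
// is reported as an internal error.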
+
+// Output -> Input case
+class ConvOutputToInputDirection: public GraphDataTiling {
+public:
+    explicit ConvOutputToInputDirection(const ConvolutionOptions &co): GraphDataTiling(co, Direction::OUTPUT_TO_INPUT) {}
+    ConvOutputToInputDirection(const ConvOutputToInputDirection &other): GraphDataTiling(other) {}
+    void initTileSizes() override {
+        _useCeil = false;   // no ceiling needed for ConvOutputToInputDirection
+
+        _outputTileDims.set(Dim::W, std::min(CNN_MAX_INPUT_WIDTH, _co._outputDims[Dim::W]));
+        _outputTileDims.set(Dim::H, std::min(CNN_MAX_INPUT_HEIGHT, _co._outputDims[Dim::H]));
+        _outputTileDims.set(Dim::C, _co._outputDims[Dim::C]);
+
+        _inputTileDims.set(Dim::W, std::min(CNN_MAX_INPUT_WIDTH, _co._inputDims[Dim::W]));
+        _inputTileDims.set(Dim::H, std::min(CNN_MAX_INPUT_HEIGHT, _co._inputDims[Dim::H]));
+        _inputTileDims.set(Dim::C, std::min(CNN_MAX_INPUT_CHANNELS, _co._inputDims[Dim::C]));
+
+        correctInputPlaneSize();
+    }
+    // Output -> Input case
+    void setInputNOutputTileDimensions(const int tileDimW, const int tileDimH, const int tileDimC) override {
+        _outputTileDims.set(Dim::W, tileDimW);
+        _outputTileDims.set(Dim::H, tileDimH);
+        _outputTileDims.set(Dim::C, tileDimC);
+
+        correctInputPlaneSize();
+    }
+
+    // Output -> Input case
+    void applyTilingOption(const TilingOption &tilingOption) override {
+        const int tileDimW = divUp(_co._outputDims[Dim::W], tilingOption.numWidthTiles);
+        const int tileDimH = divUp(_co._outputDims[Dim::H], tilingOption.numHeightTiles);
+        // split only input tensor over C dim
+        const int tileDimC = divUp(_co._inputDims[Dim::C], tilingOption.numChannelTiles);
+
+        _outputTileDims.set(Dim::W, tileDimW);
+        _outputTileDims.set(Dim::H, tileDimH);
+        _inputTileDims.set(Dim::C, tileDimC);
+
+        correctInputPlaneSize();
+    }
+
+    int calcInputSize(
+            int outputSize,
+            int kernelSize, int kernelStride,
+            int padBefore, int padAfter
+    ) {
+        return (outputSize - 1) * kernelStride + kernelSize - padBefore - padAfter;
+    }
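    // [editor's illustration, not part of this diff] calcInputSize() inverts the
    // usual convolution output-size relation
    //     out = (in + padBefore + padAfter - kernelSize) / kernelStride + 1
    // (floor rounding). For example, a 28-wide output tile with a 3x3 kernel,
    // stride 1 and 1/1 padding needs (28 - 1) * 1 + 3 - 1 - 1 = 28 input columns,
    // while with stride 2 and no padding a 13-wide output tile needs
    // (13 - 1) * 2 + 3 = 27 input columns.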
+
+    void correctPlaneSize() override {
+        correctInputPlaneSize();
+    }
+
+    void correctInputPlaneSize() {
+        int maxInputWidth = calcInputSize(_outputTileDims[Dim::W], _co._kernelSizeX, _co._kernelStride, _co._paddingLeft,
+                                          _co._paddingRight);
+        if (_co._withPool) {
+            maxInputWidth *= 2;
+        }
+        _inputTileDims.set(Dim::W, std::min(_inputTileDims[Dim::W], maxInputWidth));
+
+        int maxInputHeight = calcInputSize(_outputTileDims[Dim::H], _co._kernelSizeY, _co._kernelStride, _co._paddingTop,
+                                           _co._paddingBottom);
+        if (_co._withPool) {
+            maxInputHeight *= 2;
+        }
+        _inputTileDims.set(Dim::H, std::min(_inputTileDims[Dim::H], maxInputHeight));
+    }
+
+    const DimValues &splitOverTensorDims() override {
+        return _co._outputDims;
+    }
+
+    void patternMatching() override {
+        // noop
+    }
+};
+
+HWConvolutionTiler::HWConvolutionTiler(const ConvolutionOptions &co,
+                   Direction direction,
+                   size_t maxTilingOptions) :
+        _co(co),
+        _searcher(_co, direction, maxTilingOptions) {
+    _tilingPossible = tileForHW();
+}
+
+bool HWConvolutionTiler::tileForHW() {
+    const std::vector<TilingOption> &tilingOptions = _searcher.tilingOptions();
+    if (tilingOptions.empty()) {
+        return false;
+    }
+
+    for (const TilingOption &tilingOption : tilingOptions) {
+        const HWConvolutionTileLayoutCut tileLayoutCut = _searcher.tileLayoutCut(tilingOption);
+        if (tileLayoutCut.tileCutPossible()) {
+            _hwTilings.push_back(tileLayoutCut.hwTiling());
+        }
+    }
+
+    return _hwTilings.size() != 0;
+}
+
+void ConvInputToOutputDirection::patternMatching() {
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 1 && _co._paddingRight == 1  &&
+        _co._paddingTop == 1 && _co._paddingBottom == 1  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 512 && _co._inputDims[Dim::H] == 28 && _co._inputDims[Dim::W] == 28 &&
+        _co._outputDims[Dim::C] == 512) {
+        _inputTileDims.set(Dim::H, 28);
+        _inputTileDims.set(Dim::C, 172);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 1 && _co._paddingRight == 1  &&
+        _co._paddingTop == 1 && _co._paddingBottom == 1  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 256 && _co._inputDims[Dim::H] == 56 && _co._inputDims[Dim::W] == 56 &&
+        _co._outputDims[Dim::C] == 256) {
+        _inputTileDims.set(Dim::H, 30);
+        _inputTileDims.set(Dim::C, 128);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 1 && _co._paddingRight == 1  &&
+        _co._paddingTop == 1 && _co._paddingBottom == 1  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 64 && _co._inputDims[Dim::H] == 224 && _co._inputDims[Dim::W] == 224 &&
+        _co._outputDims[Dim::C] == 64) {
+        _inputTileDims.set(Dim::H, 82);
+        _inputTileDims.set(Dim::W, 82);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (_co._inputDims[Dim::C] == 512 &&
+        _co._inputDims[Dim::H] == 7 &&
+        _co._inputDims[Dim::W] == 7 &&
+        _co._outputDims[Dim::C] == 4096) {
+        _inputTileDims.set(Dim::C, 64);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 1 && _co._paddingRight == 1  &&
+        _co._paddingTop == 1 && _co._paddingBottom == 1  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 128 && _co._inputDims[Dim::H] == 112 && _co._inputDims[Dim::W] == 112 &&
+        _co._outputDims[Dim::C] == 128) {
+        _inputTileDims.set(Dim::H, 32);
+        _inputTileDims.set(Dim::W, 112);
+        _inputTileDims.set(Dim::C, 32);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (_co._inputDims[Dim::C] == 1088 &&
+        _co._inputDims[Dim::H] == 17 &&
+        _co._inputDims[Dim::W] == 17 &&
+        (_co._outputDims[Dim::C] == 128 || _co._outputDims[Dim::C] == 192)) {
+        _inputTileDims.set(Dim::H, 17);
+        _inputTileDims.set(Dim::C, 544);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (_co._inputDims[Dim::C] == 1024 &&
+        _co._inputDims[Dim::H] == 17 &&
+        _co._inputDims[Dim::W] == 17 &&
+        _co._outputDims[Dim::C] == 384) {
+        _inputTileDims.set(Dim::H, 17);
+        _inputTileDims.set(Dim::C, 512);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 0 && _co._paddingRight == 0  &&
+        _co._paddingTop == 0 && _co._paddingBottom == 0  && _co._kernelStride == 2 &&
+        _co._inputDims[Dim::C] == 384 && _co._inputDims[Dim::H] == 35 && _co._inputDims[Dim::W] == 35 &&
+        _co._outputDims[Dim::C] == 384) {
+        _inputTileDims.set(Dim::C, 194);
+        _inputTileDims.set(Dim::H, 35);
+        _inputTileDims.set(Dim::W, 35);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (_co._inputDims[Dim::C] == 192 &&
+        _co._inputDims[Dim::H] == 71 &&
+        _co._inputDims[Dim::W] == 71 &&
+        _co._outputDims[Dim::H] == 35) {
+        _inputTileDims.set(Dim::W, 71);
+        _inputTileDims.set(Dim::C, 96);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._inputDims[Dim::C] == 256 &&
+        _co._inputDims[Dim::H] == 128 &&
+        _co._inputDims[Dim::W] == 128 &&
+        _co._outputDims[Dim::C] == 256) {
+        _inputTileDims.set(Dim::W, 128);
+        _inputTileDims.set(Dim::H, 15);
+        _inputTileDims.set(Dim::C, 64);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._inputDims[Dim::C] == 512 &&
+        _co._inputDims[Dim::H] == 64 &&
+        _co._inputDims[Dim::W] == 64 &&
+        _co._outputDims[Dim::C] == 512) {
+        _inputTileDims.set(Dim::W, 64);
+        _inputTileDims.set(Dim::H, 10);
+        _inputTileDims.set(Dim::C, 128);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 1 && _co._kernelSizeY == 1 && _co._paddingLeft == 0 && _co._paddingRight == 0  &&
+        _co._paddingTop == 0 && _co._paddingBottom == 0  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 384 &&
+        _co._inputDims[Dim::H] == 56 &&
+        _co._inputDims[Dim::W] == 56 &&
+        _co._outputDims[Dim::C] == 64) {
+        _inputTileDims.set(Dim::C, 384);
+        _inputTileDims.set(Dim::H, 56);
+        _inputTileDims.set(Dim::W, 20);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 1 && _co._kernelSizeY == 1 && _co._paddingLeft == 0 && _co._paddingRight == 0  &&
+        _co._paddingTop == 0 && _co._paddingBottom == 0  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 2112 &&
+        _co._inputDims[Dim::H] == 14 &&
+        _co._inputDims[Dim::W] == 14 &&
+        _co._outputDims[Dim::C] == 1056) {
+        _inputTileDims.set(Dim::C, 556);
+        _inputTileDims.set(Dim::H, 14);
+        _inputTileDims.set(Dim::W, 14);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 1 && _co._paddingRight == 1  &&
+        _co._paddingTop == 1 && _co._paddingBottom == 1  && _co._kernelStride == 2 &&
+        _co._inputDims[Dim::C] == 256 &&
+        _co._inputDims[Dim::H] == 52 &&
+        _co._inputDims[Dim::W] == 52 &&
+        _co._outputDims[Dim::C] == 512) {
+        _inputTileDims.set(Dim::C, 128);
+        _inputTileDims.set(Dim::H, 52);
+        _inputTileDims.set(Dim::W, 52);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+
+    if (!_co._withPool &&
+        _co._kernelSizeX == 3 && _co._kernelSizeY == 3 && _co._paddingLeft == 1 && _co._paddingRight == 1  &&
+        _co._paddingTop == 1 && _co._paddingBottom == 1  && _co._kernelStride == 1 &&
+        _co._inputDims[Dim::C] == 256 &&
+        _co._inputDims[Dim::H] == 23 &&
+        _co._inputDims[Dim::W] == 23 &&
+        _co._outputDims[Dim::C] == 640) {
+        _inputTileDims.set(Dim::C, 256);
+        _inputTileDims.set(Dim::H, 14);
+        _inputTileDims.set(Dim::W, 23);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        correctPlaneSize();
+        return;
+    }
+}
+
+std::unique_ptr<GraphDataTiling> ConvGraphDataTilingFactory::makeDirTiling(const ConvolutionOptions &co,
+        Direction direction) {
+    if (direction == Direction::INPUT_TO_OUTPUT) {
+        return std::unique_ptr<GraphDataTiling>(new ConvInputToOutputDirection(co));
+    } else if (direction == Direction::OUTPUT_TO_INPUT) {
+        return std::unique_ptr<GraphDataTiling>(new ConvOutputToInputDirection(co));
+    } else {
+        IE_ASSERT(false) << "Unsupported direction";
+    }
+}
+
+std::unique_ptr<GraphDataTiling> ConvGraphDataTilingFactory::makeDirTiling(const GraphDataTiling &o) {
+    if (o.getDirection() == Direction::INPUT_TO_OUTPUT) {
+        return std::unique_ptr<GraphDataTiling>(
+                new ConvInputToOutputDirection(dynamic_cast<const ConvInputToOutputDirection&>(o)));
+    } else if (o.getDirection() == Direction::OUTPUT_TO_INPUT) {
+        return std::unique_ptr<GraphDataTiling>(
+                new ConvOutputToInputDirection(dynamic_cast<const ConvOutputToInputDirection&>(o)));
+    } else {
+        IE_ASSERT(false) << "Unsupported direction";
+    }
+}
+
+//
+// Looks for the optimal tiling according to the cost function. Modifies dimensions in dirTiling during the search.
+//
+std::vector<TilingOption> HWConvolutionTilingSearcher::selectBetterTiling() const {
+    const auto &env = CompileEnv::get();
+    GraphDataTiling &dirTiling = *_dirTiling;
+    FixedMaxHeap<TilingOption> tilingOptions(_maxTilingOptions);
+
+    // TODO: estimate these numbers
+    const int maxNumWidthTiles = 15;
+    const int maxNumHeightTiles = 15;
+    const int maxNumChannelTiles = _co._withPool ? 1 : 15;
+
+    const auto outputTileInitial = dirTiling.getOutputTileDims();
+    const auto inputTileInitial = dirTiling.getInputTileDims();
+
+    auto minInputTileDimW = 64;
+    auto minInputTileDimH = _co._kernelSizeY;
+    if (_co._withPool) {
+        minInputTileDimW *= 2;
+        minInputTileDimH *= 2;
+    }
+
+    const DimValues &splitOver = dirTiling.splitOverTensorDims();
+    const auto direction = dirTiling.getDirection();
+    // The Channel dimension is always split over the input tensor.
+    for (int numChannelTiles = 1; numChannelTiles <= maxNumChannelTiles; numChannelTiles++) {
+        const int tileSizeDimC = divUp(_co._inputDims[Dim::C], numChannelTiles);
+
+        // Split and iterate either over the input tensor or over the output tensor, depending on the direction.
+        for (int numWidthTiles = 1; numWidthTiles <= maxNumWidthTiles; numWidthTiles++) {
+            int tileSizeDimW = divUp(splitOver[Dim::W], numWidthTiles);
+
+            //
+            // Filter out SoW input tiles that are too small when the loops split the input tensor.
+            //
+
+            if (numWidthTiles > 1 && direction == Direction::INPUT_TO_OUTPUT) {
+                tileSizeDimW = divUp(tileSizeDimW, _co._kernelStride) * _co._kernelStride;
+
+                if (tileSizeDimW < minInputTileDimW) {
+                    break;
+                }
+            }
+
+            for (int numHeightTiles = 1; numHeightTiles <= maxNumHeightTiles; numHeightTiles++) {
+                int tileSizeDimH = divUp(splitOver[Dim::H], numHeightTiles);
+
+                //
+                // Filter out SoH input tiles that are too small when the loops split the input tensor.
+                //
+
+                if (numHeightTiles > 1 && direction == Direction::INPUT_TO_OUTPUT) {
+                    tileSizeDimH = divUp(tileSizeDimH, _co._kernelStride) * _co._kernelStride;
+
+                    if (tileSizeDimH < minInputTileDimH) {
+                        break;
+                    }
+                }
+
+                //
+                // Try current tile size.
+                //
+
+                dirTiling.resetInputTileDims(inputTileInitial);
+                dirTiling.resetOutputTileDims(outputTileInitial);
+
+                dirTiling.setInputNOutputTileDimensions(tileSizeDimW, tileSizeDimH, tileSizeDimC);
+
+                //
+                // Limitations for Conv+Pool case.
+                //
+
+                if (_co._withPool) {
+                    if (dirTiling.getOutputTileDims()[Dim::W] <= 2 ||
+                        dirTiling.getOutputTileDims()[Dim::H] <= 2) {
+                        break;
+                    }
+                }
+
+                //
+                // Check that tiling is valid.
+                //
+
+                // TODO: check internal input/output hardcoded limits
+                const auto heightTiles = calcHeightTiles(_co, dirTiling.getOutputTileDims(),
+                                                         dirTiling.useCeil());
+                const auto widthTiles = calcWidthTiles(_co, dirTiling.getOutputTileDims(), dirTiling.useCeil());
+
+                if (heightTiles.empty()) {
+                    continue;
+                }
+                if (widthTiles.empty()) {
+                    break;
+                }
+
+                bool isOK = true;
+                double solutionCost = 0.0;
+
+                for (const auto &heightTile : heightTiles) {
+                    for (const auto &widthTile : widthTiles) {
+                        //
+                        // Limitations for Conv+Pool case.
+                        //
+
+                        if (_co._withPool) {
+                            if (widthTile.inputWithJunk % 2 != 0 ||
+                                heightTile.inputWithJunk % 2 != 0 ||
+                                widthTile.outputWithJunk % 2 != 0 ||
+                                widthTile.outputWithJunk <= 2 ||
+                                heightTile.outputWithJunk <= 2) {
+                                isOK = false;
+                                break;
+                            }
+                        }
+
+                        //
+                        // Can use this tile.
+                        //
+
+                        auto tileInfo = splitHwConvIntoOutChannelsTiles(  // left as is, not the new version in the new API
+                                widthTile.inputWithJunk, heightTile.inputWithJunk, tileSizeDimC,
+                                outputTileInitial[Dim::C],
+                                _co._kernelSizeX, _co._kernelSizeY, _co._kernelStride);
+
+                        if (tileInfo.numDescr == 0) {
+                            isOK = false;
+                            break;
+                        }
+
+                        //
+                        // Output tile fits to CMX limitation.
+                        //
+
+                        DimValues fullOutputTileDims;
+                        fullOutputTileDims.set(Dim::W, widthTile.outputWithJunk);
+                        fullOutputTileDims.set(Dim::H, heightTile.outputWithJunk);
+                        fullOutputTileDims.set(Dim::C, outputTileInitial[Dim::C]);
+
+                        // TODO: support HCW
+                        if (calculateHwBufferSize(fullOutputTileDims) > env.resources.cmxLimit) {
+                            isOK = false;
+                            break;
+                        }
+
+                        //
+                        // Calc tile cost.
+                        //
+
+                        solutionCost += tileInfo.cost * numChannelTiles;
+
+                        // Alignment for output
+                        if ((widthTile.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                            solutionCost += 1.0
+                                            * widthTile.outputWithJunk
+                                            * heightTile.outputWithJunk
+                                            * outputTileInitial[Dim::C];
+                        }
+
+                        // Alignment for input
+                        if ((widthTile.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                            solutionCost += 1.0
+                                            * widthTile.inputWithJunk
+                                            * heightTile.inputWithJunk
+                                            * tileInfo.extendedInputDimC;
+                        }
+
+                        // SoC overhead
+                        solutionCost += 1.0
+                                        * (numChannelTiles - 1)
+                                        * widthTile.outputWithJunk
+                                        * heightTile.outputWithJunk
+                                        * outputTileInitial[Dim::C];
+                    }
+
+                    if (!isOK) {
+                        break;
+                    }
+                }
+
+                if (!isOK) {
+                    continue;
+                }
+
+                //
+                // Put to the pool of best options.
+                //
+
+                const int totalNumTiles = numWidthTiles * numHeightTiles * numChannelTiles;
+
+                const TilingOption to =
+                        {numWidthTiles, numHeightTiles, numChannelTiles, totalNumTiles, solutionCost};
+                tilingOptions.push(to);
+
+                // Skip smaller SoC tiling.
+                break;
+            }
+        }
+    }
+
+    dirTiling.resetInputTileDims(inputTileInitial);
+    dirTiling.resetOutputTileDims(outputTileInitial);
+
+    return tilingOptions.sorted();
+}
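// [editor's note, not part of this diff] A rough sketch of the cost model used in
// selectBetterTiling() above, summed over all width/height tiles of one option:
//     cost  = tileInfo.cost * numChannelTiles
//           + outW * outH * outC                          (added when the output tile start is not 16-byte aligned)
//           + inW  * inH  * extendedInputDimC             (added when the input tile start is not 16-byte aligned)
//           + (numChannelTiles - 1) * outW * outH * outC  (split-over-channels accumulation overhead)
// Options with lower cost (and, on ties, fewer tiles) are preferred via
// operator<(TilingOption, TilingOption).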
+
+HWConvolutionTileLayoutCut HWConvolutionTilingSearcher::tileLayoutCut(const TilingOption &option) const {
+    return HWConvolutionTileLayoutCut(*_dirTiling, option);
+}
+
+std::ostream& operator<<(std::ostream &o, const TilingOption &to) {
+    o << "WHC: "
+        << to.numWidthTiles << "x"
+        << to.numHeightTiles << "x"
+        << to.numChannelTiles
+        << " Tot: " << to.totalNumTiles << " " << " cost: " << to.cost;
+
+    return o;
+}
+
+// based on height of the tile for output tensor
+SmallVector<HwPlaneTileInfo> calcHeightTiles(const ConvolutionOptions &_co,
+                                             const DimValues &outputTileDims, bool useCeil) {
+    SmallVector<HwPlaneTileInfo> heightTiles;
+
+    if (outputTileDims[Dim::H] == _co._outputDims[Dim::H]) {
+        HwPlaneTileInfo info;
+        info.inputWithJunk = _co._inputDims[Dim::H];
+        info.outputWithJunk = _co._outputDims[Dim::H];
+        info.outputJunkBefore = 0;
+        info.outputJunkAfter = 0;
+        info.inputStartIndex = 0;
+        info.inputEndIndex = _co._inputDims[Dim::H];
+        info.outputStartIndex = 0;
+        info.outputEndIndex = _co._outputDims[Dim::H];
+
+        heightTiles.emplace_back(info);
+    } else {
+        if (_co._withPool) {
+            heightTiles = splitIntoPlaneTilesWithPool(
+                    _co._inputDims[Dim::H],
+                    _co._kernelSizeY,
+                    _co._kernelStride,
+                    _co._paddingTop,
+                    outputTileDims[Dim::H]);
+        } else {
+            heightTiles = splitIntoPlaneTiles(
+                    _co._inputDims[Dim::H],
+                    _co._outputDims[Dim::H],
+                    _co._kernelSizeY,
+                    _co._kernelStride,
+                    _co._paddingTop, _co._paddingBottom,
+                    outputTileDims[Dim::H],
+                    useCeil);
+        }
+    }
+
+    return heightTiles;
+}
+
+SmallVector<HwPlaneTileInfo> calcWidthTiles(const ConvolutionOptions &_co,
+                                            const DimValues &outputTileDims, bool useCeil) {
+    SmallVector<HwPlaneTileInfo> widthTiles;
+
+    if (outputTileDims[Dim::W] == _co._outputDims[Dim::W]) {
+        HwPlaneTileInfo info;
+        info.inputWithJunk = _co._inputDims[Dim::W];
+        info.outputWithJunk = _co._outputDims[Dim::W];
+        info.outputJunkBefore = 0;
+        info.outputJunkAfter = 0;
+        info.inputStartIndex = 0;
+        info.inputEndIndex = _co._inputDims[Dim::W];
+        info.outputStartIndex = 0;
+        info.outputEndIndex = _co._outputDims[Dim::W];
+
+        widthTiles.emplace_back(info);
+    } else {
+        if (_co._withPool) {
+            widthTiles = splitIntoPlaneTilesWithPool(
+                    _co._inputDims[Dim::W],
+                    _co._kernelSizeX,
+                    _co._kernelStride,
+                    _co._paddingLeft,
+                    outputTileDims[Dim::W]);
+        } else {
+            widthTiles = splitIntoPlaneTiles(
+                    _co._inputDims[Dim::W],
+                    _co._outputDims[Dim::W],
+                    _co._kernelSizeX,
+                    _co._kernelStride,
+                    _co._paddingLeft, _co._paddingRight,
+                    outputTileDims[Dim::W],
+                    useCeil);
+        }
+    }
+
+    return widthTiles;
+}
+
+}  // namespace HWTilingNS
+
+}  // namespace vpu
+
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling/hw_stage_tiler.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/hw_conv_tiling/hw_stage_tiler.cpp
new file mode 100644 (file)
index 0000000..040c546
--- /dev/null
@@ -0,0 +1,481 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include <vpu/passes/hw_conv_tiling/hw_stage_tiler.hpp>
+
+#include <precision_utils.h>
+#include <tuple>
+#include <utility>
+#include <memory>
+#include <list>
+#include <string>
+#include <limits>
+#include <algorithm>
+#include <vector>
+#include <unordered_map>
+#include <set>
+
+#include <vpu/compile_env.hpp>
+#include <vpu/stub_stage.hpp>
+#include <vpu/hw/mx_stage.hpp>
+#include <vpu/hw/tiling.hpp>
+#include <vpu/hw/utility.hpp>
+#include <vpu/utils/attributes_map.hpp>
+
+namespace vpu {
+
+const int BIASES_IND = -1;
+const int SCALES_IND = -2;
+
+using TileWeightsMap = std::unordered_map<int, Data>;
+
+HWConvStageTiler::HWConvStageTiler(const HWConvStageOptions &so, const HWConvStageIO &sio,
+                           const Model::Ptr &model, const Handle <StageNode> &origStage,
+                           const StageBuilder::Ptr &_stageBuilder, const HwConvTilingPtr &tiling,
+                       const bool makeExplicitPoolStage) {
+    hwInput = sio.origInput;
+    hwOutput = sio.origOutput;
+
+    //
+    // Create explicit pool stage if tiling with pool is not possible
+    //
+    bool tileStageWithPool = so.withPool;
+    if (makeExplicitPoolStage) {
+        auto hwPoolInput = model->addNewData(
+                origStage->name(),
+                sio.origOutputDesc);
+        hwPoolInput->attrs().copyFrom(sio.origOutput->attrs());
+
+        auto hwPoolStage = model->addNewStage<StubStage>(
+                origStage->name() + "@Pool",
+                StageType::StubMaxPool,
+                origStage->origLayer(),
+                {hwPoolInput},
+                {hwOutput});
+
+        hwPoolStage->attrs().set<int>("kernelSizeX", so.poolKernelSizeX);
+        hwPoolStage->attrs().set<int>("kernelSizeY", so.poolKernelSizeY);
+
+        hwPoolStage->attrs().set<int>("kernelStrideX", so.poolKernelStride);
+        hwPoolStage->attrs().set<int>("kernelStrideY", so.poolKernelStride);
+
+        hwPoolStage->attrs().set<int>("padLeft", so.poolPadLeft);
+        hwPoolStage->attrs().set<int>("padRight", so.poolPadRight);
+        hwPoolStage->attrs().set<int>("padTop", so.poolPadTop);
+        hwPoolStage->attrs().set<int>("padBottom", so.poolPadBottom);
+
+        hwPoolStage->attrs().set<bool>("excludePad", false);
+
+        hwPoolStage->attrs().set<bool>("tryHW", true);
+
+        hwOutput = hwPoolInput;
+        tileStageWithPool = false;
+    }
+
+    //
+    // Expand input/output if needed
+    //
+
+    int totalExtendedInputDimC = 0;
+    int maxExtendedOutputDimC = 0;
+
+    for (const auto& planeTile : tiling->planeTiles) {
+        for (const auto& channelTile : planeTile->channelTiles) {
+            totalExtendedInputDimC = std::max(totalExtendedInputDimC, channelTile->channelStartIndex + channelTile->extendedInputDimC);
+            maxExtendedOutputDimC = std::max(maxExtendedOutputDimC, channelTile->extendedOutputDimC);
+        }
+    }
+
+    auto origOutputDimC = hwOutput->desc().dim(Dim::C);
+
+    if (totalExtendedInputDimC > hwInput->desc().dim(Dim::C)) {
+        auto newDesc = hwInput->desc();
+        newDesc.setDim(Dim::C, totalExtendedInputDimC);
+
+        auto hwInputExtended = model->duplicateData(
+                hwInput,
+                "@extended",
+                newDesc);
+
+        _stageBuilder->addExpandStage(
+                model,
+                origStage->name() + "@expand-input",
+                origStage->origLayer(),
+                hwInput,
+                hwInputExtended);
+
+        hwInput = hwInputExtended;
+    }
+
+    //
+    // Create HW biases
+    //
+
+    auto& tileWeightsMap = sio.origWeights->attrs().getOrSet<TileWeightsMap>("weightsPerTile", TileWeightsMap());
+    auto hwBiases = tileWeightsMap[BIASES_IND];
+    if (hwBiases == nullptr) {
+        if (sio.origBiases->usage() == DataUsage::Fake) {
+            hwBiases = model->addFakeData();
+        } else {
+            auto origBiasesContent = sio.origBiases->content();
+            IE_ASSERT(origBiasesContent != nullptr);
+
+            auto origBiasesPtr = origBiasesContent->get<fp16_t>();
+            IE_ASSERT(origBiasesPtr != nullptr);
+
+            auto hwTileBiasesBlob = ie::make_shared_blob<fp16_t>(InferenceEngine::TensorDesc(
+                    ie::Precision::FP16,
+                    {static_cast<size_t>(maxExtendedOutputDimC)},
+                    ie::Layout::C));
+            hwTileBiasesBlob->allocate();
+
+            auto hwTileBiasesBlobPtr = hwTileBiasesBlob->buffer().as<fp16_t*>();
+            IE_ASSERT(hwTileBiasesBlobPtr != nullptr);
+
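            // [editor's note, not part of this diff] Zero-fill the whole
            // channel-extended bias vector, then copy the original origOutputDimC
            // biases over the front, so the extra HW-padded output channels get a
            // zero bias.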
+            std::fill_n(hwTileBiasesBlobPtr, maxExtendedOutputDimC, ie::PrecisionUtils::f32tof16(0.0f));
+            std::copy_n(origBiasesPtr, origOutputDimC, hwTileBiasesBlobPtr);
+
+            hwBiases = model->duplicateData(
+                    sio.origBiases,
+                    "@HW",
+                    DataDesc({maxExtendedOutputDimC}),
+                    ieBlobContent(hwTileBiasesBlob));
+
+            if (so.scaleFactor != 1.0f) {
+                auto hwBiasesScaled = model->duplicateData(
+                        hwBiases,
+                        formatString("@SCALE=%f", so.scaleFactor),
+                        hwBiases->desc(),
+                        scaleContent(hwBiases->content(), so.scaleFactor));
+                hwBiasesScaled->attrs().getOrSet<float>("scaleFactor", 1.0f) *= so.scaleFactor;
+
+                hwBiases = hwBiasesScaled;
+            }
+        }
+
+        tileWeightsMap[BIASES_IND] = hwBiases;
+    }
+
+    //
+    // Create HW scales
+    //
+
+    auto hwScales = tileWeightsMap[SCALES_IND];
+    if (hwScales == nullptr) {
+        float fullScale = 1.0f / so.scaleFactor;
+        if (tiling->socTiles == 1 && so.reluScale != 1.0f) {
+            fullScale *= so.reluScale;
+        }
+
+        if (fullScale == 1.0f) {
+            hwScales = model->addFakeData();
+        } else {
+            hwScales = model->addConstData(
+                    origStage->name() + "@scales",
+                    DataDesc({maxExtendedOutputDimC}),
+                    replicateContent(fullScale, maxExtendedOutputDimC));
+        }
+
+        tileWeightsMap[SCALES_IND] = hwScales;
+    }
+
+    //
+    // Create HW tiles
+    //
+
+    hwInputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+    hwInputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+    hwOutputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+    hwOutputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+
+    for (const auto& planeTile : tiling->planeTiles) {
+        auto planeTilePostfix = getPlaneTilePostfix(planeTile);
+
+        //
+        // Create output tile
+        //
+
+        Data hwOutputPlaneTile;
+
+        if (tiling->sohTiles == 1 && tiling->sowTiles == 1) {
+            hwOutputPlaneTile = hwOutput;
+        } else {
+            auto newDesc = hwOutput->desc();
+            newDesc.setDim(Dim::W, planeTile->widthInfo.outputEndIndex - planeTile->widthInfo.outputStartIndex);
+            newDesc.setDim(Dim::H, planeTile->heightInfo.outputEndIndex - planeTile->heightInfo.outputStartIndex);
+
+            hwOutputPlaneTile = model->duplicateData(
+                    hwOutput,
+                    planeTilePostfix,
+                    newDesc);
+
+            hwOutputTiles.emplace_back(hwOutputPlaneTile);
+            hwOutputTilesOffsets.emplace_back(
+                    DimValues({
+                                      {Dim::W, planeTile->widthInfo.outputStartIndex},
+                                      {Dim::H, planeTile->heightInfo.outputStartIndex}
+                              }));
+        }
+
+        //
+        // Add alignment to output tile if needed
+        //
+
+        if ((planeTile->widthInfo.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+            auto hwOutputPlaneTileAligned = model->duplicateData(
+                    hwOutputPlaneTile,
+                    "@aligned");
+
+            _stageBuilder->addCopyStage(
+                    model,
+                    origStage->name() + planeTilePostfix + "@align-output-ptr",
+                    origStage->origLayer(),
+                    hwOutputPlaneTileAligned,
+                    hwOutputPlaneTile);
+
+            hwOutputPlaneTile = hwOutputPlaneTileAligned;
+        }
+
+        Data prevPartialSum;
+
+        for (const auto& channelTile : planeTile->channelTiles) {
+            auto channelTilePostfix = getChannelTilePostfix(channelTile);
+
+            auto tilePostfix = planeTilePostfix + channelTilePostfix;
+
+            auto hwOutputTile = hwOutputPlaneTile;
+
+            //
+            // Create input tile
+            //
+
+            Data hwInputTile;
+
+            if (tiling->sohTiles == 1 && tiling->sowTiles == 1 && tiling->socTiles == 1) {
+                hwInputTile = hwInput;
+            } else {
+                auto newDesc = hwInput->desc();
+                newDesc.setDim(Dim::W, planeTile->widthInfo.inputWithJunk);
+                newDesc.setDim(Dim::H, planeTile->heightInfo.inputWithJunk);
+                newDesc.setDim(Dim::C, channelTile->extendedInputDimC);
+
+                hwInputTile = model->duplicateData(
+                        hwInput,
+                        tilePostfix,
+                        newDesc);
+
+                hwInputTiles.emplace_back(hwInputTile);
+                hwInputTilesOffsets.emplace_back(
+                        DimValues({
+                                          {Dim::W, planeTile->widthInfo.inputStartIndex},
+                                          {Dim::H, planeTile->heightInfo.inputStartIndex},
+                                          {Dim::C, channelTile->channelStartIndex}
+                                  }));
+            }
+
+            //
+            // Add alignment to input tile if needed
+            //
+
+            if ((planeTile->widthInfo.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                auto hwInputTileAligned = model->duplicateData(
+                        hwInputTile,
+                        "@aligned");
+
+                _stageBuilder->addCopyStage(
+                        model,
+                        origStage->name() + tilePostfix + "@align-input-ptr",
+                        origStage->origLayer(),
+                        hwInputTile,
+                        hwInputTileAligned);
+
+                hwInputTile = hwInputTileAligned;
+            }
+
+            //
+            // Process partial output for split-over-channels
+            //
+
+            if (tiling->socTiles > 1) {
+                auto hwConvPartialOutput = model->duplicateData(
+                        hwOutputTile,
+                        channelTilePostfix + "@partial");
+
+                if (channelTile->socInd == 0) {
+                    prevPartialSum = hwConvPartialOutput;
+                } else {
+                    auto sumPartialOutput = hwOutputTile;
+                    if (channelTile->socInd < tiling->socTiles - 1 || so.withReLU || so.withClamp) {
+                        sumPartialOutput = model->duplicateData(
+                                hwOutputTile,
+                                channelTilePostfix + "@accum");
+                    }
+
+                    _stageBuilder->addSumStage(
+                            model,
+                            origStage->name() + tilePostfix + "@accum",
+                            origStage->origLayer(),
+                            prevPartialSum, hwConvPartialOutput,
+                            sumPartialOutput);
+
+                    if (channelTile->socInd == tiling->socTiles - 1 && so.withReLU) {
+                        _stageBuilder->addReLUStage(
+                                model,
+                                origStage->name() + tilePostfix + "@ReLU",
+                                origStage->origLayer(),
+                                so.negativeSlope,
+                                sumPartialOutput,
+                                hwOutputTile);
+                    }
+
+                    if (channelTile->socInd == tiling->socTiles - 1 && so.withClamp) {
+                        _stageBuilder->addClampStage(
+                                model,
+                                origStage->name() + tilePostfix + "@Clamp",
+                                origStage->origLayer(),
+                                0.0,
+                                so.clampMax,
+                                sumPartialOutput,
+                                hwOutputTile);
+                    }
+
+                    prevPartialSum = sumPartialOutput;
+                }
+
+                hwOutputTile = hwConvPartialOutput;
+            }
+
+            //
+            // Process output junk if needed
+            //
+
+            if (planeTile->heightInfo.outputJunkBefore != 0 ||
+                planeTile->heightInfo.outputJunkAfter != 0 ||
+                planeTile->widthInfo.outputJunkBefore != 0 ||
+                planeTile->widthInfo.outputJunkAfter != 0) {
+                auto newDesc = hwOutputTile->desc();
+                newDesc.setDim(Dim::W, planeTile->widthInfo.outputWithJunk);
+                newDesc.setDim(Dim::H, planeTile->heightInfo.outputWithJunk);
+
+                auto hwOutputTileWithJunk = model->duplicateData(
+                        hwOutputTile,
+                        "@with-junk",
+                        newDesc);
+
+                DimValues innerOffset;
+                innerOffset.set(Dim::W, planeTile->widthInfo.outputJunkBefore);
+                innerOffset.set(Dim::H, planeTile->heightInfo.outputJunkBefore);
+
+                _stageBuilder->addShrinkStage(
+                        model,
+                        origStage->name() + tilePostfix + "@remove-junk",
+                        origStage->origLayer(),
+                        hwOutputTileWithJunk,
+                        hwOutputTile,
+                        innerOffset);
+
+                hwOutputTile = hwOutputTileWithJunk;
+            }
+
+            //
+            // Create tile weights
+            //
+
+            auto hwTileWeights = tileWeightsMap[channelTile->socInd];
+
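            // [editor's note, not part of this diff] The repacked weight descriptor
            // {8, kernelSizeX * kernelSizeY, extendedInputDimC, extendedOutputDimC / 8}
            // below appears to group output channels in blocks of 8, which is why the
            // extended output-channel count is kept a multiple of 8.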
+            if (hwTileWeights == nullptr) {
+                hwTileWeights = model->duplicateData(
+                        sio.origWeights,
+                        "@HW" + channelTilePostfix,
+                        DataDesc({8, so.kernelSizeX * so.kernelSizeY, channelTile->extendedInputDimC, channelTile->extendedOutputDimC / 8}),
+                        std::make_shared<HwWeightsContent>(
+                                sio.origWeights->content(),
+                                sio.origWeights->desc(),
+                                channelTile->numInputChannels,
+                                channelTile->channelStartIndex));
+
+                if (so.scaleFactor != 1.0f) {
+                    auto hwTileWeightsScaled = model->duplicateData(
+                            hwTileWeights,
+                            formatString("@SCALE=%f", so.scaleFactor),
+                            hwTileWeights->desc(),
+                            scaleContent(hwTileWeights->content(), so.scaleFactor));
+                    hwTileWeightsScaled->attrs().getOrSet<float>("scaleFactor", 1.0f) *= so.scaleFactor;
+
+                    hwTileWeights = hwTileWeightsScaled;
+                }
+
+                tileWeightsMap[channelTile->socInd] = hwTileWeights;
+            }
+
+            //
+            // Create tile biases
+            //
+
+            Data hwTileBiases;
+
+            if (channelTile->socInd > 0) {
+                hwTileBiases = model->addFakeData();
+            } else {
+                hwTileBiases = hwBiases;
+            }
+
+            //
+            // Create HW stage for tile
+            //
+
+            auto hwOutputTileDims = hwOutputTile->desc().dims();
+            if (tileStageWithPool) {
+                hwOutputTileDims.set(Dim::W, hwOutputTileDims[Dim::W] * so.poolKernelStride - so.poolPadLeft - so.poolPadRight);
+                hwOutputTileDims.set(Dim::H, hwOutputTileDims[Dim::H] * so.poolKernelStride - so.poolPadTop - so.poolPadBottom);
+            }
+
+            auto hwPad = getHwPaddingInfo(
+                    hwInputTile->desc().dims(), hwOutputTileDims,
+                    so.kernelSizeX, so.kernelSizeY,
+                    so.kernelStride, so.kernelStride,
+                    so.padLeft, so.padTop);
+
+            auto hwStage = model->addNewStage<MyriadXHwStage>(
+                    origStage->name() + tilePostfix,
+                    StageType::MyriadXHwOp,
+                    origStage->origLayer(),
+                    {hwInputTile, hwTileWeights, hwTileBiases, hwScales},
+                    {hwOutputTile});
+
+            hwStage->attrs().set<HwOpType>("hwOpType", tileStageWithPool ? HwOpType::CONV_POOL : HwOpType::CONV);
+
+            hwStage->attrs().set<int>("kernelSizeX", so.kernelSizeX);
+            hwStage->attrs().set<int>("kernelSizeY", so.kernelSizeY);
+            hwStage->attrs().set<int>("kernelStride", so.kernelStride);
+
+            if (tileStageWithPool) {
+                hwStage->attrs().set<int>("poolKernelSizeX", so.poolKernelSizeX);
+                hwStage->attrs().set<int>("poolKernelSizeY", so.poolKernelSizeY);
+            }
+
+            hwStage->attrs().set<HwPaddingInfo>("pad", hwPad);
+
+            hwStage->attrs().set<HwConvTileInfo>("tiling", channelTile->finalTiles);
+
+            if (tiling->socTiles > 1) {
+                hwStage->attrs().set<bool>("withReLU", false);
+                hwStage->attrs().set<bool>("withClamp", false);
+            } else {
+                hwStage->attrs().set<bool>("withReLU", so.withReLU);
+                hwStage->attrs().set<uint32_t>("a0", so.a0);
+                hwStage->attrs().set<uint32_t>("a1", so.a1);
+                hwStage->attrs().set<float>("negativeSlope", so.negativeSlope);
+
+                hwStage->attrs().set<bool>("withClamp", so.withClamp);
+                hwStage->attrs().set<float>("clampMax", so.clampMax);
+            }
+
+            hwStage->attrs().set<float>("scaleFactor", so.scaleFactor);
+        }
+    }
+}
+
+}  // namespace vpu
index e0eaae6..351384e 100644 (file)
@@ -101,39 +101,31 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
-
-        _orderInfo.setInput(_inputEdges[0], input->desc().dimsOrder().createMovedDim(Dim::C, 2));
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setInput(inputEdge(0), input->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, 2));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto output = _outputEdges[0]->output();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto output = outputEdge(0)->output();
 
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement().add(1, DimStride::Aligned));
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement().add(1, DimStride::Aligned));
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
@@ -141,18 +133,15 @@ private:
     }
 
     void finalCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
@@ -273,10 +262,10 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        model->disconnectStageDatas(origStage);
+        model->disconnectStage(origStage);
 
         //
-        // Broadcast input/output if needed
+        // Expand input/output if needed
         //
 
         auto origInputDimC = hwInput->desc().dim(Dim::C);
@@ -291,9 +280,9 @@ void PassImpl::run(const Model::Ptr& model) {
                 "@extended",
                 newDesc);
 
-            _stageBuilder->addBroadcastStage(
+            _stageBuilder->addExpandStage(
                 model,
-                origStage->name() + "@broadcast-input",
+                origStage->name() + "@expand-input",
                 origStage->origLayer(),
                 hwInput,
                 hwInputExtended);
index f2acbdd..183c46d 100644 (file)
@@ -80,25 +80,19 @@ bool supportedPaddingPool(const Stage& stage) {
         kernelStride, kernelStride,
         padLeft, padTop);
 
-    bool originalUnsupportedPad = (
-        (padRight  != padLeft && padRight  != padLeft + 1)       ||
-        (padBottom != padTop  && padBottom != padTop + 1)        ||
-        (padLeft   != 0       && padLeft   != (kernelSizeX / 2)) ||
-        (padRight  != 0       && padRight  != (kernelSizeX / 2)) ||
-        (padTop    != 0       && padTop    != (kernelSizeY / 2)) ||
-        (padBottom != 0       && padBottom != (kernelSizeY / 2)));
-
-    bool hwUnsupportedPad = (
-        (hwInitialPad.right  != hwInitialPad.left && hwInitialPad.right  != hwInitialPad.left + 1) ||
-        (hwInitialPad.bottom != hwInitialPad.top  && hwInitialPad.bottom != hwInitialPad.top + 1)  ||
-        (hwInitialPad.left   != 0                 && hwInitialPad.left   != (kernelSizeX / 2))     ||
-        (hwInitialPad.right  != 0                 && hwInitialPad.right  != (kernelSizeX / 2))     ||
-        (hwInitialPad.top    != 0                 && hwInitialPad.top    != (kernelSizeY / 2))     ||
-        (hwInitialPad.bottom != 0                 && hwInitialPad.bottom != (kernelSizeY / 2)));
-
-    return !originalUnsupportedPad &&
-           !hwUnsupportedPad       &&
-           !forcePaddingStage;
+    //
+    // The HW unit supports pooling with an even-sized kernel and asymmetrical paddings of this form
+    // (left/top = kernelSize / 2, right/bottom = (kernelSize - 1) / 2), but it does not support the
+    // inverted paddings. For odd-sized kernels the supported paddings are symmetrical.
+    //
+
+    bool isPadSupported =
+        (hwInitialPad.left   == 0 || hwInitialPad.left   == kernelSizeX / 2)       &&
+        (hwInitialPad.right  == 0 || hwInitialPad.right  == (kernelSizeX - 1) / 2) &&
+        (hwInitialPad.top    == 0 || hwInitialPad.top    == kernelSizeY / 2)       &&
+        (hwInitialPad.bottom == 0 || hwInitialPad.bottom == (kernelSizeY - 1) / 2);
+
+    return isPadSupported && !forcePaddingStage;
 }
 
 bool supportedPaddingConv(const Stage& stage) {
@@ -111,15 +105,20 @@ bool supportedPaddingConv(const Stage& stage) {
     auto padTop      = stage->attrs().get<int>("padTop");
     auto padBottom   = stage->attrs().get<int>("padBottom");
 
-    bool kernelIsOdd = kernelSizeX % 2 == 1 && kernelSizeY % 2 == 1;
+    //
+    // The HW unit supports convolution with an even-sized kernel and asymmetrical paddings of this form
+    // (left/top = kernelSize / 2, right/bottom = (kernelSize - 1) / 2), but it does not support the
+    // inverted paddings. For odd-sized kernels the supported paddings are symmetrical.
+    //
+
     bool paddingsAreZeros = padLeft == 0 && padTop == 0 && padRight == 0 && padBottom == 0;
     bool paddingsAreSupported =
-        padLeft   == (kernelSizeX - 1) / 2 &&
-        padTop    == (kernelSizeY - 1) / 2 &&
-        padRight  == kernelSizeX / 2 &&
-        padBottom == kernelSizeY / 2;
+        padLeft   == kernelSizeX / 2 &&
+        padTop    == kernelSizeY / 2 &&
+        padRight  == (kernelSizeX - 1) / 2 &&
+        padBottom == (kernelSizeY - 1) / 2;
 
-    return paddingsAreZeros || (kernelIsOdd && paddingsAreSupported);
+    return paddingsAreZeros || paddingsAreSupported;
 }
 
 void insertPaddingStageBefore(const Model::Ptr& model, StageBuilder::Ptr& stageBuilder, const Stage& origStage) {
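For reference, the padding checks above (for both pooling and convolution) reduce to the same predicate. The sketch below is an editor's illustration that mirrors the diff's logic; the helper name hwPadSupported is hypothetical and the function is not part of the pass itself.

    // Sketch: non-zero padding is accepted only when it matches the HW-supported value for the
    // given kernel size. For a 3x3 kernel the only supported non-zero padding is 1 on every side;
    // for a 2x2 kernel it is left/top = 1 and right/bottom = 0 (the inverted form is rejected).
    static bool hwPadSupported(int kernelSizeX, int kernelSizeY,
                               int padLeft, int padRight, int padTop, int padBottom) {
        return (padLeft   == 0 || padLeft   == kernelSizeX / 2)       &&
               (padRight  == 0 || padRight  == (kernelSizeX - 1) / 2) &&
               (padTop    == 0 || padTop    == kernelSizeY / 2)       &&
               (padBottom == 0 || padBottom == (kernelSizeY - 1) / 2);
    }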
index da0011a..2c38e92 100644 (file)
 #include <vpu/hw/mx_stage.hpp>
 #include <vpu/hw/tiling.hpp>
 #include <vpu/hw/utility.hpp>
+#include <vpu/passes/hw_conv_tiling/hw_convolution_tiler.hpp>
+#include <vpu/passes/hw_pooling_tiling/hw_pooling_tiler.hpp>
+#include <vpu/passes/hw_pooling_tiling/hw_stage_tiler.hpp>
 
 namespace vpu {
 
 namespace {
 
-const int CHANS_PER_DESCR = 16;
-
-HwPoolTileInfo splitPooling(int outZ) {
-    HwPoolTileInfo tiles;
-    tiles.mode = HwOpMode::MODE_16_16;
-    tiles.numDescr = (outZ + CHANS_PER_DESCR - 1) / CHANS_PER_DESCR;
-    tiles.chansPerDescr = CHANS_PER_DESCR;
-    return tiles;
-}
-
-class Optimizer final {
-public:
-    Optimizer(const std::string& stageName,
-              const DimValues& inputDims, const DimValues& outputDims,
-              int kernelSizeX, int kernelSizeY,
-              int kernelStride,
-              int padLeft, int padRight,
-              int padTop, int padBottom)
-        : _stageName(stageName),
-          _inputDims(inputDims), _outputDims(outputDims),
-          _kernelSizeX(kernelSizeX), _kernelSizeY(kernelSizeY),
-          _kernelStride(kernelStride),
-          _padLeft(padLeft), _padRight(padRight),
-          _padTop(padTop), _padBottom(padBottom) {
-    }
-
-    bool optimize() {
-        initTileSizes();
-
-        if (!selectBestTile()) {
-            return false;
-        }
-
-        return createTiles();
-    }
-
-    const HwPoolTilingPtr& getTiling() const {
-        return _tiling;
-    }
-
-private:
-    void initTileSizes() {
-        int tempX = _inputDims[Dim::W] + _padLeft + _padRight  - _kernelSizeX;
-        int tempY = _inputDims[Dim::H] + _padTop  + _padBottom - _kernelSizeY;
-
-        int outWidthWithOutCeil  = (tempX + _kernelStride) / _kernelStride;
-        int outHeightWithOutCeil = (tempY + _kernelStride) / _kernelStride;
-
-        int outWidthWithCeil =  static_cast<int>(std::ceil(static_cast<double>(tempX) / _kernelStride + 1));
-        int outHeightWithCeil = static_cast<int>(std::ceil(static_cast<double>(tempY) / _kernelStride + 1));
-
-        if ((_outputDims[Dim::W] != outWidthWithCeil) && (_outputDims[Dim::W] != outWidthWithOutCeil)) {
-            VPU_THROW_EXCEPTION
-                    << "Internal error: Output in " << _stageName << " has incorrect width dimension. Expected: "
-                    << outWidthWithCeil << " or " << outWidthWithOutCeil << " Actual: " << _outputDims[Dim::W];
-        }
-
-        if ((_outputDims[Dim::H] != outHeightWithCeil) && (_outputDims[Dim::H] != outHeightWithOutCeil)) {
-            VPU_THROW_EXCEPTION
-                    << "Internal error: Output in " << _stageName << " has incorrect height dimension. Expected: "
-                    << outHeightWithCeil << " or " << outHeightWithOutCeil << " Actual: " << _outputDims[Dim::H];
-        }
-
-        if ((_outputDims[Dim::W] == outWidthWithCeil) && (_outputDims[Dim::H] == outHeightWithCeil)) {
-            _useCeil = true;
-        } else {
-            IE_ASSERT((_outputDims[Dim::W] == outWidthWithOutCeil) && (_outputDims[Dim::H] == outHeightWithOutCeil));
-        }
-
-        _inputTileDims.set(Dim::W, _inputDims[Dim::W]);
-        _inputTileDims.set(Dim::H, _inputDims[Dim::H]);
-        _inputTileDims.set(Dim::C, _inputDims[Dim::C]);
-        _inputTileDims.set(Dim::N, _inputDims.get(Dim::N, 1));
-
-        _outputTileDims.set(Dim::W, _outputDims[Dim::W]);
-        _outputTileDims.set(Dim::H, _outputDims[Dim::H]);
-        _outputTileDims.set(Dim::C, _outputDims[Dim::C]);
-        _outputTileDims.set(Dim::N, _outputDims.get(Dim::N, 1));
-    }
-
-    bool selectBestTile() {
-        struct Solution final {
-            int numWidthTiles = 0;
-            int numHeightTiles = 0;
-            int numBatchTiles = 0;
-            int totalNumTiles = 0;
-            double cost = std::numeric_limits<double>::max();
-        };
-
-        const auto& env = CompileEnv::get();
-
-        // TODO: estimate this numbers
-        const int maxNumWidthTiles = 15;
-        const int maxNumHeightTiles = 15;
-        const int maxNumBatchTiles = _outputDims.get(Dim::N, 1);
-
-        Solution bestSol;
-
-        auto outputTileCopy = _outputTileDims;
-
-        for (int numBatchTiles = 1; numBatchTiles <= maxNumBatchTiles; numBatchTiles++) {
-            //
-            // Filter-out misaligned SoN tiles.
-            //
-
-            if (outputTileCopy[Dim::N] % numBatchTiles != 0) {
-                continue;
-            }
-
-            auto tileDimN = outputTileCopy[Dim::N] / numBatchTiles;
-
-            for (int numWidthTiles = 1; numWidthTiles <= maxNumWidthTiles; numWidthTiles++) {
-                auto inputTileDimW = divUp(_inputDims[Dim::W], numWidthTiles);
-
-                //
-                // Filter-out too small SoW tiles.
-                //
-
-                if (numWidthTiles > 1 && (inputTileDimW < 8 || inputTileDimW < _kernelSizeX)) {
-                    break;
-                }
-
-                for (int numHeightTiles = 1; numHeightTiles <= maxNumHeightTiles ; numHeightTiles++) {
-                    auto inputTileDimH = divUp(_inputDims[Dim::H], numHeightTiles);
-
-                    //
-                    // Filter-out too small SoH tiles.
-                    //
-
-                    if (numHeightTiles > 1 && inputTileDimH < _kernelSizeY) {
-                        break;
-                    }
-
-                    //
-                    // Try current tile size.
-                    //
-
-                    _inputTileDims.set(Dim::W, inputTileDimW);
-                    _inputTileDims.set(Dim::H, inputTileDimH);
-                    _inputTileDims.set(Dim::N, tileDimN);
-
-                    _outputTileDims = outputTileCopy;
-                    _outputTileDims.set(Dim::N, tileDimN);
-                    correctOutputPlaneSize();
-
-                    //
-                    // Check that tiling is valid.
-                    //
-
-                    auto heightTiles = calcHeightTiles();
-                    auto widthTiles = calcWidthTiles();
-
-                    if (heightTiles.empty()) {
-                        continue;
-                    }
-                    if (widthTiles.empty()) {
-                        break;
-                    }
-
-                    bool isOK = true;
-                    double solutionCost = 0.0;
-
-                    for (const auto& heightTile : heightTiles) {
-                        for (const auto& widthTile : widthTiles) {
-                            //
-                            // Output tile fits to CMX limitation.
-                            //
-
-                            DimValues fullOutputTileDims;
-                            fullOutputTileDims.set(Dim::W, widthTile.outputWithJunk);
-                            fullOutputTileDims.set(Dim::H, heightTile.outputWithJunk);
-                            fullOutputTileDims.set(Dim::C, _outputTileDims[Dim::C]);
-                            fullOutputTileDims.set(Dim::N, _outputTileDims[Dim::N]);
-
-                            // TODO: support HCW
-                            if (calculateHwBufferSize(fullOutputTileDims) > env.resources.cmxLimit) {
-                                isOK = false;
-                                break;
-                            }
-
-                            //
-                            // `linesPerChan` restrictions.
-                            //
-
-                            if (heightTile.inputWithJunk < _kernelSizeY) {
-                                isOK = false;
-                                break;
-                            }
-
-                            const uint32_t LOCAL_RAM_SIZE = 128 * 1024;
-                            const uint32_t CMX_DATA_BIT_WIDTH = 128;
-
-                            uint32_t sizeOfBlock = LOCAL_RAM_SIZE >> static_cast<uint32_t>(HwOpMode::MODE_16_16);
-                            uint32_t bytesPerPixel = 1 << (1 - static_cast<uint32_t>(HwDataMode::FP16));
-                            uint32_t pixelsPerCMXLine = CMX_DATA_BIT_WIDTH / (bytesPerPixel * 8u);
-                            uint32_t localLineStride = (widthTile.inputWithJunk + (pixelsPerCMXLine - 1)) / pixelsPerCMXLine;
-                            uint32_t chanPerBlock = 1;
-                            uint32_t availableBytesPerChan = sizeOfBlock / chanPerBlock;
-                            uint32_t bytesPerLine = localLineStride * pixelsPerCMXLine * bytesPerPixel;
-                            uint32_t linesPerChan = availableBytesPerChan / bytesPerLine;
-                            if (linesPerChan < _kernelSizeY) {
-                                isOK = false;
-                                break;
-                            }
-
-                            //
-                            // Replicate padding in case of large input plane - #-16783.
-                            //
-
-                            DimValues fullInputTileDims;
-                            fullInputTileDims.set(Dim::W, widthTile.inputWithJunk);
-                            fullInputTileDims.set(Dim::H, heightTile.inputWithJunk);
-
-                            auto pad = getHwPaddingInfo(
-                                fullInputTileDims, fullOutputTileDims,
-                                _kernelSizeX, _kernelSizeY,
-                                _kernelStride, _kernelStride,
-                                _padLeft, _padTop);
-
-                            if (pad.enable && (pad.left > 0 || pad.right > 0 || pad.bottom > 0)) {
-                                int memPerPlane = alignVal(
-                                            fullInputTileDims[Dim::W], 8) * sizeof(fp16_t)
-                                          * ((fullInputTileDims[Dim::H] - 1) + (_kernelSizeY - 1));
-                                int memLimit = pad.bottom > 0 ? 0x800 : 0x1000;
-                                if (memPerPlane > memLimit) {
-                                    isOK = false;
-                                    break;
-                                }
-                            }
-
-                            //
-                            // Calc tile cost.
-                            //
-
-                            auto noOfBlocks = 1 << static_cast<int>(HwOpMode::MODE_16_16);
-                            solutionCost += 1.0
-                                  * ((_inputTileDims[Dim::C] * _inputTileDims[Dim::N]) / noOfBlocks) * _kernelSizeX * _kernelSizeY
-                                  * numBatchTiles;
-
-                            // Alignment for output
-                            if ((widthTile.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                                solutionCost += 1.0
-                                      * widthTile.outputWithJunk
-                                      * heightTile.outputWithJunk
-                                      * _outputTileDims[Dim::C]
-                                      * _outputTileDims[Dim::N];
-                            }
-
-                            // Alignment for input
-                            if ((widthTile.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                                solutionCost += 1.0
-                                      * widthTile.inputWithJunk
-                                      * heightTile.inputWithJunk
-                                      * _inputTileDims[Dim::C]
-                                      * _inputTileDims[Dim::N];
-                            }
-                        }
-
-                        if (!isOK) {
-                            break;
-                        }
-                    }
-
-                    if (!isOK) {
-                        continue;
-                    }
-
-                    //
-                    // Compare with current best solution.
-                    //
-
-                    Solution curSol;
-                    curSol.numWidthTiles = numWidthTiles;
-                    curSol.numHeightTiles = numHeightTiles;
-                    curSol.numBatchTiles = numBatchTiles;
-                    curSol.totalNumTiles = numWidthTiles * numHeightTiles * numBatchTiles;
-                    curSol.cost = solutionCost;
-
-                    if (curSol.cost < bestSol.cost || (isDoubleEqual(curSol.cost, bestSol.cost) && curSol.totalNumTiles < bestSol.totalNumTiles)) {
-                        bestSol = curSol;
-                    }
-                }
-            }
-        }
-
-        if (bestSol.totalNumTiles == 0) {
-            return false;
-        }
-
-        int inputTileDimW = divUp(_inputDims[Dim::W], bestSol.numWidthTiles);
-        int inputTileDimH = divUp(_inputDims[Dim::H], bestSol.numHeightTiles);
-        auto tileDimN = outputTileCopy[Dim::N] / bestSol.numBatchTiles;
-
-        _inputTileDims.set(Dim::W, inputTileDimW);
-        _inputTileDims.set(Dim::H, inputTileDimH);
-        _inputTileDims.set(Dim::N, tileDimN);
-
-        _outputTileDims = outputTileCopy;
-        _outputTileDims.set(Dim::N, tileDimN);
-        correctOutputPlaneSize();
-
-        return true;
-    }
-
-    bool createTiles() {
-        auto heightTiles = calcHeightTiles();
-        IE_ASSERT(!heightTiles.empty());
-
-        auto widthTiles = calcWidthTiles();
-        IE_ASSERT(!widthTiles.empty());
-
-        _tiling = std::make_shared<HwPoolTiling>();
-        _tiling->sohTiles = heightTiles.size();
-        _tiling->sowTiles = widthTiles.size();
-        _tiling->socTiles = divUp(_inputDims.get(Dim::N, 1), _inputTileDims[Dim::N]);
-
-        for (int sohInd = 0; sohInd < _tiling->sohTiles; ++sohInd) {
-            const auto& heightTileInfo = heightTiles[sohInd];
-
-            for (int sowInd = 0; sowInd < _tiling->sowTiles; ++sowInd) {
-                const auto& widthTileInfo = widthTiles[sowInd];
-
-                auto planeTile = std::make_shared<HwPoolPlaneTile>();
-                planeTile->parent = _tiling;
-
-                planeTile->sohInd = sohInd;
-                planeTile->sowInd = sowInd;
-
-                planeTile->heightInfo = heightTileInfo;
-                planeTile->widthInfo = widthTileInfo;
-
-                for (int socInd = 0; socInd < _tiling->socTiles; ++socInd) {
-                    auto channelTile = std::make_shared<HwPoolChannelTile>();
-                    channelTile->parent = planeTile;
-
-                    channelTile->socInd = socInd;
-
-                    channelTile->finalTiles = splitPooling(_inputTileDims[Dim::C] * _inputTileDims[Dim::N]);
-
-                    if (channelTile->finalTiles.numDescr == 0) {
-                        return false;
-                    }
-
-                    channelTile->channelStartIndex = socInd * _inputTileDims[Dim::N];
-                    channelTile->numInputChannels = _inputTileDims[Dim::N];
-
-                    planeTile->channelTiles.emplace_back(channelTile);
-                }
-
-                _tiling->planeTiles.emplace_back(planeTile);
-            }
-        }
-
-        return true;
-    }
-
-private:
-    void correctOutputPlaneSize() {
-        int maxOutputWidth = calcOutputSize(_inputTileDims[Dim::W], _kernelSizeX, _kernelStride, _padLeft, _padRight, _useCeil);
-        _outputTileDims.set(Dim::W, std::min(_outputTileDims[Dim::W], maxOutputWidth));
-
-        int maxOutputHeight = calcOutputSize(_inputTileDims[Dim::H], _kernelSizeY, _kernelStride, _padTop, _padBottom, _useCeil);
-        _outputTileDims.set(Dim::H, std::min(_outputTileDims[Dim::H], maxOutputHeight));
-    }
-
-    SmallVector<HwPlaneTileInfo> calcHeightTiles() {
-        SmallVector<HwPlaneTileInfo> heightTiles;
-
-        if (_outputTileDims[Dim::H] == _outputDims[Dim::H]) {
-            HwPlaneTileInfo info;
-            info.inputWithJunk = _inputDims[Dim::H];
-            info.outputWithJunk = _outputDims[Dim::H];
-            info.outputJunkBefore = 0;
-            info.outputJunkAfter = 0;
-            info.inputStartIndex = 0;
-            info.inputEndIndex = _inputDims[Dim::H];
-            info.outputStartIndex = 0;
-            info.outputEndIndex = _outputDims[Dim::H];
-
-            heightTiles.emplace_back(info);
-        } else {
-            heightTiles = splitIntoPlaneTiles(
-                _inputDims[Dim::H],
-                _outputDims[Dim::H],
-                _kernelSizeY,
-                _kernelStride,
-                _padTop, _padBottom,
-                _outputTileDims[Dim::H],
-                false,
-                _useCeil);
-        }
-
-        return heightTiles;
-    }
-
-    SmallVector<HwPlaneTileInfo> calcWidthTiles() {
-        SmallVector<HwPlaneTileInfo> widthTiles;
-
-        if (_outputTileDims[Dim::W] == _outputDims[Dim::W]) {
-            HwPlaneTileInfo info;
-            info.inputWithJunk = _inputDims[Dim::W];
-            info.outputWithJunk = _outputDims[Dim::W];
-            info.outputJunkBefore = 0;
-            info.outputJunkAfter = 0;
-            info.inputStartIndex = 0;
-            info.inputEndIndex = _inputDims[Dim::W];
-            info.outputStartIndex = 0;
-            info.outputEndIndex = _outputDims[Dim::W];
-
-            widthTiles.emplace_back(info);
-        } else {
-            widthTiles = splitIntoPlaneTiles(
-                _inputDims[Dim::W],
-                _outputDims[Dim::W],
-                _kernelSizeX,
-                _kernelStride,
-                _padLeft, _padRight,
-                _outputTileDims[Dim::W],
-                true,
-                _useCeil);
-        }
-
-        return widthTiles;
-    }
-
-private:
-    std::string _stageName;
-
-    DimValues _inputDims;
-    DimValues _outputDims;
-
-    int _kernelSizeX  = 0;
-    int _kernelSizeY  = 0;
-    int _kernelStride = 0;
-    int _padLeft   = 0;
-    int _padRight  = 0;
-    int _padTop    = 0;
-    int _padBottom = 0;
-
-    DimValues _inputTileDims;
-    DimValues _outputTileDims;
-
-    HwPoolTilingPtr _tiling;
-
-    bool _useCeil = false;
-};
-
 class PassImpl final : public Pass {
 public:
-    explicit PassImpl(const StageBuilder::Ptr& stageBuidler) : _stageBuidler(stageBuidler) {}
+    explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
 
     void run(const Model::Ptr& model) override;
 
 private:
-    StageBuilder::Ptr _stageBuidler;
+    StageBuilder::Ptr _stageBuilder;
 };
 
-HwPaddingInfo getPoolPadding(const HwPlaneTilePtr<HwPoolTileInfo>& tile,
-                             const DimValues& dims,
-                             int kernelSizeX,
-                             int kernelSizeY,
-                             int kernelStrideX,
-                             int kernelStrideY,
-                             int padLeft,
-                             int padRight,
-                             int padTop,
-                             int padBottom) {
-    const auto& widthInfo  = tile->widthInfo;
-    const auto& heightInfo = tile->heightInfo;
-
-    auto padW = (widthInfo.outputWithJunk  - 1)*kernelStrideX + kernelSizeX - widthInfo.inputWithJunk;
-    auto padH = (heightInfo.outputWithJunk - 1)*kernelStrideY + kernelSizeY - heightInfo.inputWithJunk;
-
-    HwPaddingInfo pad;
-
-    pad.left   = padLeft;
-    pad.right  = (dims[Dim::W] <= widthInfo.inputEndIndex)  ? padRight  : padW - pad.left;
-    pad.top    = padTop;
-    pad.bottom = (dims[Dim::H] <= heightInfo.inputEndIndex) ? padBottom : padH - pad.top;
-
-    pad.enable = pad.left || pad.right || pad.top || pad.bottom;
-
-    return pad;
-}
-
 void PassImpl::run(const Model::Ptr& model) {
     VPU_PROFILE(hwPoolTiling);
 
@@ -525,51 +53,46 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        auto origInput = origStage->input(0);
-        auto origOutput = origStage->output(0);
-
-        auto kernelSizeX  = origStage->attrs().get<int>("kernelSizeX");
-        auto kernelSizeY  = origStage->attrs().get<int>("kernelSizeY");
-        auto kernelStride = origStage->attrs().get<int>("kernelStrideX");
-        auto padLeft   = origStage->attrs().get<int>("padLeft");
-        auto padRight  = origStage->attrs().get<int>("padRight");
-        auto padTop    = origStage->attrs().get<int>("padTop");
-        auto padBottom = origStage->attrs().get<int>("padBottom");
-
-        auto withReLU = origStage->attrs().getOrDefault<bool>("withReLU", false);
-
-        auto hwInput  = origInput;
-        auto hwOutput = origOutput;
+        const HWPoolStageOptions so(origStage);
+        const HWPoolStageIO sio(origStage, origStage->output(0));
 
         //
         // Try to find "best" tiling
         //
 
-        Optimizer opt(origStage->name(),
-                      hwInput->desc().dims(), hwOutput->desc().dims(),
-                      kernelSizeX, kernelSizeY,
-                      kernelStride,
-                      padLeft, padRight, padTop, padBottom);
-
-        if (!opt.optimize()) {
+        const size_t tilingsCount = 1;
+        const HWTilingNS::Direction direction =
+                HWTilingNS::Direction::INPUT_TO_OUTPUT;
+        // HWTilingNS::Direction::OUTPUT_TO_INPUT;
+
+        const HWTilingNS::HWPoolingTiler tiler(
+                HWTilingNS::ConvolutionOptions(origStage->name(),
+                     sio.origInput->desc().dims(), sio.origOutput->desc().dims(),
+                     sio.origOutput->desc().dims(),
+                     so.kernelSizeX, so.kernelSizeY,
+                     so.kernelStride,
+                     so.padLeft, so.padRight, so.padTop, so.padBottom, false),
+                direction, tilingsCount);
+
+        if (!tiler.isTilingPossible()) {
             origStage->attrs().set<bool>("tryHW", false);
 
-            auto swOutput = origOutput;
-            if (withReLU) {
+            auto swOutput = sio.origOutput;
+            if (so.withReLU) {
                 swOutput = model->addNewData(
                     origStage->name(),
-                    origOutput->desc());
-                swOutput->attrs().copyFrom(origOutput->attrs());
+                    sio.origOutput->desc());
+                swOutput->attrs().copyFrom(sio.origOutput->attrs());
 
                 model->replaceStageOutput(origStage->outputEdge(0), swOutput);
 
-                _stageBuidler->addReLUStage(
+                _stageBuilder->addReLUStage(
                     model,
                     origStage->name() + "@ReLU",
                     origStage->origLayer(),
                     0.0,
                     swOutput,
-                    origOutput);
+                    sio.origOutput);
             }
 
             continue;
@@ -579,211 +102,36 @@ void PassImpl::run(const Model::Ptr& model) {
         // Create HW tiles
         //
 
-        model->disconnectStageDatas(origStage);
-
-        const auto& tiling = opt.getTiling();
-
-        DataVector hwInputTiles;
-        std::vector<DimValues> hwInputTilesOffsets;
-
-        DataVector hwOutputTiles;
-        std::vector<DimValues> hwOutputTilesOffsets;
+        model->disconnectStage(origStage);
 
-        hwInputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-        hwInputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-        hwOutputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
-        hwOutputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
 
-        for (const auto& planeTile : tiling->planeTiles) {
-            for (const auto& channelTile : planeTile->channelTiles) {
-                auto tilePostfix = getPlaneTilePostfix(planeTile) + getChannelTilePostfix(channelTile);
-
-                //
-                // Create input tile
-                //
-
-                Data hwInputTile;
-
-                if (tiling->sohTiles == 1 && tiling->sowTiles == 1 && tiling->socTiles == 1) {
-                    hwInputTile = hwInput;
-                } else {
-                    auto newDesc = hwInput->desc();
-                    newDesc.setDim(Dim::W, planeTile->widthInfo.inputWithJunk);
-                    newDesc.setDim(Dim::H, planeTile->heightInfo.inputWithJunk);
-                    newDesc.setDim(Dim::N, channelTile->numInputChannels);
-
-                    hwInputTile = model->duplicateData(
-                        hwInput,
-                        tilePostfix,
-                        newDesc);
-
-                    hwInputTiles.emplace_back(hwInputTile);
-                    hwInputTilesOffsets.emplace_back(
-                        DimValues({
-                            {Dim::W, planeTile->widthInfo.inputStartIndex},
-                            {Dim::H, planeTile->heightInfo.inputStartIndex},
-                            {Dim::N, channelTile->channelStartIndex}
-                        }));
-                }
-
-                //
-                // Add alignement to input tile if needed
-                //
-
-                if ((planeTile->widthInfo.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                    auto hwInputTileAligned = model->duplicateData(
-                        hwInputTile,
-                        "@aligned");
-
-                    _stageBuidler->addCopyStage(
-                        model,
-                        origStage->name() + tilePostfix + "@align-input-ptr",
-                        origStage->origLayer(),
-                        hwInputTile,
-                        hwInputTileAligned);
-
-                    hwInputTile = hwInputTileAligned;
-                }
-
-                //
-                // Create output tile
-                //
-
-                Data hwOutputTile;
-
-                if (tiling->sohTiles == 1 && tiling->sowTiles == 1 && tiling->socTiles == 1) {
-                    hwOutputTile = hwOutput;
-                } else {
-                    auto newDesc = hwOutput->desc();
-                    newDesc.setDim(Dim::W, planeTile->widthInfo.outputEndIndex - planeTile->widthInfo.outputStartIndex);
-                    newDesc.setDim(Dim::H, planeTile->heightInfo.outputEndIndex - planeTile->heightInfo.outputStartIndex);
-                    newDesc.setDim(Dim::N, channelTile->numInputChannels);
-
-                    hwOutputTile = model->duplicateData(
-                        hwOutput,
-                        tilePostfix,
-                        newDesc);
-
-                    hwOutputTiles.emplace_back(hwOutputTile);
-                    hwOutputTilesOffsets.emplace_back(
-                        DimValues({
-                            {Dim::W, planeTile->widthInfo.outputStartIndex},
-                            {Dim::H, planeTile->heightInfo.outputStartIndex},
-                            {Dim::N, channelTile->channelStartIndex}
-                        }));
-                }
-
-                //
-                // Add alignement to output tile if needed
-                //
-
-                if ((planeTile->widthInfo.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
-                    auto hwOutputTileAligned = model->duplicateData(
-                        hwOutputTile,
-                        "@aligned");
+        for (const auto &tiling : tiler.getHwTilings()) {
+            HWPoolStageTiler hwStageTiler(so, sio, model,
+                                          origStage, _stageBuilder, tiling);
+            //
+            // Split/concat input/output tiles
+            //
 
-                    _stageBuidler->addCopyStage(
+            if (!hwStageTiler.hwInputTiles.empty()) {
+                _stageBuilder->addSplitStage(
                         model,
-                        origStage->name() + tilePostfix + "@align-output-ptr",
+                        origStage->name() + "@split-input",
                         origStage->origLayer(),
-                        hwOutputTileAligned,
-                        hwOutputTile);
-
-                    hwOutputTile = hwOutputTileAligned;
-                }
-
-                //
-                // Process output junk if needed
-                //
-
-                if (planeTile->heightInfo.outputJunkBefore != 0 ||
-                    planeTile->heightInfo.outputJunkAfter != 0 ||
-                    planeTile->widthInfo.outputJunkBefore != 0 ||
-                    planeTile->widthInfo.outputJunkAfter != 0) {
-                    auto newDesc = hwOutputTile->desc();
-                    newDesc.setDim(Dim::W, planeTile->widthInfo.outputWithJunk);
-                    newDesc.setDim(Dim::H, planeTile->heightInfo.outputWithJunk);
-
-                    auto hwOutputTileWithJunk = model->duplicateData(
-                        hwOutputTile,
-                        "@with-junk",
-                        newDesc);
-
-                    DimValues innerOffset;
-                    innerOffset.set(Dim::W, planeTile->widthInfo.outputJunkBefore);
-                    innerOffset.set(Dim::H, planeTile->heightInfo.outputJunkBefore);
+                        std::move(hwStageTiler.hwInputTilesOffsets),
+                        hwStageTiler.hwInput,
+                        hwStageTiler.hwInputTiles);
+            }
 
-                    _stageBuidler->addShrinkStage(
+            if (!hwStageTiler.hwOutputTiles.empty()) {
+                _stageBuilder->addConcatStage(
                         model,
-                        origStage->name() + tilePostfix + "@remove-junk",
+                        origStage->name() + "@concat-output",
                         origStage->origLayer(),
-                        hwOutputTileWithJunk,
-                        hwOutputTile,
-                        innerOffset);
-
-                    hwOutputTile = hwOutputTileWithJunk;
-                }
-
-                //
-                // Create HW stage for tile
-                //
-
-                auto hwPad = getPoolPadding(
-                    planeTile, hwInput->desc().dims(),
-                    kernelSizeX, kernelSizeY,
-                    kernelStride, kernelStride,
-                    padLeft, padRight, padTop, padBottom);
-
-                auto hwTileWeights = model->addFakeData();
-                auto hwTileBiases = model->addFakeData();
-                auto hwTileScales = model->addFakeData();
-
-                auto hwStage = model->addNewStage<MyriadXHwStage>(
-                    origStage->name() + tilePostfix,
-                    StageType::MyriadXHwOp,
-                    origStage->origLayer(),
-                    {hwInputTile, hwTileWeights, hwTileBiases, hwTileScales},
-                    {hwOutputTile});
-
-                hwStage->attrs().set<HwOpType>("hwOpType", HwOpType::POOL);
-                hwStage->attrs().set<HwPoolType>("poolType", origStage->type() == StageType::StubMaxPool ? HwPoolType::MAX : HwPoolType::AVERAGE);
-
-                hwStage->attrs().set<int>("kernelSizeX", kernelSizeX);
-                hwStage->attrs().set<int>("kernelSizeY", kernelSizeY);
-                hwStage->attrs().set<int>("kernelStride", kernelStride);
-
-                hwStage->attrs().set("pad", hwPad);
-
-                hwStage->attrs().set<HwPoolTileInfo>("tiling", channelTile->finalTiles);
-
-                hwStage->attrs().set<bool>("withReLU", withReLU);
+                        std::move(hwStageTiler.hwOutputTilesOffsets),
+                        hwStageTiler.hwOutputTiles,
+                        hwStageTiler.hwOutput);
             }
         }
-
-        //
-        // Split/concat input/output tiles
-        //
-
-        if (!hwInputTiles.empty()) {
-            _stageBuidler->addSplitStage(
-                model,
-                origStage->name() + "@split-input",
-                origStage->origLayer(),
-                std::move(hwInputTilesOffsets),
-                hwInput,
-                hwInputTiles);
-        }
-
-        if (!hwOutputTiles.empty()) {
-            _stageBuidler->addConcatStage(
-                model,
-                origStage->name() + "@concat-output",
-                origStage->origLayer(),
-                std::move(hwOutputTilesOffsets),
-                hwOutputTiles,
-                hwOutput);
-        }
-
         //
         // Remove SW stage
         //
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling/hw_pooling_tiler.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling/hw_pooling_tiler.cpp
new file mode 100644 (file)
index 0000000..4d3a5c9
--- /dev/null
@@ -0,0 +1,477 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+#include <memory>
+#include <utility>
+#include <vpu/passes/hw_pooling_tiling/hw_pooling_tiler.hpp>
+
+namespace vpu {
+
+namespace HWTilingNS {
+
+class PoolingInputToOutputDirection;
+class PoolingOutputToInputDirection;
+
+// Input -> Output case
+class PoolingInputToOutputDirection: public GraphDataTiling {
+public:
+    explicit PoolingInputToOutputDirection(const ConvolutionOptions &co): GraphDataTiling(co, Direction::INPUT_TO_OUTPUT) {}
+    PoolingInputToOutputDirection(const PoolingInputToOutputDirection &other): GraphDataTiling(other) {}
+    // ok
+    void initTileSizes() override {
+        _useCeil = ceilNeeded();
+
+        _inputTileDims.set(Dim::W, _co._inputDims[Dim::W]);
+        _inputTileDims.set(Dim::H, _co._inputDims[Dim::H]);
+        _inputTileDims.set(Dim::C, _co._inputDims[Dim::C]);
+        _inputTileDims.set(Dim::N, _co._inputDims.get(Dim::N, 1));
+
+        _outputTileDims.set(Dim::W, _co._outputDims[Dim::W]);
+        _outputTileDims.set(Dim::H, _co._outputDims[Dim::H]);
+        _outputTileDims.set(Dim::C, _co._outputDims[Dim::C]);
+        _outputTileDims.set(Dim::N, _co._outputDims.get(Dim::N, 1));
+    }
+
+    // Input -> Output case
+    // ok
+    void setInputNOutputTileDimensions(const int tileDimW, const int tileDimH, const int tileDimN) override {
+        _inputTileDims.set(Dim::W, tileDimW);
+        _inputTileDims.set(Dim::H, tileDimH);
+        _inputTileDims.set(Dim::N, tileDimN);
+
+        _outputTileDims.set(Dim::N, tileDimN);
+
+        correctOutputPlaneSize();
+    }
+
+    // Input -> Output case
+    // ..
+    void applyTilingOption(const TilingOption &tilingOption) override {
+        int tileDimW = divUp(_co._inputDims[Dim::W], tilingOption.numWidthTiles);
+        int tileDimH = divUp(_co._inputDims[Dim::H], tilingOption.numHeightTiles);
+        const int tileDimN = divUp(_co._inputDims[Dim::N], tilingOption.numChannelTiles);
+
+        tileDimW = divUp(tileDimW, _co._kernelStride) * _co._kernelStride;
+        tileDimH = divUp(tileDimH, _co._kernelStride) * _co._kernelStride;
+
+        _inputTileDims.set(Dim::W, tileDimW);
+        _inputTileDims.set(Dim::H, tileDimH);
+        _inputTileDims.set(Dim::N, tileDimN);
+
+        correctOutputPlaneSize();
+    }
+
+    void correctPlaneSize() override {
+        correctOutputPlaneSize();
+    }
+
+    void correctOutputPlaneSize() {
+        int maxOutputWidth = calcOutputSize(_inputTileDims[Dim::W], _co._kernelSizeX, _co._kernelStride, _co._paddingLeft, _co._paddingRight, _useCeil);
+        _outputTileDims.set(Dim::W, std::min(_outputTileDims[Dim::W], maxOutputWidth));
+
+        int maxOutputHeight = calcOutputSize(_inputTileDims[Dim::H], _co._kernelSizeY, _co._kernelStride, _co._paddingTop, _co._paddingBottom, _useCeil);
+        _outputTileDims.set(Dim::H, std::min(_outputTileDims[Dim::H], maxOutputHeight));
+    }
+
+    const DimValues &splitOverTensorDims() override {
+        return _co._inputDims;
+    }
+
+    void patternMatching() override {};
+
+private:
+    // ok
+    bool ceilNeeded() {
+        int tempX = _co._inputDims[Dim::W] + _co._paddingLeft + _co._paddingRight  - _co._kernelSizeX;
+        int tempY = _co._inputDims[Dim::H] + _co._paddingTop  + _co._paddingBottom - _co._kernelSizeY;
+
+        int outWidthWithOutCeil  = (tempX + _co._kernelStride) / _co._kernelStride;
+        int outHeightWithOutCeil = (tempY + _co._kernelStride) / _co._kernelStride;
+
+        int outWidthWithCeil =  static_cast<int>(std::ceil(static_cast<double>(tempX) / _co._kernelStride + 1));
+        int outHeightWithCeil = static_cast<int>(std::ceil(static_cast<double>(tempY) / _co._kernelStride + 1));
+
+        if ((_co._outputDims[Dim::W] != outWidthWithCeil) && (_co._outputDims[Dim::W] != outWidthWithOutCeil)) {
+            VPU_THROW_EXCEPTION
+                    << "Internal error: Output in " << _co._stageName << " has incorrect width dimension. Expected: "
+                    << outWidthWithCeil << " or " << outWidthWithOutCeil << " Actual: " << _co._outputDims[Dim::W];
+        }
+
+        if ((_co._outputDims[Dim::H] != outHeightWithCeil) && (_co._outputDims[Dim::H] != outHeightWithOutCeil)) {
+            VPU_THROW_EXCEPTION
+                    << "Internal error: Output in " << _co._stageName << " has incorrect height dimension. Expected: "
+                    << outHeightWithCeil << " or " << outHeightWithOutCeil << " Actual: " << _co._outputDims[Dim::H];
+        }
+
+        if ((_co._origOutputDims[Dim::W] == outWidthWithOutCeil) && (_co._origOutputDims[Dim::H] == outHeightWithOutCeil)) {
+            return false;
+        } else {
+            return true;
+        }
+    }
+};
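// Editor's note (worked example with assumed numbers, not from the source): in ceilNeeded() above,
// an input width of 8 with a 3x3 kernel, stride 2 and zero padding gives tempX = 8 - 3 = 5, so
// outWidthWithOutCeil = (5 + 2) / 2 = 3 while outWidthWithCeil = ceil(5 / 2.0 + 1) = 4. The stage
// output must equal one of these two sizes, and _useCeil is set only when the original output
// dimensions do not match the non-ceil variant.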
+
+HWPoolingTiler::HWPoolingTiler(const ConvolutionOptions &co,
+                                   Direction direction,
+                                   size_t maxTilingOptions) :
+    _co(co),
+    _searcher(_co, direction, maxTilingOptions) {
+    _tilingPossible = tileForHW();
+}
+
+bool HWPoolingTiler::tileForHW() {
+    const std::vector<TilingOption> &tilingOptions = _searcher.tilingOptions();
+    if (tilingOptions.empty()) {
+        return false;
+    }
+
+    for (const TilingOption &tilingOption : tilingOptions) {
+        const HWPoolingTileLayoutCut tileLayoutCut = _searcher.tileLayoutCut(tilingOption);
+        if (tileLayoutCut.tileCutPossible()) {
+            _hwTilings.push_back(tileLayoutCut.hwTiling());
+        }
+    }
+
+    return _hwTilings.size() != 0;
+}
+
+std::unique_ptr<GraphDataTiling> PoolGraphDataTilingFactory::makeDirTiling(const ConvolutionOptions &co,
+                                                                           Direction direction) {
+    if (direction == Direction::INPUT_TO_OUTPUT) {
+        return std::unique_ptr<GraphDataTiling>(new PoolingInputToOutputDirection(co));
+    // } else if (direction == Direction::OUTPUT_TO_INPUT) {
+    //     return std::unique_ptr<GraphDataTiling>(new PoolingOutputToInputDirection(co));
+    } else {
+        IE_ASSERT(false) << "Unsupported direction";
+    }
+}
+
+std::unique_ptr<GraphDataTiling> PoolGraphDataTilingFactory::makeDirTiling(const GraphDataTiling &o) {
+    if (o.getDirection() == Direction::INPUT_TO_OUTPUT) {
+        return std::unique_ptr<GraphDataTiling>(
+                new PoolingInputToOutputDirection(dynamic_cast<const PoolingInputToOutputDirection&>(o)));
+    // } else if (o.getDirection() == Direction::OUTPUT_TO_INPUT) {
+    //     return std::unique_ptr<GraphDataTiling>(
+    //             new PoolingOutputToInputDirection(dynamic_cast<const PoolingOutputToInputDirection&>(o)));
+    } else {
+        IE_ASSERT(false) << "Unsupported direction";
+    }
+}
+
+//
+// Looks for the optimal tiling according to the cost function. Modifies dimensions in dirTiling during the search.
+//
+std::vector<TilingOption> HWPoolingTilingSearcher::selectBetterTiling() const {
+    const auto& env = CompileEnv::get();
+    GraphDataTiling &dirTiling = *_dirTiling;
+    FixedMaxHeap<TilingOption> tilingOptions(_maxTilingOptions);
+
+    // TODO: estimate these numbers
+    const int maxNumWidthTiles = 15;
+    const int maxNumHeightTiles = 15;
+    const int maxNumBatchTiles = _co._outputDims.get(Dim::N, 1);
+
+    const auto outputTileInitial = dirTiling.getOutputTileDims();
+    const auto inputTileInitial = dirTiling.getInputTileDims();
+
+    const auto minInputTileDimW = std::max(8, _co._kernelSizeX);
+    const auto minInputTileDimH = _co._kernelSizeY;
+
+    //  const DimValues &splitOver = dirTiling.splitOverTensorDims();
+    const auto direction = dirTiling.getDirection();
+
+    for (int numBatchTiles = 1; numBatchTiles <= maxNumBatchTiles; numBatchTiles++) {
+        //
+        // Filter-out misaligned SoN tiles.
+        //
+
+        if (outputTileInitial[Dim::N] % numBatchTiles != 0) {
+            continue;
+        }
+
+        auto tileSizeDimN = outputTileInitial[Dim::N] / numBatchTiles;
+
+        for (int numWidthTiles = 1; numWidthTiles <= maxNumWidthTiles; numWidthTiles++) {
+            // const int tileSizeDimW = divUp(splitOver[Dim::W], numWidthTiles);
+            int tileSizeDimW = divUp(_co._inputDims[Dim::W], numWidthTiles);
+
+            //
+            // Filter-out too small SoW tiles.
+            //
+
+            if (numWidthTiles > 1 && direction == Direction::INPUT_TO_OUTPUT) {
+                tileSizeDimW = divUp(tileSizeDimW, _co._kernelStride) * _co._kernelStride;
+
+                if (tileSizeDimW < minInputTileDimW) {
+                    break;
+                }
+            }
+
+            for (int numHeightTiles = 1; numHeightTiles <= maxNumHeightTiles ; numHeightTiles++) {
+                // const int tileSizeDimH = divUp(splitOver[Dim::H], numHeightTiles);
+                int tileSizeDimH = divUp(_co._inputDims[Dim::H], numHeightTiles);
+
+                if (direction == Direction::INPUT_TO_OUTPUT) {
+                    tileSizeDimH = divUp(tileSizeDimH, _co._kernelStride) * _co._kernelStride;
+                }
+
+                //
+                // Filter-out too small SoH tiles.
+                //
+
+                if (numHeightTiles > 1 && direction == Direction::INPUT_TO_OUTPUT) {
+                    tileSizeDimH = divUp(tileSizeDimH, _co._kernelStride) * _co._kernelStride;
+
+                    if (tileSizeDimH < minInputTileDimH) {
+                        break;
+                    }
+                }
+
+                //
+                // Try current tile size.
+                //
+
+                dirTiling.resetInputTileDims(inputTileInitial);
+                dirTiling.resetOutputTileDims(outputTileInitial);
+
+                dirTiling.setInputNOutputTileDimensions(tileSizeDimW, tileSizeDimH, tileSizeDimN);
+
+
+                //
+                // Check that tiling is valid.
+                //
+
+                const auto heightTiles = calcHeightTilesP(_co, dirTiling.getOutputTileDims(),
+                                                         dirTiling.useCeil());
+                const auto widthTiles = calcWidthTilesP(_co, dirTiling.getOutputTileDims(), dirTiling.useCeil());
+
+                if (heightTiles.empty()) {
+                    continue;
+                }
+                if (widthTiles.empty()) {
+                    break;
+                }
+
+                bool isOK = true;
+                double solutionCost = 0.0;
+
+                for (const auto& heightTile : heightTiles) {
+                    for (const auto& widthTile : widthTiles) {
+                        //
+                        // Output tile fits to CMX limitation.
+                        //
+
+                        DimValues fullOutputTileDims;
+                        fullOutputTileDims.set(Dim::W, widthTile.outputWithJunk);
+                        fullOutputTileDims.set(Dim::H, heightTile.outputWithJunk);
+                        fullOutputTileDims.set(Dim::C, dirTiling.getOutputTileDims()[Dim::C]);
+                        fullOutputTileDims.set(Dim::N, dirTiling.getOutputTileDims()[Dim::N]);
+
+                        // TODO: support HCW
+                        if (calculateHwBufferSize(fullOutputTileDims) > env.resources.cmxLimit) {
+                            isOK = false;
+                            break;
+                        }
+
+                        //
+                        // `linesPerChan` restrictions.
+                        //
+
+                        if (heightTile.inputWithJunk < _co._kernelSizeY) {
+                            isOK = false;
+                            break;
+                        }
+
+                        if (!checkPoolingHWRestrictions(
+                                widthTile.inputWithJunk,
+                                heightTile.inputWithJunk,
+                                dirTiling.getInputTileDims()[Dim::C],
+                                dirTiling.getOutputTileDims()[Dim::C],
+                                _co._kernelSizeX, _co._kernelSizeY, _co._kernelStride)) {
+                            isOK = false;
+                            break;
+                        }
+
+                        //
+                        // Replicate padding in case of large input plane - #-16783.
+                        //
+
+                        DimValues fullInputTileDims;
+                        fullInputTileDims.set(Dim::W, widthTile.inputWithJunk);
+                        fullInputTileDims.set(Dim::H, heightTile.inputWithJunk);
+
+                        auto pad = getHwPaddingInfo(
+                                fullInputTileDims, fullOutputTileDims,
+                                _co._kernelSizeX, _co._kernelSizeY,
+                                _co._kernelStride, _co._kernelStride,
+                                _co._paddingLeft, _co._paddingTop);
+
+                        if (pad.enable && (pad.left > 0 || pad.right > 0 || pad.bottom > 0)) {
+                            int memPerPlane = alignVal(
+                                    fullInputTileDims[Dim::W], 8) * sizeof(fp16_t)
+                                              * ((fullInputTileDims[Dim::H] - 1) + (_co._kernelSizeY - 1));
+                            int memLimit = pad.bottom > 0 ? 0x800 : 0x1000;
+                            if (memPerPlane > memLimit) {
+                                isOK = false;
+                                break;
+                            }
+                        }
+
+                        //
+                        // Calc tile cost.
+                        //
+                        const auto& _inputTileDims = dirTiling.getInputTileDims();
+                        const auto& _outputTileDims = dirTiling.getOutputTileDims();
+                        auto chansPerBlock = 1 << static_cast<int>(HwOpMode::MODE_16_16);
+                        solutionCost += 1.0
+                                        * ((_inputTileDims[Dim::C] * _inputTileDims[Dim::N]) / chansPerBlock) * _co._kernelSizeX * _co._kernelSizeY
+                                        * numBatchTiles;
+
+                        // Alignment for output
+                        if ((widthTile.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                            solutionCost += 1.0
+                                            * widthTile.outputWithJunk
+                                            * heightTile.outputWithJunk
+                                            * _outputTileDims[Dim::C]
+                                            * _outputTileDims[Dim::N];
+                        }
+
+                        // Alignment for input
+                        if ((widthTile.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                            solutionCost += 1.0
+                                            * widthTile.inputWithJunk
+                                            * heightTile.inputWithJunk
+                                            * _inputTileDims[Dim::C]
+                                            * _inputTileDims[Dim::N];
+                        }
+                    }
+
+                    if (!isOK) {
+                        break;
+                    }
+                }
+
+                if (!isOK) {
+                    continue;
+                }
+
+                //
+                // Put to the pool of best options.
+                //
+
+                const int totalNumTiles = numWidthTiles * numHeightTiles * numBatchTiles;
+
+                const TilingOption to =
+                        {numWidthTiles, numHeightTiles, numBatchTiles, totalNumTiles, solutionCost};
+                tilingOptions.push(to);
+            }
+        }
+    }
+
+    const auto sorted = tilingOptions.sorted();
+
+    if (sorted.size() != 0) {
+        const TilingOption& best = sorted.front();
+        int inputTileDimW = divUp(_co._inputDims[Dim::W], best.numWidthTiles);
+        int inputTileDimH = divUp(_co._inputDims[Dim::H], best.numHeightTiles);
+        auto tileDimN = outputTileInitial[Dim::N] / best.numChannelTiles;
+
+        inputTileDimW = divUp(inputTileDimW, _co._kernelStride) * _co._kernelStride;
+        inputTileDimH = divUp(inputTileDimH, _co._kernelStride) * _co._kernelStride;
+
+        auto& _inputTileDims = dirTiling.getInputTileDims();
+        auto& _outputTileDims = dirTiling.getOutputTileDims();
+
+        _inputTileDims.set(Dim::W, inputTileDimW);
+        _inputTileDims.set(Dim::H, inputTileDimH);
+        _inputTileDims.set(Dim::N, tileDimN);
+
+        dirTiling.resetOutputTileDims(outputTileInitial);
+        _outputTileDims.set(Dim::N, tileDimN);
+
+        dirTiling.correctPlaneSize();
+    }
+
+    return sorted;
+}
+
+const vpu::HWTilingNS::HWPoolingTileLayoutCut HWPoolingTilingSearcher::tileLayoutCut(const TilingOption &option) const {
+    return HWPoolingTileLayoutCut(*_dirTiling, option);
+}
+
+SmallVector<HwPlaneTileInfo> calcHeightTilesP(const ConvolutionOptions &_co,
+                                              const DimValues &outputTileDims, bool useCeil) {
+    SmallVector<HwPlaneTileInfo> heightTiles;
+
+    if (outputTileDims[Dim::H] == _co._outputDims[Dim::H]) {
+        HwPlaneTileInfo info;
+        info.inputWithJunk = _co._inputDims[Dim::H];
+        info.outputWithJunk = _co._outputDims[Dim::H];
+        info.outputJunkBefore = 0;
+        info.outputJunkAfter = 0;
+        info.inputStartIndex = 0;
+        info.inputEndIndex = _co._inputDims[Dim::H];
+        info.outputStartIndex = 0;
+        info.outputEndIndex = _co._outputDims[Dim::H];
+
+        heightTiles.emplace_back(info);
+    } else {
+        heightTiles = splitIntoPlaneTiles(
+                _co._inputDims[Dim::H],
+                _co._outputDims[Dim::H],
+                _co._kernelSizeY,
+                _co._kernelStride,
+                _co._paddingTop, _co._paddingBottom,
+                outputTileDims[Dim::H],
+                useCeil);
+    }
+
+    return heightTiles;
+}
+
+SmallVector<HwPlaneTileInfo> calcWidthTilesP(const ConvolutionOptions &_co,
+                                             const DimValues &outputTileDims, bool useCeil) {
+    SmallVector<HwPlaneTileInfo> widthTiles;
+
+    if (outputTileDims[Dim::W] == _co._outputDims[Dim::W]) {
+        HwPlaneTileInfo info;
+        info.inputWithJunk = _co._inputDims[Dim::W];
+        info.outputWithJunk = _co._outputDims[Dim::W];
+        info.outputJunkBefore = 0;
+        info.outputJunkAfter = 0;
+        info.inputStartIndex = 0;
+        info.inputEndIndex = _co._inputDims[Dim::W];
+        info.outputStartIndex = 0;
+        info.outputEndIndex = _co._outputDims[Dim::W];
+
+        widthTiles.emplace_back(info);
+    } else {
+        widthTiles = splitIntoPlaneTiles(
+                _co._inputDims[Dim::W],
+                _co._outputDims[Dim::W],
+                _co._kernelSizeX,
+                _co._kernelStride,
+                _co._paddingLeft, _co._paddingRight,
+                outputTileDims[Dim::W],
+                useCeil);
+    }
+
+    return widthTiles;
+}
+
+HwPoolTileInfo splitPooling(int outZ) {
+    HwPoolTileInfo tiles;
+    tiles.mode = HwOpMode::MODE_16_16;
+    tiles.numDescr = (outZ + CHANS_PER_DESCR - 1) / CHANS_PER_DESCR;
+    tiles.chansPerDescr = CHANS_PER_DESCR;
+    return tiles;
+}
+
+}  // namespace HWTilingNS
+
+}  // namespace vpu
+
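As a reading aid for selectBetterTiling() above: each candidate tiling is scored by a compute term proportional to the per-block input volume, plus a copy penalty whenever a tile's input or output start offset is not 16-byte aligned. The sketch below restates that scoring with hypothetical parameter names; it is an editor's illustration, not code from the pass.

    // Cost accumulated per (height tile, width tile) pair: the compute cost scales with the input
    // channels/batch per HW block and the kernel area; a misaligned tile start adds the full
    // tile volume as a realignment-copy penalty.
    static double tileCost(double inputTileC, double inputTileN,
                           int kernelSizeX, int kernelSizeY,
                           int numBatchTiles, int chansPerBlock,
                           bool inputStartAligned, double inputTileVolume,
                           bool outputStartAligned, double outputTileVolume) {
        double cost = ((inputTileC * inputTileN) / chansPerBlock)
                    * kernelSizeX * kernelSizeY * numBatchTiles;
        if (!outputStartAligned) cost += outputTileVolume;  // extra copy to realign the output tile
        if (!inputStartAligned)  cost += inputTileVolume;   // extra copy to realign the input tile
        return cost;
    }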
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling/hw_stage_tiler.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/hw_pooling_tiling/hw_stage_tiler.cpp
new file mode 100644 (file)
index 0000000..7a19a96
--- /dev/null
@@ -0,0 +1,234 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include <vpu/passes/hw_pooling_tiling/hw_stage_tiler.hpp>
+
+#include <precision_utils.h>
+#include <tuple>
+#include <utility>
+#include <memory>
+#include <list>
+#include <string>
+#include <limits>
+#include <algorithm>
+#include <vector>
+#include <unordered_map>
+#include <set>
+
+#include <vpu/compile_env.hpp>
+#include <vpu/stub_stage.hpp>
+#include <vpu/hw/mx_stage.hpp>
+#include <vpu/hw/tiling.hpp>
+#include <vpu/hw/utility.hpp>
+#include <vpu/utils/attributes_map.hpp>
+
+namespace vpu {
+
+HwPaddingInfo getPoolPadding(const HwPlaneTilePtr<HwPoolTileInfo>& tile,
+                             const DimValues& dims,
+                             int kernelSizeX,
+                             int kernelSizeY,
+                             int kernelStrideX,
+                             int kernelStrideY,
+                             int padLeft,
+                             int padRight,
+                             int padTop,
+                             int padBottom) {
+    const auto& widthInfo  = tile->widthInfo;
+    const auto& heightInfo = tile->heightInfo;
+
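+    // padW/padH give the total padding implied by the tile geometry: the input extent needed to
+    // produce the tile's outputs minus the input the tile actually covers. The rightmost/bottom
+    // tile keeps the layer's original right/bottom padding; inner tiles take the remainder after
+    // the left/top part.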
+    auto padW = (widthInfo.outputWithJunk  - 1)*kernelStrideX + kernelSizeX - widthInfo.inputWithJunk;
+    auto padH = (heightInfo.outputWithJunk - 1)*kernelStrideY + kernelSizeY - heightInfo.inputWithJunk;
+
+    HwPaddingInfo pad;
+
+    pad.left   = padLeft;
+    pad.right  = (dims[Dim::W] <= widthInfo.inputEndIndex)  ? padRight  : padW - pad.left;
+    pad.top    = padTop;
+    pad.bottom = (dims[Dim::H] <= heightInfo.inputEndIndex) ? padBottom : padH - pad.top;
+
+    pad.enable = pad.left || pad.right || pad.top || pad.bottom;
+
+    return pad;
+}
+
+HWPoolStageTiler::HWPoolStageTiler(const HWPoolStageOptions &so, const HWPoolStageIO &sio,
+             const Model::Ptr &model, const Handle <StageNode> &origStage,
+             const StageBuilder::Ptr &stageBuilder, const HwPoolTilingPtr &tiling) {
+    hwInput = sio.origInput;
+    hwOutput = sio.origOutput;
+
+    hwInputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+    hwInputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+    hwOutputTiles.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+    hwOutputTilesOffsets.reserve(tiling->socTiles * tiling->sohTiles * tiling->sowTiles);
+
+    for (const auto& planeTile : tiling->planeTiles) {
+        for (const auto& channelTile : planeTile->channelTiles) {
+            auto tilePostfix = getPlaneTilePostfix(planeTile) + getChannelTilePostfix(channelTile);
+
+            //
+            // Create input tile
+            //
+
+            Data hwInputTile;
+
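+            // A 1x1x1 tiling reuses the original input data as-is; otherwise each tile gets its own
+            // sub-tensor, and the offset of that sub-tensor inside the parent data is recorded.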
+            if (tiling->sohTiles == 1 && tiling->sowTiles == 1 && tiling->socTiles == 1) {
+                hwInputTile = hwInput;
+            } else {
+                auto newDesc = hwInput->desc();
+                newDesc.setDim(Dim::W, planeTile->widthInfo.inputWithJunk);
+                newDesc.setDim(Dim::H, planeTile->heightInfo.inputWithJunk);
+                newDesc.setDim(Dim::N, channelTile->numInputChannels);
+
+                hwInputTile = model->duplicateData(
+                        hwInput,
+                        tilePostfix,
+                        newDesc);
+
+                hwInputTiles.emplace_back(hwInputTile);
+                hwInputTilesOffsets.emplace_back(
+                        DimValues({
+                                          {Dim::W, planeTile->widthInfo.inputStartIndex},
+                                          {Dim::H, planeTile->heightInfo.inputStartIndex},
+                                          {Dim::N, channelTile->channelStartIndex}
+                                  }));
+            }
+
+            //
+            // Add alignment to input tile if needed
+            //
+
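+            // The tile starts at an element offset inside the parent buffer; the HW start address is
+            // assumed to require 16-byte alignment, hence the copy into a separately allocated tile.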
+            if ((planeTile->widthInfo.inputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                auto hwInputTileAligned = model->duplicateData(
+                        hwInputTile,
+                        "@aligned");
+
+                stageBuilder->addCopyStage(
+                        model,
+                        origStage->name() + tilePostfix + "@align-input-ptr",
+                        origStage->origLayer(),
+                        hwInputTile,
+                        hwInputTileAligned);
+
+                hwInputTile = hwInputTileAligned;
+            }
+
+            //
+            // Create output tile
+            //
+
+            Data hwOutputTile;
+
+            if (tiling->sohTiles == 1 && tiling->sowTiles == 1 && tiling->socTiles == 1) {
+                hwOutputTile = hwOutput;
+            } else {
+                auto newDesc = hwOutput->desc();
+                newDesc.setDim(Dim::W, planeTile->widthInfo.outputEndIndex - planeTile->widthInfo.outputStartIndex);
+                newDesc.setDim(Dim::H, planeTile->heightInfo.outputEndIndex - planeTile->heightInfo.outputStartIndex);
+                newDesc.setDim(Dim::N, channelTile->numInputChannels);
+
+                hwOutputTile = model->duplicateData(
+                        hwOutput,
+                        tilePostfix,
+                        newDesc);
+
+                hwOutputTiles.emplace_back(hwOutputTile);
+                hwOutputTilesOffsets.emplace_back(
+                        DimValues({
+                                          {Dim::W, planeTile->widthInfo.outputStartIndex},
+                                          {Dim::H, planeTile->heightInfo.outputStartIndex},
+                                          {Dim::N, channelTile->channelStartIndex}
+                                  }));
+            }
+
+            //
+            // Add alignment to output tile if needed
+            //
+
+            if ((planeTile->widthInfo.outputStartIndex * sizeof(fp16_t)) % 16 != 0) {
+                auto hwOutputTileAligned = model->duplicateData(
+                        hwOutputTile,
+                        "@aligned");
+
+                stageBuilder->addCopyStage(
+                        model,
+                        origStage->name() + tilePostfix + "@align-output-ptr",
+                        origStage->origLayer(),
+                        hwOutputTileAligned,
+                        hwOutputTile);
+
+                hwOutputTile = hwOutputTileAligned;
+            }
+
+            //
+            // Process output junk if needed
+            //
+
+            if (planeTile->heightInfo.outputJunkBefore != 0 ||
+                planeTile->heightInfo.outputJunkAfter != 0 ||
+                planeTile->widthInfo.outputJunkBefore != 0 ||
+                planeTile->widthInfo.outputJunkAfter != 0) {
+                auto newDesc = hwOutputTile->desc();
+                newDesc.setDim(Dim::W, planeTile->widthInfo.outputWithJunk);
+                newDesc.setDim(Dim::H, planeTile->heightInfo.outputWithJunk);
+
+                auto hwOutputTileWithJunk = model->duplicateData(
+                        hwOutputTile,
+                        "@with-junk",
+                        newDesc);
+
+                DimValues innerOffset;
+                innerOffset.set(Dim::W, planeTile->widthInfo.outputJunkBefore);
+                innerOffset.set(Dim::H, planeTile->heightInfo.outputJunkBefore);
+
+                stageBuilder->addShrinkStage(
+                        model,
+                        origStage->name() + tilePostfix + "@remove-junk",
+                        origStage->origLayer(),
+                        hwOutputTileWithJunk,
+                        hwOutputTile,
+                        innerOffset);
+
+                hwOutputTile = hwOutputTileWithJunk;
+            }
+
+            //
+            // Create HW stage for tile
+            //
+
+            auto hwPad = getPoolPadding(
+                    planeTile, hwInput->desc().dims(),
+                    so.kernelSizeX, so.kernelSizeY,
+                    so.kernelStride, so.kernelStride,
+                    so.padLeft, so.padRight, so.padTop, so.padBottom);
+
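+            // Pooling has no weights or biases, but the MyriadX HW stage is built with a fixed set of
+            // inputs, so placeholder (fake) data fills the weight/bias/scale slots.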
+            auto hwTileWeights = model->addFakeData();
+            auto hwTileBiases = model->addFakeData();
+            auto hwTileScales = model->addFakeData();
+
+            auto hwStage = model->addNewStage<MyriadXHwStage>(
+                    origStage->name() + tilePostfix,
+                    StageType::MyriadXHwOp,
+                    origStage->origLayer(),
+                    {hwInputTile, hwTileWeights, hwTileBiases, hwTileScales},
+                    {hwOutputTile});
+
+            hwStage->attrs().set<HwOpType>("hwOpType", HwOpType::POOL);
+            hwStage->attrs().set<HwPoolType>("poolType", origStage->type() == StageType::StubMaxPool ? HwPoolType::MAX : HwPoolType::AVERAGE);
+
+            hwStage->attrs().set<int>("kernelSizeX", so.kernelSizeX);
+            hwStage->attrs().set<int>("kernelSizeY", so.kernelSizeY);
+            hwStage->attrs().set<int>("kernelStride", so.kernelStride);
+
+            hwStage->attrs().set("pad", hwPad);
+
+            hwStage->attrs().set<HwPoolTileInfo>("tiling", channelTile->finalTiles);
+
+            hwStage->attrs().set<bool>("withReLU", so.withReLU);
+        }
+    }
+}
+}  // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/initial_check.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/initial_check.cpp
new file mode 100644 (file)
index 0000000..cafcfc1
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vpu/pass_manager.hpp>
+
+#include <memory>
+
+namespace vpu {
+namespace {
+
+class PassImpl final : public Pass {
+public:
+    void run(const Model::Ptr& model) override {
+        VPU_PROFILE(initialCheck);
+
+        for (const auto& stage : model->getStages()) {
+            stage->initialCheck();
+        }
+    }
+};
+
+}  // namespace
+
+Pass::Ptr PassManager::initialCheck() {
+    return std::make_shared<PassImpl>();
+}
+
+}  // namespace vpu
index 58525dd..4972a1e 100644 (file)
@@ -50,7 +50,7 @@ void PassImpl::run(const Model::Ptr& model) {
     StageVector hwStages;
     std::list<Stage> swStages;
 
-    hwStages.reserve(model->numStages());
+    hwStages.reserve(checked_cast<size_t>(model->numStages()));
     for (const auto& stage : model->getStages()) {
         if (stage->category() == StageCategory::HW) {
             hwStages.emplace_back(stage);
@@ -66,10 +66,14 @@ void PassImpl::run(const Model::Ptr& model) {
     // Try to merge HW and SW stages
     //
 
+    StageVector swCandidates;
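+    // Candidates are collected per HW stage and injected afterwards, so buildStageOrder()
+    // runs once per HW stage rather than once per (HW, SW) pair.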
+
     for (const auto& hwStage : hwStages) {
-        for (const auto& swStage : swStages) {
-            model->buildStageOrder();
+        swCandidates.clear();
+
+        model->buildStageOrder();
 
+        for (const auto& swStage : swStages) {
             auto hwInd = hwStage->index();
             IE_ASSERT(hwInd >= 0);
 
@@ -115,10 +119,12 @@ void PassImpl::run(const Model::Ptr& model) {
                 }
             }
 
-            if (!isOK) {
-                continue;
+            if (isOK) {
+                swCandidates.push_back(swStage);
             }
+        }
 
+        for (const auto& swStage : swCandidates) {
             //
             // Try to inject and check allocation, if it is failed -> revert
             //
index 1ca839d..a26eb1c 100644 (file)
@@ -138,7 +138,7 @@ void PassImpl::run(const Model::Ptr& model) {
             if (isOK) {
                 output = nextPostOpStage->output(0);
 
-                model->disconnectStageDatas(nextPostOpStage);
+                model->disconnectStage(nextPostOpStage);
 
                 model->replaceStageOutput(stage->outputEdge(0), output);
 
@@ -175,7 +175,7 @@ void PassImpl::run(const Model::Ptr& model) {
                 if (auto nextPoolStage = getNextPoolStage(stage, output)) {
                     output = nextPoolStage->output(0);
 
-                    model->disconnectStageDatas(nextPoolStage);
+                    model->disconnectStage(nextPoolStage);
 
                     model->replaceStageOutput(stage->outputEdge(0), output);
 
index 128868f..b49b5c1 100644 (file)
@@ -3,6 +3,7 @@
 //
 
 #include <vpu/pass_manager.hpp>
+#include <vpu/special_stage_processor.hpp>
 
 #include <memory>
 #include <string>
@@ -19,47 +20,42 @@ namespace {
 
 class PassImpl final : public Pass {
 public:
-    explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
+    explicit PassImpl(const StageBuilder::Ptr& stageBuilder) :
+            _stageBuilder(stageBuilder), _processor(stageBuilder) {}
 
     void run(const Model::Ptr& model) override;
 
 private:
-    void processConcat(const Model::Ptr& model, const Stage& stage);
-    void processSplit(const Model::Ptr& model, const Stage& stage);
-    void processReshape(const Model::Ptr& model, const Stage& stage);
-    void processBroadcast(const Model::Ptr& model, const Stage& stage);
-    void processShrink(const Model::Ptr& model, const Stage& stage);
-
-private:
     StageBuilder::Ptr _stageBuilder;
+    SpecialStageProcessor _processor;
 };
 
 void PassImpl::run(const Model::Ptr& model) {
     VPU_PROFILE(processSpecialStages);
 
     //
-    // Merge multiple Broadcast stages applied to the same input.
+    // Merge multiple Expand stages applied to the same input.
     //
 
-    for (const auto& curBroadcastStage : model->getStages()) {
-        if (curBroadcastStage == nullptr) {
+    for (const auto& curExpandStage : model->getStages()) {
+        if (curExpandStage == nullptr) {
             continue;
         }
 
-        if (curBroadcastStage->type() != StageType::Broadcast) {
+        if (curExpandStage->type() != StageType::Expand) {
             continue;
         }
 
-        auto input = curBroadcastStage->input(0);
-        auto output = curBroadcastStage->output(0);
+        auto input = curExpandStage->input(0);
+        auto output = curExpandStage->output(0);
 
         bool hasDuplicates = false;
         for (const auto& inputConsumer : input->consumers()) {
-            if (inputConsumer->type() != StageType::Broadcast) {
+            if (inputConsumer->type() != StageType::Expand) {
                 continue;
             }
 
-            if (inputConsumer == curBroadcastStage) {
+            if (inputConsumer == curExpandStage) {
                 continue;
             }
 
@@ -83,11 +79,11 @@ void PassImpl::run(const Model::Ptr& model) {
         }
 
         for (const auto& inputConsumer : input->consumers()) {
-            if (inputConsumer->type() != StageType::Broadcast) {
+            if (inputConsumer->type() != StageType::Expand) {
                 continue;
             }
 
-            if (inputConsumer == curBroadcastStage) {
+            if (inputConsumer == curExpandStage) {
                 continue;
             }
 
@@ -111,552 +107,17 @@ void PassImpl::run(const Model::Ptr& model) {
         }
 
         if (stage->type() == StageType::Concat) {
-            processConcat(model, stage);
+            _processor.processConcat(model, stage);
         } else if (stage->type() == StageType::Split) {
-            processSplit(model, stage);
+            _processor.processSplit(model, stage);
         } else if (stage->type() == StageType::Reshape) {
-            processReshape(model, stage);
-        } else if (stage->type() == StageType::Broadcast) {
-            processBroadcast(model, stage);
+            _processor.processReshape(model, stage);
+        } else if (stage->type() == StageType::Expand) {
+            _processor.processExpand(model, stage);
         } else if (stage->type() == StageType::Shrink) {
-            processShrink(model, stage);
-        }
-    }
-}
-
-void PassImpl::processConcat(const Model::Ptr& model, const Stage& stage) {
-    auto output = stage->output(0);
-
-    const auto& offsets = stage->attrs().get<std::vector<DimValues>>("offsets");
-    IE_ASSERT(offsets.size() == stage->numInputs());
-
-    for (const auto& inEdge : stage->inputEdges()) {
-        IE_ASSERT(inEdge->portInd() >= 0);
-        IE_ASSERT(inEdge->portInd() < offsets.size());
-
-        auto input = inEdge->input();
-        const auto& offsetFromOutput = offsets[inEdge->portInd()];
-
-        IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
-        IE_ASSERT(offsetFromOutput.size() <= output->desc().numDims());
-        for (const auto& p : offsetFromOutput) {
-            IE_ASSERT(output->desc().dimsOrder().hasDim(p.first));
-            IE_ASSERT(p.second + input->desc().dim(p.first) <= output->desc().dim(p.first));
-        }
-
-        //
-        // Check if we need to insert Copy stage
-        //
-
-        bool needCopy = false;
-        bool optionalCopy = false;
-        if (input->usage() != DataUsage::Intermediate) {
-            needCopy = true;
-            optionalCopy = false;
-        } else if (input->parentDataEdge() != nullptr) {
-            needCopy = true;
-            optionalCopy = false;
-        } else {
-            //
-            // Check input StridesRequirement.
-            //
-
-            IE_ASSERT(input->checkStrides(input->requiredStrides()));
-            if (!checkStrides(input->desc(), output->strides(), input->requiredStrides())) {
-                needCopy = true;
-                optionalCopy = false;
-            }
-
-            //
-            // Check consumers StridesRequirement.
-            //
-
-            if (!needCopy) {
-                for (const auto& consumerEdge : input->consumerEdges()) {
-                    const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
-
-                    if (consumerInfo.hasInput(consumerEdge)) {
-                        const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
-                        IE_ASSERT(input->checkStrides(consumerStrideReqs));
-
-                        if (!checkStrides(input->desc(), output->strides(), consumerStrideReqs)) {
-                            needCopy = true;
-                            optionalCopy = false;
-                        }
-                    }
-                }
-            }
-
-            //
-            // Check producer StridesRequirement.
-            //
-
-            if (!needCopy) {
-                if (auto producerEdge = input->producerEdge()) {
-                    const auto& producerInfo = producerEdge->producer()->getDataStridesRequirements();
-
-                    if (producerInfo.hasOutput(producerEdge)) {
-                        const auto& producerStrideReqs = producerInfo.getOutput(producerEdge);
-                        IE_ASSERT(input->checkStrides(producerStrideReqs));
-
-                        if (!checkStrides(input->desc(), output->strides(), producerStrideReqs)) {
-                            needCopy = true;
-                            optionalCopy = false;
-                        }
-                    }
-
-                    if (!needCopy) {
-                        //
-                        // To reduce the size of HW output (still can be optimized).
-                        //
-
-                        if (producerEdge->producer()->category() == StageCategory::HW) {
-                            needCopy = true;
-                            optionalCopy = true;
-                        }
-                    }
-                }
-            }
-        }
-
-        //
-        // Insert Copy if needed
-        //
-
-        if (needCopy) {
-            Data inputCopy;
-            if (input->usage() == DataUsage::Const) {
-                inputCopy = model->addNewData(
-                    input->name() + "@copy",
-                    input->desc());
-            } else {
-                inputCopy = model->duplicateData(
-                    input,
-                    "@copy");
-                inputCopy->resetRequiredStrides();
-            }
-
-            auto copyStage = _stageBuilder->addCopyStage(
-                model,
-                formatString("%s@input=%d@copy-for-concat", stage->name(), inEdge->portInd()),
-                stage->origLayer(),
-                input,
-                inputCopy);
-            copyStage->attrs().set<bool>("optional", optionalCopy);
-
-            model->replaceStageInput(inEdge, inputCopy);
-
-            input = inputCopy;
-        }
-
-        //
-        // Add Data<->Data edge
-        //
-
-        model->connectDatas()
-                .parent(output)
-                .child(input)
-                .mode(SharedDataMode::ROI)
-                .order(SharedDataOrder::ChildWritesToParent)
-                .offset(offsetFromOutput)
-                .done();
-    }
-}
-
-void PassImpl::processSplit(const Model::Ptr& model, const Stage& stage) {
-    auto input = stage->input(0);
-
-    const auto& offsets = stage->attrs().get<std::vector<DimValues>>("offsets");
-    IE_ASSERT(offsets.size() == stage->numOutputs());
-
-    for (const auto& outEdge : stage->outputEdges()) {
-        IE_ASSERT(outEdge->portInd() >= 0);
-        IE_ASSERT(outEdge->portInd() < offsets.size());
-
-        auto output = outEdge->output();
-        const auto& offsetFromInput = offsets[outEdge->portInd()];
-
-        IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
-        IE_ASSERT(offsetFromInput.size() <= input->desc().numDims());
-        for (const auto& p : offsetFromInput) {
-            IE_ASSERT(input->desc().dimsOrder().hasDim(p.first));
-            IE_ASSERT(p.second + output->desc().dim(p.first) <= input->desc().dim(p.first));
-        }
-
-        //
-        // Check if we need to insert Copy stage
-        //
-
-        bool needCopy = false;
-        if (output->usage() != DataUsage::Intermediate) {
-            needCopy = true;
-        } else if (output->parentDataEdge() != nullptr) {
-            needCopy = true;
-        } else {
-            //
-            // Check output StridesRequirement.
-            //
-
-            IE_ASSERT(output->checkStrides(output->requiredStrides()));
-            if (!checkStrides(output->desc(), input->strides(), output->requiredStrides())) {
-                needCopy = true;
-            }
-
-            //
-            // Check consumers StridesRequirement.
-            //
-
-            if (!needCopy) {
-                for (const auto& consumerEdge : output->consumerEdges()) {
-                    const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
-
-                    if (consumerInfo.hasInput(consumerEdge)) {
-                        const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
-                        IE_ASSERT(output->checkStrides(consumerStrideReqs));
-
-                        if (!checkStrides(output->desc(), input->strides(), consumerStrideReqs)) {
-                            needCopy = true;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        //
-        // Insert Copy if needed
-        //
-
-        if (needCopy) {
-            auto outputCopy = model->duplicateData(
-                output,
-                "@copy");
-            outputCopy->resetRequiredStrides();
-
-            auto outPortInd = outEdge->portInd();
-
-            model->replaceStageOutput(outEdge, outputCopy);
-
-            _stageBuilder->addCopyStage(
-                model,
-                formatString("%s@output=%d@copy-for-split", stage->name(), outPortInd),
-                stage->origLayer(),
-                outputCopy,
-                output);
-
-            output = outputCopy;
-        }
-
-        //
-        // Add Data<->Data edge
-        //
-
-        model->connectDatas()
-                .parent(input)
-                .child(output)
-                .mode(SharedDataMode::ROI)
-                .order(SharedDataOrder::ParentWritesToChild)
-                .offset(offsetFromInput)
-                .done();
-    }
-}
-
-void PassImpl::processReshape(const Model::Ptr& model, const Stage& stage) {
-    auto input = stage->input(0);
-    auto output = stage->output(0);
-
-    IE_ASSERT(input->desc().dimsOrder() == DimsOrder::fromNumDims(input->desc().numDims()));
-    IE_ASSERT(input->checkStrides(StridesRequirement::compact()));
-
-    IE_ASSERT(output->desc().dimsOrder() == DimsOrder::fromNumDims(output->desc().numDims()));
-    IE_ASSERT(output->checkStrides(StridesRequirement::compact()));
-
-    //
-    // Check if we need to insert Copy stage
-    //
-
-    bool needCopy = false;
-    if (input->usage() != DataUsage::Intermediate &&
-        output->usage() != DataUsage::Intermediate) {
-        needCopy = true;
-    } else if (input->parentDataEdge() != nullptr &&
-               output->parentDataEdge() != nullptr) {
-        needCopy = true;
-    }
-
-    //
-    // Insert Copy if needed
-    //
-
-    if (needCopy) {
-        Data inputCopy;
-        if (input->usage() == DataUsage::Const) {
-            inputCopy = model->addNewData(
-                input->name() + "@copy",
-                input->desc());
-        } else {
-            inputCopy = model->duplicateData(
-                input,
-                "@copy");
-        }
-        inputCopy->updateRequiredStrides(StridesRequirement::compact());
-
-        _stageBuilder->addCopyStage(
-            model,
-            formatString("%s@copy-for-reshape", stage->name()),
-            stage->origLayer(),
-            input,
-            inputCopy);
-
-        model->replaceStageInput(stage->inputEdge(0), inputCopy);
-
-        input = inputCopy;
-    }
-
-    //
-    // Add Data<->Data edge
-    //
-
-    if (input->usage() == DataUsage::Intermediate &&
-        input->parentDataEdge() == nullptr) {
-        model->connectDatas()
-                .parent(output)
-                .child(input)
-                .mode(SharedDataMode::Reshape)
-                .order(SharedDataOrder::ChildWritesToParent)
-                .done();
-    } else {
-        IE_ASSERT(output->usage() == DataUsage::Intermediate);
-        IE_ASSERT(output->parentDataEdge() == nullptr);
-
-        model->connectDatas()
-                .parent(input)
-                .child(output)
-                .mode(SharedDataMode::Reshape)
-                .order(SharedDataOrder::ParentWritesToChild)
-                .done();
-    }
-}
-
-void PassImpl::processBroadcast(const Model::Ptr& model, const Stage& stage) {
-    auto input = stage->input(0);
-    auto output = stage->output(0);
-
-    const auto& offset = stage->attrs().get<DimValues>("offset");
-
-    IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
-
-    IE_ASSERT(offset.size() <= output->desc().numDims());
-    for (const auto& p : offset) {
-        IE_ASSERT(output->desc().dimsOrder().hasDim(p.first));
-        IE_ASSERT(p.second + input->desc().dim(p.first) <= output->desc().dim(p.first));
-    }
-
-    //
-    // Check if we need to insert Copy stage
-    //
-
-    bool needCopy = false;
-    bool optionalCopy = false;
-    if (input->usage() != DataUsage::Intermediate) {
-        needCopy = true;
-        optionalCopy = false;
-    } else if (input->parentDataEdge() != nullptr) {
-        needCopy = true;
-        optionalCopy = false;
-    } else {
-        //
-        // Check input StridesRequirement.
-        //
-
-        IE_ASSERT(input->checkStrides(input->requiredStrides()));
-        if (!checkStrides(input->desc(), output->strides(), input->requiredStrides())) {
-            needCopy = true;
-            optionalCopy = false;
-        }
-
-        //
-        // Check consumers StridesRequirement.
-        //
-
-        if (!needCopy) {
-            for (const auto& consumerEdge : input->consumerEdges()) {
-                const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
-
-                if (consumerInfo.hasInput(consumerEdge)) {
-                    const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
-                    IE_ASSERT(input->checkStrides(consumerStrideReqs));
-
-                    if (!checkStrides(input->desc(), output->strides(), consumerStrideReqs)) {
-                        needCopy = true;
-                        optionalCopy = false;
-                    }
-                }
-            }
-        }
-
-        //
-        // Check producer StridesRequirement.
-        //
-
-        if (!needCopy) {
-            if (auto producerEdge = input->producerEdge()) {
-                const auto& producerInfo = producerEdge->producer()->getDataStridesRequirements();
-
-                if (producerInfo.hasOutput(producerEdge)) {
-                    const auto& producerStrideReqs = producerInfo.getOutput(producerEdge);
-                    IE_ASSERT(input->checkStrides(producerStrideReqs));
-
-                    if (!checkStrides(input->desc(), output->strides(), producerStrideReqs)) {
-                        needCopy = true;
-                        optionalCopy = false;
-                    }
-                }
-
-                if (!needCopy) {
-                    //
-                    // To reduce the size of HW output (still can be optimized).
-                    //
-
-                    if (producerEdge->producer()->category() == StageCategory::HW) {
-                        needCopy = true;
-                        optionalCopy = true;
-                    }
-                }
-            }
+            _processor.processShrink(model, stage);
         }
     }
-
-    //
-    // Insert Copy if needed
-    //
-
-    if (needCopy) {
-        Data inputCopy;
-        if (input->usage() == DataUsage::Const) {
-            inputCopy = model->addNewData(
-                input->name() + "@copy",
-                input->desc());
-        } else {
-            inputCopy = model->duplicateData(
-                input,
-                "@copy");
-            inputCopy->resetRequiredStrides();
-        }
-
-        auto copyStage = _stageBuilder->addCopyStage(
-            model,
-            formatString("%s@copy-for-broadcast", stage->name()),
-            stage->origLayer(),
-            input,
-            inputCopy);
-        copyStage->attrs().set<bool>("optional", optionalCopy);
-
-        model->replaceStageInput(stage->inputEdge(0), inputCopy);
-
-        input = inputCopy;
-    }
-
-    //
-    // Add Data<->Data edge
-    //
-
-    model->connectDatas()
-            .parent(output)
-            .child(input)
-            .mode(SharedDataMode::ROI)
-            .order(SharedDataOrder::ChildWritesToParent)
-            .offset(offset)
-            .done();
-}
-
-void PassImpl::processShrink(const Model::Ptr& model, const Stage& stage) {
-    auto input = stage->input(0);
-    auto output = stage->output(0);
-
-    const auto& offset = stage->attrs().get<DimValues>("offset");
-
-    IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
-
-    IE_ASSERT(offset.size() <= input->desc().numDims());
-    for (const auto& p : offset) {
-        IE_ASSERT(input->desc().dimsOrder().hasDim(p.first));
-        IE_ASSERT(p.second + output->desc().dim(p.first) <= input->desc().dim(p.first));
-    }
-
-    //
-    // Check if we need to insert Copy for output
-    //
-
-    bool needCopy = false;
-    if (output->usage() != DataUsage::Intermediate) {
-        needCopy = true;
-    } else if (output->parentDataEdge() != nullptr) {
-        needCopy = true;
-    } else {
-        //
-        // Check output StridesRequirement.
-        //
-
-        IE_ASSERT(output->checkStrides(output->requiredStrides()));
-        if (!checkStrides(output->desc(), input->strides(), output->requiredStrides())) {
-            needCopy = true;
-        }
-
-        //
-        // Check consumers StridesRequirement.
-        //
-
-        if (!needCopy) {
-            for (const auto& consumerEdge : output->consumerEdges()) {
-                const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
-
-                if (consumerInfo.hasInput(consumerEdge)) {
-                    const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
-                    IE_ASSERT(output->checkStrides(consumerStrideReqs));
-
-                    if (!checkStrides(output->desc(), input->strides(), consumerStrideReqs)) {
-                        needCopy = true;
-                        break;
-                    }
-                }
-            }
-        }
-    }
-
-    //
-    // Insert output Copy if needed
-    //
-
-    if (needCopy) {
-        auto outputCopy = model->duplicateData(
-            output,
-            "@copy");
-        outputCopy->resetRequiredStrides();
-
-        model->replaceStageOutput(stage->outputEdge(0), outputCopy);
-
-        _stageBuilder->addCopyStage(
-            model,
-            formatString("%s@copy-output-for-shrink", stage->name()),
-            stage->origLayer(),
-            outputCopy,
-            output);
-
-        output = outputCopy;
-    }
-
-    //
-    // Add Data<->Data edge
-    //
-
-    model->connectDatas()
-            .parent(input)
-            .child(output)
-            .mode(SharedDataMode::ROI)
-            .order(SharedDataOrder::ParentWritesToChild)
-            .offset(offset)
-            .done();
 }
 
 }  // namespace
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/remove_unused_stages_outputs.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/remove_unused_stages_outputs.cpp
new file mode 100644 (file)
index 0000000..4793fcd
--- /dev/null
@@ -0,0 +1,50 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/pass_manager.hpp>
+
+#include <set>
+#include <memory>
+
+#include <vpu/sw/utility.hpp>
+
+namespace vpu {
+
+namespace {
+
+class PassImpl final : public Pass {
+public:
+    explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
+
+    void run(const Model::Ptr& model) override;
+
+private:
+    StageBuilder::Ptr _stageBuilder;
+};
+
+void PassImpl::run(const Model::Ptr& model) {
+    VPU_PROFILE(removeUnusedStagesOutputs);
+
+    for (const auto& stage : model->getStages()) {
+        if (stage == nullptr || ((stage->type() != StageType::LSTMCell) && (stage->type() != StageType::TopK))) {
+            continue;
+        }
+
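+        // LSTMCell and TopK may produce outputs that nothing consumes; such intermediate outputs are
+        // replaced with fake data, presumably so that no real buffer is allocated for them.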
+        for (const auto& outEdge : stage->outputEdges()) {
+            auto output = outEdge->output();
+
+            if (output->usage() == DataUsage::Intermediate && output->numConsumers() == 0) {
+                model->replaceStageOutput(outEdge, model->addFakeData());
+            }
+        }
+    }
+}
+
+}  // namespace
+
+Pass::Ptr PassManager::removeUnusedStagesOutputs() {
+    return std::make_shared<PassImpl>(_stageBuilder);
+}
+
+}  // namespace vpu
index 2044a18..f8381d2 100644 (file)
@@ -34,34 +34,26 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _orderInfo.setInput(_inputEdges[0], DimsOrder::NCHW);
-        _orderInfo.setOutput(_outputEdges[0], DimsOrder::NCHW);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        orderInfo.setInput(inputEdge(0), DimsOrder::NCHW);
+        orderInfo.setOutput(outputEdge(0), DimsOrder::NCHW);
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement().add(1, DimStride::Aligned));
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement().add(1, DimStride::Aligned));
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
@@ -69,6 +61,7 @@ private:
     }
 
     void finalCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -94,12 +87,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
@@ -110,10 +99,8 @@ private:
 class DeconvolutionToConvolutionContent final : public CalculatedDataContent {
 public:
     DeconvolutionToConvolutionContent(
-            const DataContent::Ptr& origContent,
-            int kernelSizeX, int kernelSizeY) :
-            CalculatedDataContent({origContent}),
-            _kerneSizeX(kernelSizeX), _kernelSizeY(kernelSizeY) {
+            const DataContent::Ptr& origContent) :
+            CalculatedDataContent({origContent}) {
     }
 
     void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
@@ -124,10 +111,6 @@ public:
 
         deconv_to_conv(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
     }
-
-private:
-    int _kerneSizeX;
-    int _kernelSizeY;
 };
 
 
@@ -202,7 +185,7 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         DataDesc newDesc({1, 1, output->desc().dim(Dim::C), output->desc().dim(Dim::N)});
         newDesc.setDim(Dim::N, input->desc().dim(Dim::N));
@@ -212,7 +195,7 @@ void PassImpl::run(const Model::Ptr& model) {
 
         auto newOutput = model->duplicateData(output, "@upsampleData", newDesc);
         auto newWeights = model->duplicateData(weights, "@upsampleData", weights->desc(),
-                     std::make_shared<DeconvolutionToConvolutionContent>(weights->content(), kernelSizeX, kernelSizeY));
+                     std::make_shared<DeconvolutionToConvolutionContent>(weights->content()));
 
         auto upsampleStage = model->addNewStage<UpsamplingStage>(
                 stage->origLayerName() + "@Upsample",
index b2e7431..72ffb1e 100644 (file)
@@ -22,21 +22,24 @@ namespace vpu {
 
 namespace {
 
-using ReplicatedDataMap = std::unordered_map<int, Data>;
-
 void setConvParameters(const vpu::Stage& stage, int kX, int kY) {
-    stage->attrs().set<int>("kernelSizeX", kX);
-    stage->attrs().set<int>("kernelSizeY", kY);
-    stage->attrs().set<int>("kernelStrideX", 1);
-    stage->attrs().set<int>("kernelStrideY", 1);
-    stage->attrs().set<int>("padLeft", 0);
-    stage->attrs().set<int>("padRight", 0);
-    stage->attrs().set<int>("padTop", 0);
-    stage->attrs().set<int>("padBottom", 0);
-    stage->attrs().set<int>("dilationX", 1);
-    stage->attrs().set<int>("dilationY", 1);
-    stage->attrs().set<int>("groupSize", 1);
-    stage->attrs().set<bool>("tryHW", true);
+    stage->attrs().set("kernelSizeX", kX);
+    stage->attrs().set("kernelSizeY", kY);
+
+    stage->attrs().set("kernelStrideX", kX);
+    stage->attrs().set("kernelStrideY", kY);
+
+    stage->attrs().set("padLeft", 0);
+    stage->attrs().set("padRight", 0);
+    stage->attrs().set("padTop", 0);
+    stage->attrs().set("padBottom", 0);
+
+    stage->attrs().set("dilationX", 1);
+    stage->attrs().set("dilationY", 1);
+
+    stage->attrs().set("groupSize", 1);
+
+    stage->attrs().set("tryHW", true);
 }
 
 class PassImpl final : public Pass {
@@ -57,129 +60,202 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        auto tryHW = stage->attrs().getOrDefault<bool>("tryHW", false);
+        const auto tryHW = stage->attrs().getOrDefault<bool>("tryHW", false);
         if (!tryHW) {
             continue;
         }
 
-        auto input = stage->input(0);
-        auto weights = stage->input(1);
-        auto biases  = stage->input(2);
-        auto output = stage->output(0);
-
-        auto dims = input->desc().dims();
-
-        if (input->desc().numDims() == 4) {
-            bool required = dims.has(Dim::N);
-            required &= dims.has(Dim::C);
-            required &= dims.has(Dim::H);
-            required &= dims.has(Dim::W);
-
-            if (required &&
-                input->desc().dim(Dim::H, 1) < 16 &&
-                input->desc().dim(Dim::W, 1) < 16) {
-                /* can convert to convolution layers */
-                model->disconnectStageDatas(stage);
-
-                auto kernelSizeX = input->desc().dim(Dim::W, 1);
-                auto kernelSizeY = input->desc().dim(Dim::H, 1);
-                IE_ASSERT(weights->desc().totalDimSize() >=
-                        kernelSizeX * kernelSizeY * (input->desc().dim(Dim::C)) * output->desc().dim(Dim::C));
-
-                auto newWeights = model->duplicateData(
-                    weights,
-                    "",
-                    DataDesc({
-                        kernelSizeX,
-                        kernelSizeY,
-                        input->desc().dim(Dim::C),
-                        output->desc().dim(Dim::C)}));
-
-                auto newBiases = model->addFakeData();
-                if (biases->usage() != DataUsage::Fake) {
-                    IE_ASSERT(biases->desc().totalDimSize() >= output->desc().dim(Dim::C));
-                    newBiases = model->duplicateData(biases,
-                        biases->name(),
-                        DataDesc({output->desc().dim(Dim::C)}));
-                }
+        const auto input = stage->input(0);
+        const auto weights = stage->input(1);
+        const auto biases  = stage->input(2);
+        const auto output = stage->output(0);
+
+        const auto inDims = input->desc().dims();
 
-                DataDesc newDesc({1, 1, output->desc().dim(Dim::C), output->desc().dim(Dim::N)});
-                auto newOutput = model->duplicateData(output, "@reshapeData", newDesc);
-
-                auto newStage = model->addNewStage<StubStage>(
-                    stage->origLayerName(),
-                    StageType::StubConv,
-                    stage->origLayer(),
-                    {input, newWeights, newBiases},
-                    {newOutput});
-                newStage->attrs().copyFrom(stage->attrs());
-                setConvParameters(newStage, kernelSizeX, kernelSizeY);
-
-                _stageBuilder->addReshapeStage(
-                    model,
-                    stage->name() + "@reshapeOut",
-                    stage->origLayer(),
-                    newOutput,
-                    output);
-
-                model->removeStage(stage);
+        if (inDims.size() != 2 && inDims.size() != 4) {
+            continue;
+        }
+
+        const auto inBatch = inDims[Dim::N];
+        const auto inSize  = input->desc().totalDimSize() / inBatch;
+
+        IE_ASSERT(output->desc().dim(Dim::N) == inBatch);
+
+        // HW restriction for kernel stride (we use stride equal to kernel size).
+        const int maxKernelSize = 8;
+
+        // TODO: something more sophisticated?
+        int convKernelSizeX = -1;
+        int convKernelSizeY = -1;
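+        // Pick the largest square kernel k <= maxKernelSize whose area divides the flattened
+        // per-sample input size, preferring a k that leaves a power-of-two channel count;
+        // otherwise fall back to any k whose area divides the size.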
+        for (int k = maxKernelSize; k >= 1; --k) {
+            if (inSize >= (k * k) && inSize % (k * k) == 0 && isPowerOfTwo(inSize / (k * k))) {
+                convKernelSizeX = k;
+                convKernelSizeY = k;
+                break;
             }
-        } else if (dims.has(Dim::N) &&
-                   dims.has(Dim::C) &&
-                   (!dims.has(Dim::H)) &&
-                   (!dims.has(Dim::W))) {
-            IE_ASSERT(weights->desc().totalDimSize() >=
-                    (input->desc().dim(Dim::C)) * output->desc().dim(Dim::C));
-
-            model->disconnectStageDatas(stage);
-
-            auto newWeights = model->duplicateData(weights,
-                weights->name(),
-                DataDesc({
-                    1,
-                    1,
-                    input->desc().dim(Dim::C),
-                    output->desc().dim(Dim::C)}));
-
-            auto newBiases =  model->addFakeData();
-            if (biases->usage() != DataUsage::Fake) {
-                IE_ASSERT(biases->desc().totalDimSize() >= output->desc().dim(Dim::C));
-                newBiases = model->duplicateData(biases,
-                                                  biases->name(),
-                                                  DataDesc({output->desc().dim(Dim::C)}));
+        }
+        if (convKernelSizeX == -1 || convKernelSizeY == -1) {
+            for (int k = maxKernelSize; k >= 1; --k) {
+                if (inSize >= (k * k) && inSize % (k * k) == 0) {
+                    convKernelSizeX = k;
+                    convKernelSizeY = k;
+                    break;
+                }
+            }
+        }
+
+        if (convKernelSizeX == -1 || convKernelSizeY == -1) {
+            continue;
+        }
+
+        const auto convInputC = inSize / (convKernelSizeX * convKernelSizeY);
+
+        model->disconnectStage(stage);
+
+        // TODO: something more sophisticated?
+        int batchStepW = 1;
+        int batchStepH = 1;
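+        // Factor the batch into a batchStepW x batchStepH grid (trying a few common divisors),
+        // so the batch can later be merged into the spatial plane of a single convolution.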
+        for (auto div : {100, 50, 20, 10}) {
+            if (inBatch >= div && inBatch % div == 0) {
+                batchStepW = div;
+                batchStepH = inBatch / div;
+                break;
             }
+        }
 
-            DataDesc newDescIn({1, 1, input->desc().dim(Dim::C), input->desc().dim(Dim::N)});
-            auto newInput = model->duplicateData(output, "@reshapeDataIn", newDescIn);
+        Data convInput;
+        if (batchStepW == 1 && batchStepH == 1) {
+            convInput = model->duplicateData(
+                input,
+                "@reshape",
+                DataDesc{convKernelSizeX, convKernelSizeY, convInputC, inBatch});
 
-            DataDesc newDescOut({1, 1, output->desc().dim(Dim::C), output->desc().dim(Dim::N)});
-            auto newOutput = model->duplicateData(output, "@reshapeDataOut", newDescOut);
+            _stageBuilder->addReshapeStage(
+                model,
+                convInput->name(),
+                stage->origLayer(),
+                input,
+                convInput);
+        } else {
+            // NCDHW
+            const auto reshaped = model->duplicateData(
+                input,
+                "@reshape",
+                DataDesc{convKernelSizeX, convKernelSizeY, convInputC, batchStepW, batchStepH});
 
             _stageBuilder->addReshapeStage(
                 model,
-                stage->name() + "@reshapeIn",
+                reshaped->name(),
                 stage->origLayer(),
                 input,
-                newInput);
+                reshaped);
+
+            // NCDHW
+            const auto permuted = model->duplicateData(
+                input,
+                "@permute-batch",
+                DataDesc{convKernelSizeX, batchStepW, convKernelSizeY, batchStepH, convInputC});
 
-            auto newStage = model->addNewStage<StubStage>(
-                stage->origLayerName(),
-                StageType::StubConv,
+            _stageBuilder->addPermuteStage(
+                model,
+                permuted->name(),
                 stage->origLayer(),
-                {newInput, newWeights, newBiases},
-                {newOutput});
-            newStage->attrs().copyFrom(stage->attrs());
-            setConvParameters(newStage, 1, 1);
+                reshaped,
+                permuted,
+                DimValues_<Dim>{{Dim::W, Dim::W}, {Dim::H, Dim::C}, {Dim::D, Dim::H}, {Dim::C, Dim::N}, {Dim::N, Dim::D}});
+
+            // NCHW
+            const auto merged = model->duplicateData(
+                input,
+                "@merge-batch",
+                DataDesc{convKernelSizeX * batchStepW, convKernelSizeY * batchStepH, convInputC, 1});
 
             _stageBuilder->addReshapeStage(
                 model,
-                stage->name() + "@reshapeOut",
+                merged->name(),
                 stage->origLayer(),
-                newOutput,
+                permuted,
+                merged);
+
+            convInput = merged;
+        }
+
+        Data convOutput;
+        if (batchStepW == 1 && batchStepH == 1) {
+            convOutput = model->duplicateData(
+                output,
+                "@reshape",
+                DataDesc{1, 1, output->desc().dim(Dim::C), inBatch});
+
+            _stageBuilder->addReshapeStage(
+                model,
+                convOutput->name(),
+                stage->origLayer(),
+                convOutput,
                 output);
+        } else {
+            // NCDHW
+            const auto reshaped = model->duplicateData(
+                output,
+                "@reshape",
+                DataDesc{1, 1, output->desc().dim(Dim::C), batchStepW, batchStepH});
 
-            model->removeStage(stage);
+            _stageBuilder->addReshapeStage(
+                model,
+                reshaped->name(),
+                stage->origLayer(),
+                reshaped,
+                output);
+
+            // NCDHW
+            const auto permuted = model->duplicateData(
+                output,
+                "@permute-batch",
+                DataDesc{1, batchStepW, 1, batchStepH, output->desc().dim(Dim::C)});
+
+            _stageBuilder->addPermuteStage(
+                model,
+                permuted->name(),
+                stage->origLayer(),
+                permuted,
+                reshaped,
+                DimValues_<Dim>{{Dim::W, Dim::W}, {Dim::H, Dim::D}, {Dim::D, Dim::N}, {Dim::C, Dim::H}, {Dim::N, Dim::C}});
+
+            // NCHW
+            const auto merged = model->duplicateData(
+                output,
+                "@merge-batch",
+                DataDesc{batchStepW, batchStepH, output->desc().dim(Dim::C), 1});
+
+            _stageBuilder->addReshapeStage(
+                model,
+                merged->name(),
+                stage->origLayer(),
+                merged,
+                permuted);
+
+            convOutput = merged;
         }
+
+        const auto convWeights = model->duplicateData(
+            weights,
+            "@fc-to-conv",
+            DataDesc({
+                convKernelSizeX,
+                convKernelSizeY,
+                convInputC,
+                output->desc().dim(Dim::C)}));
+
+        auto convStage = model->addNewStage<StubStage>(
+            stage->name() + "@fc-to-conv",
+            StageType::StubConv,
+            stage->origLayer(),
+            {convInput, convWeights, biases},
+            {convOutput});
+        convStage->attrs().copyFrom(stage->attrs());
+        setConvParameters(convStage, convKernelSizeX, convKernelSizeY);
+
+        model->removeStage(stage);
     }
 }
 
index 0a1fb47..36976ea 100644 (file)
@@ -66,6 +66,10 @@ void PassImpl::run(const Model::Ptr& model) {
         auto padTop = stage->attrs().get<int>("padTop");
         auto padBottom = stage->attrs().get<int>("padBottom");
 
+        auto scaleFactor = stage->attrs().getOrDefault<float>("scaleFactor", 1.0f);
+
+        IE_ASSERT(dilationX >= 1);
+        IE_ASSERT(dilationY >= 1);
         if (dilationX <= 1 && dilationY <= 1) {
             continue;
         }
@@ -75,17 +79,13 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        if ((padTop != padBottom) || (padLeft != padRight)) {
-            stage->attrs().set<bool>("tryHW", false);
-            continue;
-        }
-
-        if ((dilationX != dilationY) || (dilationX != 2)) {
+        if (((padLeft % dilationX) != 0) || ((padTop % dilationY) != 0)) {
             stage->attrs().set<bool>("tryHW", false);
             continue;
         }
 
-        if ((kernelStrideX != 1) || (kernelStrideY != 1)) {
+        if ((std::max(dilationX, kernelStrideX) % std::min(dilationX, kernelStrideX)) ||
+                (std::max(dilationY, kernelStrideY) % std::min(dilationY, kernelStrideY))) {
             stage->attrs().set<bool>("tryHW", false);
             continue;
         }
@@ -94,7 +94,6 @@ void PassImpl::run(const Model::Ptr& model) {
         auto weights = stage->input(1);
         auto biases = stage->input(2);
         auto output = stage->output(0);
-        auto input_org = input;
 
         if (input->desc().dim(Dim::N) > 1) {
             stage->attrs().set<bool>("tryHW", false);
@@ -107,9 +106,7 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        bool Expand_mark = false;
-        // TODO
-        const bool Use_pixel_alignment = false;
+        const bool Use_pixel_alignment = true;
         int pixel_stride_alignment = STRIDE_ALIGNMENT
                 / input->desc().elemSize();
         int InputExtended_width = input->desc().dim(Dim::W);
@@ -119,15 +116,9 @@ void PassImpl::run(const Model::Ptr& model) {
             InputExtended_width = divUp(input->desc().dim(Dim::W),
                     dilationX * pixel_stride_alignment) * dilationX
                     * pixel_stride_alignment;
-            InputExtended_height = divUp(input->desc().dim(Dim::H),
-                    dilationY * pixel_stride_alignment) * dilationY
-                    * pixel_stride_alignment;
-        } else if ((divUp(input->desc().dim(Dim::W), dilationX)
-                < pixel_stride_alignment)
-                || (divUp(input->desc().dim(Dim::H), dilationY)
-                        < pixel_stride_alignment)) {
-            InputExtended_width = pixel_stride_alignment * dilationX;
-            InputExtended_height = pixel_stride_alignment * dilationY;
+
+            InputExtended_height = divUp(input->desc().dim(Dim::H), dilationY)
+                    * dilationY;
         } else {
             InputExtended_width = divUp(input->desc().dim(Dim::W), dilationX)
                     * dilationX;
@@ -135,12 +126,6 @@ void PassImpl::run(const Model::Ptr& model) {
                     * dilationY;
         }
 
-        if ((((InputExtended_width % pixel_stride_alignment) == 0) && (InputExtended_width % (dilationX * pixel_stride_alignment) != 0))
-                || (((InputExtended_height % pixel_stride_alignment) == 0) && (InputExtended_height % (dilationX * dilationY) != 0))) {
-            stage->attrs().set<bool>("tryHW", false);
-            continue;
-        }
-
         float InputExtended_scale = std::max(
                 static_cast<float>(InputExtended_width)
                         / static_cast<float>(input->desc().dim(Dim::W)),
@@ -150,14 +135,16 @@ void PassImpl::run(const Model::Ptr& model) {
         const float MAX_INPUTEXTENDED_SCALE = 1.8;
         const float MIN_INPUTEXTENDED_SCALE = 1;
 
-        if (InputExtended_scale  >= MAX_INPUTEXTENDED_SCALE) {
+        if (InputExtended_scale >= MAX_INPUTEXTENDED_SCALE) {
             stage->attrs().set<bool>("tryHW", false);
             continue;
         }
 
+        bool Expand_mark = false;
+
         Expand_mark = (InputExtended_scale > MIN_INPUTEXTENDED_SCALE);
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         // Expand input if needed
         auto newDesc_input = input->desc();
@@ -170,15 +157,24 @@ void PassImpl::run(const Model::Ptr& model) {
             InputExtended = model->duplicateData(input, "@extended-input",
                     newDesc_input);
 
-            _stageBuilder->addBroadcastStage(model,
-                    stage->name() + "@expand-input", stage->origLayer(), input,
+            _stageBuilder->addPadStage(model, stage->name() + "@padding",
+                    stage->origLayer(),
+                    PadMode::Constant, 0.0f, DimValues(),
+                    DimValues({ { Dim::W, (InputExtended_width - input->desc().dim(Dim::W)) },
+                    { Dim::H, (InputExtended_height - input->desc().dim(Dim::H)) }, }),
+                    input,
                     InputExtended);
         }
 
-        DataDesc Reinterpret_inputdataDesc(DataType::FP16, DimsOrder::NCHW,
-                { dilationX, InputExtended->desc().dim(Dim::W) / dilationX,
-                        InputExtended->desc().dim(Dim::H),
-                        InputExtended->desc().dim(Dim::C) });
+        DataDesc Reinterpret_inputdataDesc(
+            DataType::FP16,
+            DimsOrder::NCHW,
+            {
+                dilationX,
+                InputExtended->desc().dim(Dim::W) / dilationX,
+                InputExtended->desc().dim(Dim::H),
+                InputExtended->desc().dim(Dim::C)
+            });
 
         Data Reinterpret_inputdata;
         Reinterpret_inputdata = model->duplicateData(InputExtended,
@@ -188,44 +184,27 @@ void PassImpl::run(const Model::Ptr& model) {
                 stage->name() + "@copy-reinterpret-input-data",
                 stage->origLayer(), InputExtended, Reinterpret_inputdata);
 
-        DataDesc Permuted_inputdataDesc(DataType::FP16, DimsOrder::NCHW,
-                { InputExtended->desc().dim(Dim::W) / dilationX,
-                        InputExtended->desc().dim(Dim::H),
-                        InputExtended->desc().dim(Dim::C),
-                        dilationX });
+        DataDesc Permuted_inputdataDesc(
+            DataType::FP16,
+            DimsOrder::NCHW,
+            {
+                InputExtended->desc().dim(Dim::W) / dilationX,
+                InputExtended->desc().dim(Dim::H),
+                InputExtended->desc().dim(Dim::C),
+                dilationX
+            });
 
         Data Permuted_inputdata;
         Permuted_inputdata = model->duplicateData(InputExtended,
                 "@permuted-input-data", Permuted_inputdataDesc);
 
-        SmallVector<int, MAX_DIMS_64> ieOrder(4, -1);
-
-        ieOrder[0] = 3;
-        ieOrder[1] = 0;
-        ieOrder[2] = 1;
-        ieOrder[3] = 2;
-
-        _stageBuilder->addPermuteStage(model,
-                stage->origLayerName() + "@permute-input-data",
-                stage->origLayer(), { Reinterpret_inputdata }, {
-                        Permuted_inputdata }, ieOrder);
-
-        // for conv output of subtensors
-        auto padx_new = padLeft - (kernelSizeX - 1) * (dilationX - 1) / 2;
-        auto pady_new = padTop - (kernelSizeY - 1) * (dilationY - 1) / 2;
-
-        auto newDesc_Permuted_input = InputExtended->desc();
-        newDesc_Permuted_input.setDim(Dim::W,
-                (((InputExtended->desc().dim(Dim::W) + 2 * padx_new
-                        - kernelSizeX) / kernelStrideX) + 1) / dilationX);
-        newDesc_Permuted_input.setDim(Dim::H,
-                ((InputExtended->desc().dim(Dim::H) + 2 * pady_new
-                        - kernelSizeY) / kernelStrideY) + 1);
-        newDesc_Permuted_input.setDim(Dim::C, output->desc().dim(Dim::C));
-        newDesc_Permuted_input.setDim(Dim::N, dilationX);
-
-        auto Subtensors_outputdata = model->duplicateData(output,
-                "@SubTensors-OutputData", newDesc_Permuted_input);
+        _stageBuilder->addPermuteStage(
+            model,
+            stage->origLayerName() + "@permute-input-data",
+            stage->origLayer(),
+            Reinterpret_inputdata,
+            Permuted_inputdata,
+            DimValues_<Dim>{{Dim::W, Dim::H}, {Dim::H, Dim::C}, {Dim::C, Dim::N}, {Dim::N, Dim::W}});
 
         // for skip rows, use reshape n c h w/2 -> n c h/2 w
         auto Reshape_Permuted_inputdata_Desc = Permuted_inputdata->desc();
@@ -242,36 +221,82 @@ void PassImpl::run(const Model::Ptr& model) {
                 stage->origLayer(), Permuted_inputdata,
                 Reshape_Permuted_inputdata);
 
-        auto Reshape_Permuted_outputdata_Desc = Subtensors_outputdata->desc();
-        Reshape_Permuted_outputdata_Desc.setDim(Dim::H,
-                Subtensors_outputdata->desc().dim(Dim::H) / dilationY);
-        Reshape_Permuted_outputdata_Desc.setDim(Dim::W,
-                Subtensors_outputdata->desc().dim(Dim::W) * dilationY);
-        auto Reshape_Permuted_outputdata = model->duplicateData(
-                Subtensors_outputdata, "@Reshape-Permuted-outputdata",
-                Reshape_Permuted_outputdata_Desc);
-
         // Desc of sub input tensor
         DataDesc Sub_inputdataDesc(
                 { Permuted_inputdata->desc().dim(Dim::W),
                         Permuted_inputdata->desc().dim(Dim::H) / dilationY,
-                        Permuted_inputdata->desc().dim(Dim::C),
-                        1 });
+                        Permuted_inputdata->desc().dim(Dim::C), 1 });
 
         Sub_inputdataDesc.reorder(DimsOrder::NCHW);
 
+        auto Sub_output_dilationX_dimenion = (dilationX / kernelStrideX) > 1 ? (dilationX / kernelStrideX) : 1;
+        auto Sub_output_dilationY_dimenion = (dilationY / kernelStrideY) > 1 ? (dilationY / kernelStrideY) : 1;
+        auto kernelStrideX_new = (dilationX / kernelStrideX) > 1 ? 1 : (kernelStrideX / dilationX);
+        auto kernelStrideY_new = (dilationY / kernelStrideY) > 1 ? 1 : (kernelStrideY / dilationY);
+
+        // for conv output of subtensors
+        auto padLeft_new = padLeft / dilationX;
+        auto padRight_new = padRight / dilationX;
+        auto padTop_new =  padTop / dilationY;
+        auto padBottom_new = padBottom / dilationY;
+
+        auto Subtensors_outputdataDesc = InputExtended->desc();
+
+        Subtensors_outputdataDesc.setDim(Dim::W,
+                ((InputExtended->desc().dim(Dim::W) + padLeft + padRight
+                        - dilationX * (kernelSizeX - 1) - 1 + kernelStrideX)
+                        / kernelStrideX) / Sub_output_dilationX_dimenion);
+
+        Subtensors_outputdataDesc.setDim(Dim::H,
+                (InputExtended->desc().dim(Dim::H) + padTop + padBottom
+                        - dilationY * (kernelSizeY - 1) - 1 + kernelStrideY)
+                        / kernelStrideY);
+
+        Subtensors_outputdataDesc.setDim(Dim::C, output->desc().dim(Dim::C));
+        Subtensors_outputdataDesc.setDim(Dim::N, Sub_output_dilationX_dimenion);
+
+        auto Subtensors_outputdata = model->duplicateData(output,
+                "@SubTensors-OutputData", Subtensors_outputdataDesc);
+
         // Desc of sub output tensor
-        auto Sub_outputdataDesc = Subtensors_outputdata->desc();
+        auto Real_sub_outputdataDesc = Subtensors_outputdata->desc();
+
+        int Real_sub_outputdata_width = ((Sub_inputdataDesc.dim(Dim::W)
+                + padLeft_new + padRight_new - kernelSizeX) / kernelStrideX_new)
+                + 1;
+        int Real_sub_outputdata_height = ((Sub_inputdataDesc.dim(Dim::H)
+                + padTop_new + padBottom_new - kernelSizeY) / kernelStrideY_new)
+                + 1;
+
+        if (Real_sub_outputdata_width != Subtensors_outputdataDesc.dim(Dim::W)) {
+            padRight_new = (Subtensors_outputdataDesc.dim(Dim::W) - 1) * kernelStrideX_new
+                    + kernelSizeX - padLeft_new - Sub_inputdataDesc.dim(Dim::W);
+            Real_sub_outputdata_width = Subtensors_outputdataDesc.dim(Dim::W);
+        }
+
+        if (Real_sub_outputdata_height != (Subtensors_outputdataDesc.dim(Dim::H) / Sub_output_dilationY_dimenion)) {
+            padBottom_new = (Subtensors_outputdataDesc.dim(Dim::H) - 1) * kernelStrideY_new
+                    + kernelSizeY - padTop_new - Sub_inputdataDesc.dim(Dim::H);
+            Real_sub_outputdata_height = (Subtensors_outputdataDesc.dim(Dim::H) / Sub_output_dilationY_dimenion);
+        }
 
-        Sub_outputdataDesc.setDim(Dim::N, 1);
-        Sub_outputdataDesc.setDim(Dim::C,
+        bool Sub_outputdata_expand = false;
+        int Sub_outputdata_width = Real_sub_outputdata_width;
+
+        if ((Real_sub_outputdata_width % pixel_stride_alignment) != 0) {
+            Sub_outputdata_expand = true;
+            Sub_outputdata_width = divUp(Real_sub_outputdata_width,
+                    pixel_stride_alignment) * pixel_stride_alignment;
+            padRight_new = (Sub_outputdata_width - 1) * kernelStrideX_new
+                    + kernelSizeX - padLeft_new - Sub_inputdataDesc.dim(Dim::W);
+        }
+
+        Real_sub_outputdataDesc.setDim(Dim::N, 1);
+        Real_sub_outputdataDesc.setDim(Dim::C,
                 Subtensors_outputdata->desc().dim(Dim::C));
-        Sub_outputdataDesc.setDim(Dim::H,
-                ((Sub_inputdataDesc.dim(Dim::H) + 2 * pady_new - kernelSizeY)
-                        / kernelStrideY) + 1);
-        Sub_outputdataDesc.setDim(Dim::W,
-                ((Sub_inputdataDesc.dim(Dim::W) + 2 * padx_new - kernelSizeX)
-                        / kernelStrideX) + 1);
+
+        Real_sub_outputdataDesc.setDim(Dim::H, Real_sub_outputdata_height);
+        Real_sub_outputdataDesc.setDim(Dim::W, Real_sub_outputdata_width);
 
         DataVector V_Sub_inputdata;
         std::vector<DimValues> V_Sub_inputdatasOffsets;
@@ -282,17 +307,18 @@ void PassImpl::run(const Model::Ptr& model) {
         DataVector V_newWeights;
         DataVector V_newbiases;
 
-        V_Sub_inputdata.reserve(dilationX * dilationY);
-        V_Sub_inputdatasOffsets.reserve(dilationX * dilationY);
-        V_Sub_outputdata.reserve(dilationX * dilationY);
-        V_Sub_outputdatasOffsets.reserve(dilationX * dilationY);
+        V_Sub_inputdata.reserve(Sub_output_dilationX_dimenion * Sub_output_dilationY_dimenion);
+        V_Sub_inputdatasOffsets.reserve(Sub_output_dilationX_dimenion * Sub_output_dilationY_dimenion);
+
+        V_newWeights.reserve(Sub_output_dilationX_dimenion * Sub_output_dilationY_dimenion);
+        V_newbiases.reserve(Sub_output_dilationX_dimenion * Sub_output_dilationY_dimenion);
 
-        V_newWeights.reserve(dilationX * dilationY);
-        V_newbiases.reserve(dilationX * dilationY);
+        V_Sub_outputdata.reserve(Sub_output_dilationX_dimenion * Sub_output_dilationY_dimenion);
+        V_Sub_outputdatasOffsets.reserve(Sub_output_dilationX_dimenion * Sub_output_dilationY_dimenion);
 
-        for (int dilationXInd = 0; dilationXInd < dilationX; ++dilationXInd) {
+        for (int dilationXInd = 0; dilationXInd < dilationX; dilationXInd += (dilationX / Sub_output_dilationX_dimenion)) {
             for (int dilationYInd = 0; dilationYInd < dilationY;
-                    ++dilationYInd) {
+                    dilationYInd += (dilationY / Sub_output_dilationY_dimenion)) {
                 Data Sub_inputdata;
                 Sub_inputdata = model->duplicateData(Permuted_inputdata,
                         "@Sub-InputData", Sub_inputdataDesc);
@@ -302,14 +328,21 @@ void PassImpl::run(const Model::Ptr& model) {
                 Sub_inputdatasOffsets.set(Dim::W,
                         dilationYInd * Sub_inputdataDesc.dim(Dim::W));
 
-                Data Sub_outputdata;
-                Sub_outputdata = model->duplicateData(Subtensors_outputdata,
-                        "@Sub_OutputData", Sub_outputdataDesc);
+                V_Sub_inputdata.emplace_back(Sub_inputdata);
+                V_Sub_inputdatasOffsets.emplace_back(Sub_inputdatasOffsets);
+
+                Data Real_sub_outputdata;
+                Real_sub_outputdata = model->duplicateData(
+                        Subtensors_outputdata, "@Sub_OutputData",
+                        Real_sub_outputdataDesc);
 
                 DimValues Sub_outputdatasOffsets;
-                Sub_outputdatasOffsets.set(Dim::N, dilationXInd);
+                Sub_outputdatasOffsets.set(Dim::N, dilationXInd * Sub_output_dilationX_dimenion / dilationX);
                 Sub_outputdatasOffsets.set(Dim::W,
-                        dilationYInd * Sub_outputdataDesc.dim(Dim::W));
+                        (dilationYInd * Sub_output_dilationY_dimenion / dilationY) * Real_sub_outputdataDesc.dim(Dim::W));
+
+                V_Sub_outputdata.emplace_back(Real_sub_outputdata);
+                V_Sub_outputdatasOffsets.emplace_back(Sub_outputdatasOffsets);
 
                 // reuse weights and biases
                 auto newWeights = model->duplicateData(weights, "@NewWeights",
@@ -317,11 +350,6 @@ void PassImpl::run(const Model::Ptr& model) {
                 auto newbiases = model->duplicateData(biases, "@Newbiases",
                         biases->desc());
 
-                V_Sub_inputdata.emplace_back(Sub_inputdata);
-                V_Sub_inputdatasOffsets.emplace_back(Sub_inputdatasOffsets);
-                V_Sub_outputdata.emplace_back(Sub_outputdata);
-                V_Sub_outputdatasOffsets.emplace_back(Sub_outputdatasOffsets);
-
                 V_newWeights.emplace_back(newWeights);
                 V_newbiases.emplace_back(newbiases);
             }
@@ -333,28 +361,73 @@ void PassImpl::run(const Model::Ptr& model) {
                 V_Sub_inputdata);
 
         // sub tensors convolution
-        for (int index = 0; index < dilationX * dilationY; ++index) {
+        for (int Sub_output_XInd = 0; Sub_output_XInd < Sub_output_dilationX_dimenion; ++Sub_output_XInd) {
+            for (int Sub_output_YInd = 0; Sub_output_YInd < Sub_output_dilationY_dimenion;
+                    ++Sub_output_YInd) {
             // Add SubDataConv stage
+            auto Sub_outputdataDesc = Real_sub_outputdataDesc;
+            Sub_outputdataDesc.setDim(Dim::W, Sub_outputdata_width);
+
+            Data Sub_outputdata;
+            Sub_outputdata = model->duplicateData(Subtensors_outputdata,
+                    "@Sub_OutputData", Sub_outputdataDesc);
+
             auto newStage = model->addNewStage<StubStage>(
                     stage->origLayerName() + "@SubDataConv",
                     StageType::StubConv, stage->origLayer(), {
-                            V_Sub_inputdata[index], V_newWeights[index],
-                            V_newbiases[index] }, { V_Sub_outputdata[index] });
+                            V_Sub_inputdata[Sub_output_XInd * Sub_output_dilationY_dimenion + Sub_output_YInd],
+                            V_newWeights[Sub_output_XInd * Sub_output_dilationY_dimenion + Sub_output_YInd],
+                            V_newbiases[Sub_output_XInd * Sub_output_dilationY_dimenion + Sub_output_YInd] }, { Sub_outputdata });
 
             newStage->attrs().set<int>("kernelSizeX", kernelSizeX);
             newStage->attrs().set<int>("kernelSizeY", kernelSizeY);
-            newStage->attrs().set<int>("kernelStrideX", kernelStrideX);
-            newStage->attrs().set<int>("kernelStrideY", kernelStrideY);
-            newStage->attrs().set<int>("padLeft", padx_new);
-            newStage->attrs().set<int>("padRight", padx_new);
-            newStage->attrs().set<int>("padTop", pady_new);
-            newStage->attrs().set<int>("padBottom", pady_new);
+
+            newStage->attrs().set<int>("kernelStrideX", kernelStrideX_new);
+            newStage->attrs().set<int>("kernelStrideY", kernelStrideY_new);
+
+            newStage->attrs().set<int>("padLeft", padLeft_new);
+            newStage->attrs().set<int>("padRight", padRight_new);
+
+            newStage->attrs().set<int>("padTop", padTop_new);
+            newStage->attrs().set<int>("padBottom", padBottom_new);
             newStage->attrs().set<int>("dilationX", 1);
             newStage->attrs().set<int>("dilationY", 1);
             newStage->attrs().set<int>("groupSize", groupSize);
             newStage->attrs().set<bool>("tryHW", true);
+
+            newStage->attrs().set<float>("scaleFactor", scaleFactor);
+
+            if (Sub_outputdata_expand) {
+                _stageBuilder->addShrinkStage(model,
+                        stage->name() + "@SubConvOutputData",
+                        stage->origLayer(), Sub_outputdata,
+                        V_Sub_outputdata[Sub_output_XInd * Sub_output_dilationY_dimenion + Sub_output_YInd]);
+            } else {
+                V_Sub_outputdata[Sub_output_XInd * Sub_output_dilationY_dimenion + Sub_output_YInd] = Sub_outputdata;
+            }
+        }
         }
 
+        auto Reshape_Permuted_outputdata_Desc = Subtensors_outputdata->desc();
+
+        Reshape_Permuted_outputdata_Desc.setDim(Dim::H,
+                Subtensors_outputdata->desc().dim(Dim::H) / Sub_output_dilationY_dimenion);
+        Reshape_Permuted_outputdata_Desc.setDim(Dim::W,
+                Subtensors_outputdata->desc().dim(Dim::W) * Sub_output_dilationY_dimenion);
+
+        auto Reshape_Permuted_outputdata = model->duplicateData(
+                Subtensors_outputdata, "@Reshape-Permuted-outputdata",
+                Reshape_Permuted_outputdata_Desc);
+
+        auto n = 0;
+        auto c = 0;
+        auto h = 0;
+        auto w = 0;
+        V_Sub_outputdatasOffsets[0].get(Dim::N, n);
+        V_Sub_outputdatasOffsets[0].get(Dim::C, c);
+        V_Sub_outputdatasOffsets[0].get(Dim::H, h);
+        V_Sub_outputdatasOffsets[0].get(Dim::W, w);
+
         auto ConcatSubOutputDataStage = _stageBuilder->addConcatStage(model,
                 stage->name() + "@Concat-Sub-OutputData", stage->origLayer(),
                 std::move(V_Sub_outputdatasOffsets), V_Sub_outputdata,
@@ -366,34 +439,21 @@ void PassImpl::run(const Model::Ptr& model) {
 
         // output permute
         DataDesc permute_outputdataDesc(DataType::FP16, DimsOrder::NCHW,
-                { Subtensors_outputdata->desc().dim(Dim::C),
-                        Subtensors_outputdata->desc().dim(Dim::H),
+                { Subtensors_outputdata->desc().dim(Dim::N),
                         Subtensors_outputdata->desc().dim(Dim::W),
-                        Subtensors_outputdata->desc().dim(Dim::N) });
-
-        permute_outputdataDesc.setDim(Dim::N,
-                Subtensors_outputdata->desc().dim(Dim::C));
-        permute_outputdataDesc.setDim(Dim::C,
-                Subtensors_outputdata->desc().dim(Dim::H));
-        permute_outputdataDesc.setDim(Dim::H,
-                Subtensors_outputdata->desc().dim(Dim::W));
-        permute_outputdataDesc.setDim(Dim::W,
-                Subtensors_outputdata->desc().dim(Dim::N));
+                        Subtensors_outputdata->desc().dim(Dim::H),
+                        Subtensors_outputdata->desc().dim(Dim::C) });
 
         auto permute_outputdata = model->duplicateData(Subtensors_outputdata,
                 "@Permuted-OutputData", permute_outputdataDesc);
 
-        SmallVector<int, MAX_DIMS_64> ieOrder2(4, -1);
-
-        ieOrder2[0] = 1;
-        ieOrder2[1] = 2;
-        ieOrder2[2] = 3;
-        ieOrder2[3] = 0;
-
-        _stageBuilder->addPermuteStage(model,
-                stage->origLayerName() + "@Permute-OutputData",
-                stage->origLayer(), { Subtensors_outputdata }, {
-                        permute_outputdata }, ieOrder2);
+        _stageBuilder->addPermuteStage(
+            model,
+            stage->origLayerName() + "@Permute-OutputData",
+            stage->origLayer(),
+            Subtensors_outputdata,
+            permute_outputdata,
+            DimValues_<Dim>{{Dim::W, Dim::N}, {Dim::H, Dim::W}, {Dim::C, Dim::H}, {Dim::N, Dim::C}});
 
         // Expand output if needed
         if (Expand_mark) {
@@ -427,7 +487,6 @@ void PassImpl::run(const Model::Ptr& model) {
                     stage->name() + "@copy-Permute-OutputData",
                     stage->origLayer(), permute_outputdata, output);
         }
-
         model->removeStage(stage);
     }
 }
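The pass above rewrites a dilated HW convolution as a set of dense (dilation = 1) sub-convolutions over phase-shifted sub-tensors, then concatenates and permutes the results back. A minimal standalone sketch of the underlying equivalence, shown for the 1-D case with unit stride and no padding (all names below are hypothetical and not part of the pass):

// Illustrative only: 1-D dilated convolution computed directly and via the
// same phase decomposition the pass applies to the W dimension.
#include <cassert>
#include <cstdio>
#include <vector>

// Direct dilated convolution: out[i] = sum_k in[i + k * d] * w[k]
std::vector<float> dilatedConv(const std::vector<float>& in,
                               const std::vector<float>& w, int d) {
    const int outLen = static_cast<int>(in.size()) - d * (static_cast<int>(w.size()) - 1);
    std::vector<float> out(outLen, 0.0f);
    for (int i = 0; i < outLen; ++i)
        for (size_t k = 0; k < w.size(); ++k)
            out[i] += in[i + static_cast<int>(k) * d] * w[k];
    return out;
}

// Phase decomposition: split the input into d interleaved sub-signals,
// run a dense (dilation = 1) convolution on each, then re-interleave.
std::vector<float> dilatedConvByPhases(const std::vector<float>& in,
                                       const std::vector<float>& w, int d) {
    const int outLen = static_cast<int>(in.size()) - d * (static_cast<int>(w.size()) - 1);
    std::vector<float> out(outLen, 0.0f);
    for (int phase = 0; phase < d; ++phase) {
        std::vector<float> sub;                          // in[phase], in[phase + d], ...
        for (size_t i = phase; i < in.size(); i += d) sub.push_back(in[i]);
        std::vector<float> subOut = dilatedConv(sub, w, /*dense*/ 1);
        for (size_t j = 0; j < subOut.size(); ++j) {
            const size_t pos = phase + j * d;            // interleave the result back
            if (pos < out.size()) out[pos] = subOut[j];
        }
    }
    return out;
}

int main() {
    std::vector<float> in = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
    std::vector<float> w  = {1, -1, 2};
    const int d = 2;
    auto direct = dilatedConv(in, w, d);
    auto phased = dilatedConvByPhases(in, w, d);
    assert(direct.size() == phased.size());
    for (size_t i = 0; i < direct.size(); ++i) assert(direct[i] == phased[i]);
    std::printf("phase decomposition matches direct dilated convolution\n");
    return 0;
}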
index eea3075..7f9d647 100644 (file)
@@ -88,7 +88,7 @@ void PassImpl::run(const Model::Ptr& model) {
             continue;
         }
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         auto inGroupDimC = input->desc().dim(Dim::C) / groupSize;
         auto outGroupDimC = output->desc().dim(Dim::C) / groupSize;
@@ -191,8 +191,8 @@ void PassImpl::run(const Model::Ptr& model) {
             // subConvStage
 
             auto subConvStage = model->duplicateStage(
-                stage->name() + postfix,
                 stage,
+                postfix,
                 {subInputs[groupInd], subWeights, subBiases},
                 {subOutputs[groupInd]});
 
index dc38c76..7138d9e 100644 (file)
@@ -104,8 +104,8 @@ void PassImpl::run(const Model::Ptr& model) {
 
         auto numTiles = (convOutput->desc().dim(Dim::C) + tileSize - 1) / tileSize;
 
-        model->disconnectStageDatas(convStage);
-        model->disconnectStageDatas(poolStage);
+        model->disconnectStage(convStage);
+        model->disconnectStage(poolStage);
 
         DataVector subOutputs(numTiles);
 
@@ -187,14 +187,14 @@ void PassImpl::run(const Model::Ptr& model) {
             }
 
             model->duplicateStage(
-                convStage->name() + postfix,
                 convStage,
+                postfix,
                 {convInput, tileWeights, tileBiases},
                 {convOutputTile});
 
             model->duplicateStage(
-                poolStage->name() + postfix,
                 poolStage,
+                postfix,
                 {convOutputTile},
                 {poolOutputTile});
 
index d044429..e0870e9 100644 (file)
@@ -199,7 +199,7 @@ void PassImpl::run(const Model::Ptr& model) {
         // Multiple tiles processing
         //
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         DataVector subInputs(numTiles);
         DataVector subOutputs(numTiles);
@@ -237,8 +237,8 @@ void PassImpl::run(const Model::Ptr& model) {
             auto tileBiases = std::get<1>(constDatas);
 
             auto tileStage = model->duplicateStage(
-                stage->name() + postfix,
                 stage,
+                postfix,
                 {subInputs[tileInd], tileWeights, tileBiases},
                 {subOutputs[tileInd]});
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/passes/strided_slice.cpp b/inference-engine/src/vpu/graph_transformer/src/passes/strided_slice.cpp
new file mode 100644 (file)
index 0000000..98d5fc7
--- /dev/null
@@ -0,0 +1,317 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/pass_manager.hpp>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <vpu/compile_env.hpp>
+#include <vpu/stub_stage.hpp>
+#include <vpu/hw/mx_stage.hpp>
+#include <vpu/hw/tiling.hpp>
+#include <vpu/hw/utility.hpp>
+
+namespace vpu {
+
+namespace {
+
+struct StridedSliceParams {
+    DimValues begin;
+    DimValues end;
+    DimValues strides;
+
+    DimValues begin_mask;
+    DimValues end_mask;
+};
+
+struct StridedSliceInternalParams {
+    DimValues begin_dms;
+    DimValues end_dms;
+    DimValues strides_dms;
+};
+
+class PassImpl final : public Pass {
+public:
+    explicit PassImpl(StageBuilder::Ptr stageBuilder) : _stageBuilder(std::move(stageBuilder)) {}
+
+    void run(const Model::Ptr& model) override;
+
+private:
+    StageBuilder::Ptr _stageBuilder;
+
+    static StridedSliceParams parseInputParams(const Stage& stage);
+    static StridedSliceInternalParams computeInternalParams(const Stage& stage, StridedSliceParams params);
+};
+
+StridedSliceParams PassImpl::parseInputParams(const Stage& stage) {
+    auto beginInput   = stage->input(1);
+    auto endInput     = stage->input(2);
+    auto stridesInput = stage->input(3);
+
+    IE_ASSERT(beginInput->content() != nullptr);
+    IE_ASSERT(endInput->content() != nullptr);
+    IE_ASSERT(stridesInput->content() != nullptr);
+
+    StridedSliceParams params;
+
+    size_t num_input_dims = stage->input(0)->desc().numDims();
+
+    auto vectorToDimValues = [](const std::vector<int>& v) {
+        auto dims = DimsOrder::fromNumDims(v.size()).toIndices();
+        int idx = v.size();
+        for (auto& dim : dims) {
+            idx--;
+            dim.second = v[idx];
+        }
+        return dims;
+    };
+
+    params.begin = vectorToDimValues(
+        std::vector<int>(beginInput->content()->get<int>(),
+                         beginInput->content()->get<int>() + beginInput->desc().dims().get(Dim::C, 0)));
+    params.end = vectorToDimValues(
+        std::vector<int>(endInput->content()->get<int>(),
+                         endInput->content()->get<int>() + endInput->desc().dims().get(Dim::C, 0)));
+    params.strides = vectorToDimValues(
+        std::vector<int>(stridesInput->content()->get<int>(),
+                         stridesInput->content()->get<int>() + stridesInput->desc().dims().get(Dim::C, 0)));
+
+    IE_ASSERT(params.begin.size() == num_input_dims);
+    IE_ASSERT(params.end.size() == num_input_dims);
+    IE_ASSERT(params.strides.size() == num_input_dims);
+
+    std::vector<int> begin_mask_values;
+    std::vector<int> end_mask_values;
+
+    std::string begin_mask_str = stage->origLayer()->GetParamAsString("begin_mask", "");
+    for (const auto& c : begin_mask_str) {
+        if (c == '1') begin_mask_values.push_back(1);
+        else if (c == '0') begin_mask_values.push_back(0);
+    }
+    begin_mask_values.insert(begin_mask_values.end(), num_input_dims - begin_mask_values.size(), 1);
+
+    std::string end_mask_str = stage->origLayer()->GetParamAsString("end_mask", "");
+    for (const auto& c : end_mask_str) {
+        if (c == '1') end_mask_values.push_back(1);
+        else if (c == '0') end_mask_values.push_back(0);
+    }
+    end_mask_values.insert(end_mask_values.end(), num_input_dims - end_mask_values.size(), 1);
+
+    std::string ellipsis_mask_str = stage->origLayer()->GetParamAsString("ellipsis_mask", "");
+    for (const auto& c : ellipsis_mask_str) {
+        IE_ASSERT(c != '1') << "VPU doesn't support ellipsis_mask for StridedSlice";
+    }
+
+    std::string new_axis_mask_str = stage->origLayer()->GetParamAsString("new_axis_mask", "");
+    for (const auto& c : new_axis_mask_str) {
+        IE_ASSERT(c != '1') << "VPU doesn't support new_axis_mask for StridedSlice";
+    }
+
+    std::string shrink_axis_mask_str = stage->origLayer()->GetParamAsString("shrink_axis_mask", "");
+    for (const auto& c : shrink_axis_mask_str) {
+        IE_ASSERT(c != '1') << "VPU doesn't support shrink_axis_mask for StridedSlice";
+    }
+
+    params.begin_mask = vectorToDimValues(begin_mask_values);
+    params.end_mask = vectorToDimValues(end_mask_values);
+
+    return params;
+}
+
+StridedSliceInternalParams PassImpl::computeInternalParams(const Stage& stage, StridedSliceParams params) {
+    auto input = stage->input(0);
+
+    StridedSliceInternalParams m_params = StridedSliceInternalParams();
+    size_t numDims = input->desc().numDims();
+
+    for (const auto&  dim : input->desc().dimsOrder().toPermutation()) {
+        m_params.begin_dms.set(dim, 0);
+        m_params.end_dms.set(dim, input->desc().dim(dim));
+        m_params.strides_dms.set(dim, 1);
+    }
+
+    auto clip = [](int value, int min, int max) {
+        return std::min(std::max(min, value), max);
+    };
+
+    for (const auto& dim : input->desc().dimsOrder().toPermutation()) {
+        m_params.strides_dms.set(dim, params.strides[dim]);
+
+        IE_ASSERT(params.begin_mask[dim] == 1 || params.begin_mask[dim] == 0);
+        IE_ASSERT(params.end_mask[dim] == 1 || params.end_mask[dim] == 0);
+
+        m_params.begin_dms.set(dim,
+            params.begin_mask[dim] ? clip(params.begin[dim], 0, input->desc().dim(dim)) : 0);
+        m_params.end_dms.set(dim,
+            params.end_mask[dim] ? clip(params.end[dim], 0, input->desc().dim(dim)) : input->desc().dim(dim));
+
+        IE_ASSERT(dim != Dim::N || numDims < 4 || m_params.strides_dms[dim] == 1)
+            << "VPU doesn't support batch strides for StridedSlice";
+        IE_ASSERT(m_params.begin_dms[dim] >= 0 && m_params.begin_dms[dim] < m_params.end_dms[dim]);
+        IE_ASSERT(m_params.end_dms[dim] <= input->desc().dim(dim));
+        IE_ASSERT(m_params.strides_dms[dim] > 0);
+    }
+
+    return m_params;
+}
+
+void PassImpl::run(const Model::Ptr& model) {
+    VPU_PROFILE(stridedSlice);
+
+    for (const auto& stage : model->getStages()) {
+        if (stage->type() != StageType::StridedSlice) {
+            continue;
+        }
+        IE_ASSERT(stage->numInputs() == 4);
+        IE_ASSERT(stage->numOutputs() == 1);
+
+        auto input  = stage->input(0);
+        auto output = stage->output(0);
+
+        IE_ASSERT(input->desc().numDims() == output->desc().numDims());
+        IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
+
+        auto params = parseInputParams(stage);
+        auto m_params = computeInternalParams(stage, params);
+
+        model->disconnectStage(stage);
+
+        auto directOrder = DimsOrder::fromNumDims(input->desc().numDims());
+        auto perm = directOrder.toPermutation();
+
+        //
+        // Select a region of interest in accordance with the begin and end parameters.
+        //
+
+        const bool needSelectROI = std::any_of(perm.begin(), perm.end(), [&](Dim dim) {
+            return m_params.begin_dms[dim] != 0 || m_params.end_dms[dim] != input->desc().dim(dim); });
+        if (needSelectROI) {
+            auto roiDesc = input->desc();
+            for (const auto &dim : perm) {
+                roiDesc.setDim(dim, m_params.end_dms[dim] - m_params.begin_dms[dim]);
+            }
+            auto roiData = model->duplicateData(input, "@roi", roiDesc);
+            auto shrinkStage = _stageBuilder->addShrinkStage(
+                model,
+                stage->name() + "@roi-selection",
+                stage->origLayer(),
+                input,
+                roiData);
+            shrinkStage->attrs().set("offset", m_params.begin_dms);
+            input = roiData;
+        }
+
+        //
+        // Expand each dimension of the input tensor that is not evenly divisible by its stride,
+        // so that the reshape/split steps below always operate on whole stride groups.
+        //
+
+        const bool needExpand = std::any_of(perm.begin(), perm.end(), [&](Dim dim) {
+            return input->desc().dim(dim) % m_params.strides_dms[dim] != 0; });
+        if (needExpand) {
+            auto expandDesc = input->desc();
+            for (const auto& dim : perm) {
+                auto alignValue = (m_params.strides_dms[dim] - expandDesc.dim(dim) % m_params.strides_dms[dim])
+                    % m_params.strides_dms[dim];
+                expandDesc.setDim(dim, expandDesc.dim(dim) + alignValue);
+            }
+            auto expandedInputData = model->duplicateData(input, "@extended-input", expandDesc);
+            _stageBuilder->addExpandStage(
+                model,
+                stage->name() + "@expand-input",
+                stage->origLayer(),
+                input,
+                expandedInputData);
+            input = expandedInputData;
+        }
+
+        //
+        // To copy with a stride, reshape so that the data of interest sits at the beginning of each dimension,
+        // split it from the unneeded data, and then apply the reverse reshape.
+        //
+
+        for (const auto& dim : perm) {
+            if (m_params.strides_dms[dim] == 1)
+                continue;
+
+            auto stride = abs(m_params.strides_dms[dim]);
+            auto reshapedDesc = input->desc();
+            auto subtensorDesc = input->desc();
+            auto intermediateOutDesc = input->desc();
+
+            if (input->desc().numDims() == 1) {
+                reshapedDesc = DataDesc({stride, input->desc().dim(dim) / stride});
+                subtensorDesc = DataDesc({1, input->desc().dim(dim) / stride});
+            } else if (perm.front() == dim) {
+                auto nextDim = perm.at(directOrder.dimInd(dim) + 1);
+                reshapedDesc.setDim(dim, stride);
+                reshapedDesc.setDim(nextDim,
+                                    input->desc().dim(dim) * input->desc().dim(nextDim) / stride);
+                subtensorDesc.setDim(dim, 1);
+                subtensorDesc.setDim(nextDim, reshapedDesc.dim(nextDim));
+            } else {
+                auto previousDim = perm.at(directOrder.dimInd(dim) - 1);
+                reshapedDesc.setDim(dim, input->desc().dim(dim) / stride);
+                reshapedDesc.setDim(previousDim, input->desc().dim(previousDim) * stride);
+
+                subtensorDesc.setDim(dim, reshapedDesc.dim(dim));
+                subtensorDesc.setDim(previousDim, input->desc().dim(previousDim));
+            }
+
+            intermediateOutDesc.setDim(dim, input->desc().dim(dim) / stride);
+
+            auto reshapedInputData = model->duplicateData(
+                input, formatString("@reshaped-input@dim%s", dim), reshapedDesc);
+            auto subtensorData = model->duplicateData(
+                input, formatString("@subtensor@dim%s", dim), subtensorDesc);
+            auto intermediateOutputData = model->duplicateData(
+                input, formatString("@intermediate-output@dim%s", dim), intermediateOutDesc);
+
+            _stageBuilder->addReshapeStage(
+                model,
+                formatString("%s@reshape-input@dim%s", stage->name(), dim),
+                stage->origLayer(),
+                input,
+                reshapedInputData);
+
+            _stageBuilder->addSplitStage(
+                model,
+                formatString("%s@split@dim%s", stage->name(), dim),
+                stage->origLayer(),
+                dim,
+                reshapedInputData,
+                {subtensorData});
+
+            _stageBuilder->addReshapeStage(
+                model,
+                formatString("%s@reshape-output@dim%s", stage->name(), dim),
+                stage->origLayer(),
+                subtensorData,
+                intermediateOutputData);
+
+            input = intermediateOutputData;
+        }
+
+        _stageBuilder->addCopyStage(
+            model,
+            formatString("%s@copy-output", stage->name()),
+            stage->origLayer(),
+            input,
+            output);
+
+        model->removeStage(stage);
+    }
+}
+
+}  // namespace
+
+Pass::Ptr PassManager::stridedSlice() {
+    return std::make_shared<PassImpl>(_stageBuilder);
+}
+
+}  // namespace vpu
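The new pass lowers StridedSlice to a Shrink stage for the begin/end window, an optional Expand so that every dimension becomes divisible by its stride, and then a reshape/split/reshape sequence per strided dimension. A minimal standalone sketch of that reshape trick for one dimension (hypothetical helper; the length is assumed to be padded to a multiple of the stride, as the expand step guarantees):

// Illustrative only: taking every stride-th element of a contiguous buffer
// through the same "reshape, then keep the leading slice" idea the pass uses.
#include <cassert>
#include <cstdio>
#include <vector>

std::vector<int> strideSelectByReshape(const std::vector<int>& in, int stride) {
    assert(stride > 0 && in.size() % stride == 0);
    const int rows = static_cast<int>(in.size()) / stride;

    // "Reshape" the flat buffer to rows x stride (row-major) and "split" off the
    // first column: that column is exactly in[0], in[stride], in[2 * stride], ...
    std::vector<int> out;
    out.reserve(rows);
    for (int r = 0; r < rows; ++r)
        out.push_back(in[r * stride + 0]);   // column 0 of the reshaped view
    return out;
}

int main() {
    std::vector<int> in = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
    auto out = strideSelectByReshape(in, 3);
    for (int v : out) std::printf("%d ", v);   // prints: 0 3 6 9
    std::printf("\n");
    return 0;
}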
index 3588814..ba2a0b8 100644 (file)
@@ -66,17 +66,15 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         auto finalOrder = input->desc().dimsOrder();
         if (finalOrder.dimInd(Dim::C) == 1) {
@@ -87,37 +85,31 @@ private:
         if (_type == StageType::Conv ||
             _type == StageType::Im2ColConvolution) {
             if (finalOrder != input->desc().dimsOrder()) {
-                _orderInfo.setInput(_inputEdges[0], finalOrder);
+                orderInfo.setInput(inputEdge(0), finalOrder);
             }
-            _orderInfo.setOutput(_outputEdges[0], finalOrder);
+            orderInfo.setOutput(outputEdge(0), finalOrder);
         } else if (_type == StageType::DepthConv) {
             if (finalOrder != input->desc().dimsOrder()) {
-                _orderInfo.setInput(_inputEdges[0], finalOrder);
+                orderInfo.setInput(inputEdge(0), finalOrder);
             }
-            _orderInfo.setOutput(_outputEdges[0], finalOrder);
+            orderInfo.setOutput(outputEdge(0), finalOrder);
         } else {
-            _orderInfo.setInput(_inputEdges[0], finalOrder.createMovedDim(Dim::C, 0));
-            _orderInfo.setOutput(_outputEdges[0], finalOrder.createMovedDim(Dim::C, 0));
+            orderInfo.setInput(inputEdge(0), finalOrder.createMovedDim(Dim::C, 0));
+            orderInfo.setOutput(outputEdge(0), finalOrder.createMovedDim(Dim::C, 0));
         }
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
         if (_type != StageType::DepthConv) {
-            _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-            _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+            stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+            stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         auto kernelSizeX = attrs().get<int>("kernelSizeX");
         auto kernelSizeY = attrs().get<int>("kernelSizeY");
@@ -223,18 +215,18 @@ private:
 
         IE_ASSERT(swWeights != nullptr);
 
-        _model->replaceStageInput(_inputEdges[1], swWeights);
+        _model->replaceStageInput(inputEdge(1), swWeights);
     }
 
-    void getBatchSupportInfoImpl() const  override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     void finalCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+              {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}},
+              {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -258,20 +250,17 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
         weights->serializeOldBuffer(handle_from_this(), serializer);
 
-        if (!_tempBufferEdges.empty()) {
-            _tempBufferEdges[0]->tempBuffer()->serializeOldBuffer(handle_from_this(), serializer);
+        if (numTempBuffers() == 1) {
+            tempBuffer(0)->serializeOldBuffer(handle_from_this(), serializer);
         }
 
         // TODO: remove this
index 42874b2..3c2dd91 100644 (file)
@@ -174,17 +174,15 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         auto finalOrder = input->desc().dimsOrder();
         if (finalOrder.dimInd(Dim::C) == 1) {
@@ -194,22 +192,19 @@ private:
 
         if (_type == StageType::DepthDeconv) {
             if (finalOrder != input->desc().dimsOrder()) {
-                _orderInfo.setInput(_inputEdges[0], finalOrder);
+                orderInfo.setInput(inputEdge(0), finalOrder);
             }
-            _orderInfo.setOutput(_outputEdges[0], finalOrder);
+            orderInfo.setOutput(outputEdge(0), finalOrder);
         } else {
-            _orderInfo.setInput(_inputEdges[0], finalOrder.createMovedDim(Dim::C, 0));
-            _orderInfo.setOutput(_outputEdges[0], finalOrder.createMovedDim(Dim::C, 0));
+            orderInfo.setInput(inputEdge(0), finalOrder.createMovedDim(Dim::C, 0));
+            orderInfo.setOutput(outputEdge(0), finalOrder.createMovedDim(Dim::C, 0));
         }
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         auto finalOrder = input->desc().dimsOrder();
         if (finalOrder.dimInd(Dim::C) == 1) {
@@ -220,22 +215,19 @@ private:
         if (_type == StageType::DepthDeconv) {
             if (finalOrder.dimInd(Dim::C) == 0) {
                 // HWC
-                _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-                _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+                stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+                stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
             }
         } else {
-            _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-            _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+            stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+            stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         auto kernelSizeX = attrs().get<int>("kernelSizeX");
         auto kernelSizeY = attrs().get<int>("kernelSizeY");
@@ -314,18 +306,18 @@ private:
 
         IE_ASSERT(swWeights != nullptr);
 
-        _model->replaceStageInput(_inputEdges[1], swWeights);
+        _model->replaceStageInput(inputEdge(1), swWeights);
     }
 
-    void getBatchSupportInfoImpl() const  override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     void finalCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+             {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}},
+             {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -349,20 +341,17 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
         weights->serializeOldBuffer(handle_from_this(), serializer);
 
-        if (!_tempBufferEdges.empty()) {
-            _tempBufferEdges[0]->tempBuffer()->serializeOldBuffer(handle_from_this(), serializer);
+        if (numTempBuffers() == 1) {
+            tempBuffer(0)->serializeOldBuffer(handle_from_this(), serializer);
         }
 
         // TODO: remove this
@@ -404,7 +393,7 @@ void PassImpl::run(const Model::Ptr& model) {
         auto dilationY = stage->attrs().get<int>("dilationY");
         auto groupSize = stage->attrs().get<int>("groupSize");
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         if (groupSize == 0 ||
             (groupSize > input->desc().dim(Dim::C)) ||
index e68ee51..7827bae 100644 (file)
@@ -23,34 +23,26 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
-
-        _orderInfo.setInput(_inputEdges[0], input->desc().dimsOrder().createMovedDim(Dim::C, 0));
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 0));
+        orderInfo.setInput(inputEdge(0), input->desc().dimsOrder().createMovedDim(Dim::C, 0));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, 0));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto weights = _inputEdges[1]->input();
+        auto weights = inputEdge(1)->input();
 
         auto swWeights = weights->attrs().getOrDefault<Data>("swWeights", nullptr);
         if (swWeights == nullptr) {
@@ -63,31 +55,28 @@ private:
             weights->attrs().set<Data>("swWeights", swWeights);
         }
 
-        _model->replaceStageInput(_inputEdges[1], swWeights);
+        _model->replaceStageInput(inputEdge(1), swWeights);
     }
 
-    void getBatchSupportInfoImpl() const  override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     void finalCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+             {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}},
+             {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
 
@@ -135,7 +124,7 @@ void PassImpl::run(const Model::Ptr& model) {
         auto biases = stage->input(2);
         auto output = stage->output(0);
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         if (biases->usage() != DataUsage::Fake) {
             auto tempOutput = model->duplicateData(
index 25f8417..f1e883b 100644 (file)
@@ -21,15 +21,13 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
         auto finalOrder = input->desc().dimsOrder();
         if (input->desc().dim(Dim::N, 1) > 1) {
@@ -37,15 +35,12 @@ private:
             finalOrder = finalOrder.createMovedDim(Dim::C, 2);
         }
 
-        _orderInfo.setInput(_inputEdges[0], finalOrder);
-        _orderInfo.setOutput(_outputEdges[0], finalOrder);
+        orderInfo.setInput(inputEdge(0), finalOrder);
+        orderInfo.setOutput(outputEdge(0), finalOrder);
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
 
         auto dimsOrder = input->desc().dimsOrder();
 
@@ -56,8 +51,8 @@ private:
             reqs.add(dimsOrder.dimInd(Dim::N), DimStride::Compact);
         }
 
-        _stridesInfo.setInput(_inputEdges[0], reqs);
-        _stridesInfo.setOutput(_outputEdges[0], reqs);
+        stridesInfo.setInput(inputEdge(0), reqs);
+        stridesInfo.setOutput(outputEdge(0), reqs);
 
         //
         // * AvgPool/MaxPool support both YXZ and ZYX orders:
@@ -68,7 +63,7 @@ private:
 
         if (_type == StageType::MaxPool || _type == StageType::AvgPool) {
             if (dimsOrder.dimInd(Dim::C) == 0) {
-                _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
+                stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
             }
         }
     }
@@ -76,11 +71,12 @@ private:
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const  override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>&) override {
         // Pooling will support batch by merging it with the previous dimension.
     }
 
     void finalCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -102,11 +98,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         if (_type == StageType::GlobalMaxPool ||
             _type == StageType::GlobalAvgPool) {
@@ -169,7 +162,7 @@ void PassImpl::run(const Model::Ptr& model) {
         auto padBottom = stage->attrs().get<int>("padBottom");
         auto excludePad = stage->attrs().get<bool>("excludePad");
 
-        model->disconnectStageDatas(stage);
+        model->disconnectStage(stage);
 
         auto stageType = StageType::None;
         if (stage->type() == StageType::StubMaxPool) {
index 2b2bdf6..8af1d52 100644 (file)
@@ -106,7 +106,7 @@ void PassImpl::run(const Model::Ptr& model) {
         for (const auto& nextStage : nextStages) {
             auto nextOutput = nextStage->output(0);
 
-            model->disconnectStageDatas(nextStage);
+            model->disconnectStage(nextStage);
 
             DataVector newOutputs;
             newOutputs.reserve(lastInputs.size());
@@ -124,8 +124,8 @@ void PassImpl::run(const Model::Ptr& model) {
                     newDesc);
 
                 model->duplicateStage(
-                    nextStage->name() + postfix,
                     nextStage,
+                    postfix,
                     {curInput},
                     {newOutput});
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/special_stage_processor.cpp b/inference-engine/src/vpu/graph_transformer/src/special_stage_processor.cpp
new file mode 100644 (file)
index 0000000..b8b23e6
--- /dev/null
@@ -0,0 +1,573 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <set>
+
+#include <vpu/special_stage_processor.hpp>
+
+namespace vpu {
+
+void SpecialStageProcessor::processSplit(
+        const Model::Ptr& model,
+        const Stage& stage) {
+    IE_ASSERT(stage->type() == StageType::Split);
+
+    auto input = stage->input(0);
+
+    const auto& offsets = stage->attrs().get<std::vector<DimValues>>("offsets");
+    IE_ASSERT(offsets.size() == checked_cast<size_t>(stage->numOutputs()));
+
+    for (const auto& outEdge : stage->outputEdges()) {
+        IE_ASSERT(outEdge->portInd() >= 0);
+        IE_ASSERT(checked_cast<size_t>(outEdge->portInd()) < offsets.size());
+
+        auto output = outEdge->output();
+        const auto& offsetFromInput = offsets[checked_cast<size_t>(outEdge->portInd())];
+
+        IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
+        IE_ASSERT(offsetFromInput.size() <= checked_cast<size_t>(input->desc().numDims()));
+        for (const auto& p : offsetFromInput) {
+            IE_ASSERT(input->desc().dimsOrder().hasDim(p.first));
+            IE_ASSERT(p.second + output->desc().dim(p.first) <= input->desc().dim(p.first));
+        }
+
+        //
+        // Check if we need to insert Copy stage
+        //
+
+        bool needCopy = false;
+        if (output->usage() != DataUsage::Intermediate) {
+            needCopy = true;
+        } else if (output->parentDataEdge() != nullptr) {
+            needCopy = true;
+        } else {
+            //
+            // Check output StridesRequirement.
+            //
+
+            IE_ASSERT(output->checkStrides(output->requiredStrides()));
+            if (!checkStrides(output->desc(), input->strides(), output->requiredStrides())) {
+                needCopy = true;
+            }
+
+            //
+            // Check consumers StridesRequirement.
+            //
+
+            if (!needCopy) {
+                for (const auto& consumerEdge : output->consumerEdges()) {
+                    const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
+
+                    if (consumerInfo.hasInput(consumerEdge)) {
+                        const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
+                        IE_ASSERT(output->checkStrides(consumerStrideReqs));
+
+                        if (!checkStrides(output->desc(), input->strides(), consumerStrideReqs)) {
+                            needCopy = true;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        //
+        // Insert Copy if needed
+        //
+
+        if (needCopy) {
+            auto outputCopy = model->duplicateData(output, "@copy");
+            outputCopy->resetRequiredStrides();
+
+            auto outPortInd = outEdge->portInd();
+
+            model->replaceStageOutput(outEdge, outputCopy);
+
+            auto copyStage = _stageBuilder->addCopyStage(
+                model,
+                formatString("%s@output=%d@copy-for-split", stage->name(), outPortInd),
+                stage->origLayer(),
+                outputCopy,
+                output);
+            if (stage->attrs().has("batchInd")) {
+                copyStage->attrs().set("batchInd", stage->attrs().get<int>("batchInd"));
+            }
+
+            output = outputCopy;
+        }
+
+        //
+        // Add Data<->Data edge
+        //
+
+        model->connectDatas()
+            .parent(input)
+            .child(output)
+            .mode(SharedDataMode::ROI)
+            .order(SharedDataOrder::ParentWritesToChild)
+            .offset(offsetFromInput)
+            .done();
+    }
+}
+
+void SpecialStageProcessor::processConcat(
+        const Model::Ptr& model,
+        const Stage& stage) {
+    auto output = stage->output(0);
+
+    const auto& offsets = stage->attrs().get<std::vector<DimValues>>("offsets");
+    IE_ASSERT(offsets.size() == checked_cast<size_t>(stage->numInputs()));
+
+    for (const auto& inEdge : stage->inputEdges()) {
+        IE_ASSERT(inEdge->portInd() >= 0);
+        IE_ASSERT(checked_cast<size_t>(inEdge->portInd()) < offsets.size());
+
+        auto input = inEdge->input();
+        const auto& offsetFromOutput = offsets[checked_cast<size_t>(inEdge->portInd())];
+
+        IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
+        IE_ASSERT(offsetFromOutput.size() <= checked_cast<size_t>(output->desc().numDims()));
+        for (const auto& p : offsetFromOutput) {
+            IE_ASSERT(output->desc().dimsOrder().hasDim(p.first));
+            IE_ASSERT(p.second + input->desc().dim(p.first) <= output->desc().dim(p.first));
+        }
+
+        //
+        // Check if we need to insert Copy stage
+        //
+
+        bool needCopy = false;
+        bool optionalCopy = false;
+        if (input->usage() != DataUsage::Intermediate) {
+            needCopy = true;
+            optionalCopy = false;
+        } else if (input->parentDataEdge() != nullptr) {
+            needCopy = true;
+            optionalCopy = false;
+        } else {
+            //
+            // Check input StridesRequirement.
+            //
+
+            IE_ASSERT(input->checkStrides(input->requiredStrides()));
+            if (!checkStrides(input->desc(), output->strides(), input->requiredStrides())) {
+                needCopy = true;
+                optionalCopy = false;
+            }
+
+            //
+            // Check consumers StridesRequirement.
+            //
+
+            if (!needCopy) {
+                for (const auto& consumerEdge : input->consumerEdges()) {
+                    const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
+
+                    if (consumerInfo.hasInput(consumerEdge)) {
+                        const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
+                        IE_ASSERT(input->checkStrides(consumerStrideReqs));
+
+                        if (!checkStrides(input->desc(), output->strides(), consumerStrideReqs)) {
+                            needCopy = true;
+                            optionalCopy = false;
+                        }
+                    }
+                }
+            }
+
+            //
+            // Check producer StridesRequirement.
+            //
+
+            if (!needCopy) {
+                if (auto producerEdge = input->producerEdge()) {
+                    const auto& producerInfo = producerEdge->producer()->getDataStridesRequirements();
+
+                    if (producerInfo.hasOutput(producerEdge)) {
+                        const auto& producerStrideReqs = producerInfo.getOutput(producerEdge);
+                        IE_ASSERT(input->checkStrides(producerStrideReqs));
+
+                        if (!checkStrides(input->desc(), output->strides(), producerStrideReqs)) {
+                            needCopy = true;
+                            optionalCopy = false;
+                        }
+                    }
+
+                    if (!needCopy) {
+                        //
+                        // To reduce the size of HW output (still can be optimized).
+                        //
+
+                        if (producerEdge->producer()->category() == StageCategory::HW) {
+                            needCopy = true;
+                            optionalCopy = true;
+                        }
+                    }
+                }
+            }
+        }
+
+        //
+        // Insert Copy if needed
+        //
+
+        if (needCopy) {
+            Data inputCopy;
+            if (input->usage() == DataUsage::Const) {
+                inputCopy = model->addNewData(
+                    input->name() + "@copy",
+                    input->desc());
+            } else {
+                inputCopy = model->duplicateData(
+                    input,
+                    "@copy");
+                inputCopy->resetRequiredStrides();
+            }
+
+            auto copyStage = _stageBuilder->addCopyStage(
+                model,
+                formatString("%s@input=%d@copy-for-concat", stage->name(), inEdge->portInd()),
+                stage->origLayer(),
+                input,
+                inputCopy);
+            copyStage->attrs().set<bool>("optional", optionalCopy);
+            if (stage->attrs().has("batchInd")) {
+                copyStage->attrs().set("batchInd", stage->attrs().get<int>("batchInd"));
+            }
+
+            model->replaceStageInput(inEdge, inputCopy);
+
+            input = inputCopy;
+        }
+
+        //
+        // Add Data<->Data edge
+        //
+
+        model->connectDatas()
+            .parent(output)
+            .child(input)
+            .mode(SharedDataMode::ROI)
+            .order(SharedDataOrder::ChildWritesToParent)
+            .offset(offsetFromOutput)
+            .done();
+    }
+}
+
+
+void SpecialStageProcessor::processReshape(
+        const Model::Ptr& model,
+        const Stage& stage) {
+    auto input = stage->input(0);
+    auto output = stage->output(0);
+
+    IE_ASSERT(input->desc().dimsOrder() == DimsOrder::fromNumDims(input->desc().numDims()));
+    IE_ASSERT(input->checkStrides(StridesRequirement::compact()));
+
+    IE_ASSERT(output->desc().dimsOrder() == DimsOrder::fromNumDims(output->desc().numDims()));
+    IE_ASSERT(output->checkStrides(StridesRequirement::compact()));
+
+    //
+    // Check if we need to insert Copy stage
+    //
+
+    bool needCopy = false;
+    if (input->usage() != DataUsage::Intermediate &&
+        output->usage() != DataUsage::Intermediate) {
+        needCopy = true;
+    } else if (input->parentDataEdge() != nullptr &&
+               output->parentDataEdge() != nullptr) {
+        needCopy = true;
+    }
+
+    //
+    // Insert Copy if needed
+    //
+
+    if (needCopy) {
+        Data inputCopy;
+        if (input->usage() == DataUsage::Const) {
+            inputCopy = model->addNewData(
+                input->name() + "@copy",
+                input->desc());
+        } else {
+            inputCopy = model->duplicateData(
+                input,
+                "@copy");
+        }
+        inputCopy->updateRequiredStrides(StridesRequirement::compact());
+
+        auto copyStage = _stageBuilder->addCopyStage(
+            model,
+            formatString("%s@copy-for-reshape", stage->name()),
+            stage->origLayer(),
+            input,
+            inputCopy);
+        if (stage->attrs().has("batchInd")) {
+            copyStage->attrs().set("batchInd", stage->attrs().get<int>("batchInd"));
+        }
+
+        model->replaceStageInput(stage->inputEdge(0), inputCopy);
+
+        input = inputCopy;
+    }
+
+    //
+    // Add Data<->Data edge
+    //
+
+    if (input->usage() == DataUsage::Intermediate &&
+        input->parentDataEdge() == nullptr) {
+        model->connectDatas()
+            .parent(output)
+            .child(input)
+            .mode(SharedDataMode::Reshape)
+            .order(SharedDataOrder::ChildWritesToParent)
+            .done();
+    } else {
+        IE_ASSERT(output->usage() == DataUsage::Intermediate);
+        IE_ASSERT(output->parentDataEdge() == nullptr);
+
+        model->connectDatas()
+            .parent(input)
+            .child(output)
+            .mode(SharedDataMode::Reshape)
+            .order(SharedDataOrder::ParentWritesToChild)
+            .done();
+    }
+}
+
+void SpecialStageProcessor::processExpand(
+        const Model::Ptr& model,
+        const Stage& stage) {
+    auto input = stage->input(0);
+    auto output = stage->output(0);
+
+    const auto& offset = stage->attrs().get<DimValues>("offset");
+
+    IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
+
+    IE_ASSERT(offset.size() <= checked_cast<size_t>(output->desc().numDims()));
+    for (const auto& p : offset) {
+        IE_ASSERT(output->desc().dimsOrder().hasDim(p.first));
+        IE_ASSERT(p.second + input->desc().dim(p.first) <= output->desc().dim(p.first));
+    }
+
+    //
+    // Check if we need to insert Copy stage
+    //
+
+    bool needCopy = false;
+    bool optionalCopy = false;
+    if (input->usage() != DataUsage::Intermediate) {
+        needCopy = true;
+        optionalCopy = false;
+    } else if (input->parentDataEdge() != nullptr) {
+        needCopy = true;
+        optionalCopy = false;
+    } else {
+        //
+        // Check input StridesRequirement.
+        //
+
+        IE_ASSERT(input->checkStrides(input->requiredStrides()));
+        if (!checkStrides(input->desc(), output->strides(), input->requiredStrides())) {
+            needCopy = true;
+            optionalCopy = false;
+        }
+
+        //
+        // Check consumers StridesRequirement.
+        //
+
+        if (!needCopy) {
+            for (const auto& consumerEdge : input->consumerEdges()) {
+                const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
+
+                if (consumerInfo.hasInput(consumerEdge)) {
+                    const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
+                    IE_ASSERT(input->checkStrides(consumerStrideReqs));
+
+                    if (!checkStrides(input->desc(), output->strides(), consumerStrideReqs)) {
+                        needCopy = true;
+                        optionalCopy = false;
+                    }
+                }
+            }
+        }
+
+        //
+        // Check producer StridesRequirement.
+        //
+
+        if (!needCopy) {
+            if (auto producerEdge = input->producerEdge()) {
+                const auto& producerInfo = producerEdge->producer()->getDataStridesRequirements();
+
+                if (producerInfo.hasOutput(producerEdge)) {
+                    const auto& producerStrideReqs = producerInfo.getOutput(producerEdge);
+                    IE_ASSERT(input->checkStrides(producerStrideReqs));
+
+                    if (!checkStrides(input->desc(), output->strides(), producerStrideReqs)) {
+                        needCopy = true;
+                        optionalCopy = false;
+                    }
+                }
+
+                if (!needCopy) {
+                    //
+                    // To reduce the size of HW output (still can be optimized).
+                    //
+
+                    if (producerEdge->producer()->category() == StageCategory::HW) {
+                        needCopy = true;
+                        optionalCopy = true;
+                    }
+                }
+            }
+        }
+    }
+
+    //
+    // Insert Copy if needed
+    //
+
+    if (needCopy) {
+        Data inputCopy;
+        if (input->usage() == DataUsage::Const) {
+            inputCopy = model->addNewData(
+                input->name() + "@copy",
+                input->desc());
+        } else {
+            inputCopy = model->duplicateData(
+                input,
+                "@copy");
+            inputCopy->resetRequiredStrides();
+        }
+
+        auto copyStage = _stageBuilder->addCopyStage(
+            model,
+            formatString("%s@copy-for-expand", stage->name()),
+            stage->origLayer(),
+            input,
+            inputCopy);
+        copyStage->attrs().set<bool>("optional", optionalCopy);
+        if (stage->attrs().has("batchInd")) {
+            copyStage->attrs().set("batchInd", stage->attrs().get<int>("batchInd"));
+        }
+
+        model->replaceStageInput(stage->inputEdge(0), inputCopy);
+
+        input = inputCopy;
+    }
+
+    //
+    // Add Data<->Data edge
+    //
+
+    model->connectDatas()
+        .parent(output)
+        .child(input)
+        .mode(SharedDataMode::ROI)
+        .order(SharedDataOrder::ChildWritesToParent)
+        .offset(offset)
+        .done();
+}
+
+void SpecialStageProcessor::processShrink(
+        const Model::Ptr& model,
+        const Stage& stage) {
+    auto input = stage->input(0);
+    auto output = stage->output(0);
+
+    const auto& offset = stage->attrs().get<DimValues>("offset");
+
+    IE_ASSERT(input->desc().dimsOrder() == output->desc().dimsOrder());
+
+    IE_ASSERT(offset.size() <= checked_cast<size_t>(input->desc().numDims()));
+    for (const auto& p : offset) {
+        IE_ASSERT(input->desc().dimsOrder().hasDim(p.first));
+        IE_ASSERT(p.second + output->desc().dim(p.first) <= input->desc().dim(p.first));
+    }
+
+    //
+    // Check if we need to insert Copy for output
+    //
+
+    bool needCopy = false;
+    if (output->usage() != DataUsage::Intermediate) {
+        needCopy = true;
+    } else if (output->parentDataEdge() != nullptr) {
+        needCopy = true;
+    } else {
+        //
+        // Check output StridesRequirement.
+        //
+
+        IE_ASSERT(output->checkStrides(output->requiredStrides()));
+        if (!checkStrides(output->desc(), input->strides(), output->requiredStrides())) {
+            needCopy = true;
+        }
+
+        //
+        // Check consumers StridesRequirement.
+        //
+
+        if (!needCopy) {
+            for (const auto& consumerEdge : output->consumerEdges()) {
+                const auto& consumerInfo = consumerEdge->consumer()->getDataStridesRequirements();
+
+                if (consumerInfo.hasInput(consumerEdge)) {
+                    const auto& consumerStrideReqs = consumerInfo.getInput(consumerEdge);
+                    IE_ASSERT(output->checkStrides(consumerStrideReqs));
+
+                    if (!checkStrides(output->desc(), input->strides(), consumerStrideReqs)) {
+                        needCopy = true;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    //
+    // Insert output Copy if needed
+    //
+
+    if (needCopy) {
+        auto outputCopy = model->duplicateData(
+            output,
+            "@copy");
+        outputCopy->resetRequiredStrides();
+
+        model->replaceStageOutput(stage->outputEdge(0), outputCopy);
+
+        auto copyStage = _stageBuilder->addCopyStage(
+            model,
+            formatString("%s@copy-output-for-shrink", stage->name()),
+            stage->origLayer(),
+            outputCopy,
+            output);
+        if (stage->attrs().has("batchInd")) {
+            copyStage->attrs().set("batchInd", stage->attrs().get<int>("batchInd"));
+        }
+
+        output = outputCopy;
+    }
+
+    //
+    // Add Data<->Data edge
+    //
+
+    model->connectDatas()
+        .parent(input)
+        .child(output)
+        .mode(SharedDataMode::ROI)
+        .order(SharedDataOrder::ParentWritesToChild)
+        .offset(offset)
+        .done();
+}
+
+}  // namespace vpu
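The new SpecialStageProcessor gathers the copy-insertion logic for the data-sharing stages into one place: each process* method checks data usage, existing parent data edges and StridesRequirement, inserts a Copy stage (optional in the Concat and Expand cases) when the shared buffer cannot be used directly, and then adds the Data<->Data edge. A hedged sketch of how a pass might dispatch stages through it; `processor`, `model->getStages()` and the stage-type names other than Split are assumptions based on the method names, not part of this patch:

    // Hypothetical dispatch loop (assumed caller, not from the patch).
    // `processor` is assumed to be a SpecialStageProcessor constructed with a
    // StageBuilder, and `model->getStages()` to enumerate all stages.
    for (const auto& stage : model->getStages()) {
        if (stage->type() == StageType::Split) {
            processor.processSplit(model, stage);
        } else if (stage->type() == StageType::Concat) {
            processor.processConcat(model, stage);
        } else if (stage->type() == StageType::Reshape) {
            processor.processReshape(model, stage);
        } else if (stage->type() == StageType::Expand) {
            processor.processExpand(model, stage);
        } else if (stage->type() == StageType::Shrink) {
            processor.processShrink(model, stage);
        }
    }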
index 7ab755e..7dfc3eb 100644 (file)
@@ -20,41 +20,36 @@ private:
         return std::make_shared<ArgMaxStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto has_axis = attrs().get<bool>("has_axis");
         if (has_axis) {
-            _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+            orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
         } else {
             // axis<0 requires flatten so only NCHW layout is supported
-            _orderInfo.setInput(_inputEdges[0], DimsOrder::fromNumDims(input->desc().numDims()));
-            _orderInfo.setOutput(_outputEdges[0], DimsOrder::fromNumDims(output->desc().numDims()));
+            orderInfo.setInput(inputEdge(0), DimsOrder::fromNumDims(input->desc().numDims()));
+            orderInfo.setOutput(outputEdge(0), DimsOrder::fromNumDims(output->desc().numDims()));
         }
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto out_max_val = attrs().get<int32_t>("out_max_val");
         auto top_k = attrs().get<int32_t>("top_k");
@@ -73,11 +68,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
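This hunk and the stage hunks that follow apply one interface change: the propagate*/get*Impl callbacks now receive the StageDataInfo container to fill (orderInfo, stridesInfo, batchInfo) instead of writing to protected members such as _orderInfo, edges are reached through the inputEdge(i)/outputEdge(i) accessors rather than _inputEdges/_outputEdges, and the edge-count and precision checks move from finalCheckImpl() into initialCheckImpl() with assertInputsOutputsTypes(). A minimal sketch of a stage written against the updated interface; the override set and accessor names are taken from the hunks, while the class itself and the cloneImpl() return type are illustrative assumptions:

    // Hedged sketch of a stage under the updated StageNode interface.
    class MyStage final : public StageNode {
    private:
        StagePtr cloneImpl() const override {
            return std::make_shared<MyStage>(*this);
        }

        void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
            auto input = inputEdge(0)->input();
            orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
        }

        void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
        }

        void finalizeDataLayoutImpl() override {
        }

        void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
        }

        void initialCheckImpl() const override {
            // Replaces finalCheckImpl(): edge counts and precisions are now
            // validated up front.
            assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
        }

        void serializeParamsImpl(BlobSerializer&) const override {
        }

        void serializeDataImpl(BlobSerializer& serializer) const override {
            inputEdge(0)->input()->serializeNewBuffer(serializer);
            outputEdge(0)->output()->serializeNewBuffer(serializer);
        }
    };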
index 6c29dc6..ff9dc6d 100644 (file)
@@ -40,20 +40,18 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
             auto inputScale = inputScales[0];
 
-            _scaleInfo.setInput(_inputEdges[1], inputScale);
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setInput(inputEdge(1), inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
         } else {
             // Bias can only propagate scaling, not generate.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setInput(_inputEdges[1], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setInput(inputEdge(1), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
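propagateScaleFactorsImpl() follows the same pattern: it gains a StageDataInfo<float>& output parameter and drops the edge-count asserts, which now live in initialCheckImpl(). A condensed sketch of the pass-through case, mirroring the Bias, Clamp and Copy hunks (the enclosing stage class is assumed):

    void propagateScaleFactorsImpl(
            const SmallVector<float>& inputScales,
            ScalePropagationStep step,
            StageDataInfo<float>& scaleInfo) override {
        if (step == ScalePropagationStep::Propagate) {
            // Forward the input scale unchanged.
            scaleInfo.setOutput(outputEdge(0), inputScales[0]);
        } else {
            // This kind of stage can only propagate scaling, not generate it.
            scaleInfo.setInput(inputEdge(0), 1.0f);
            scaleInfo.setOutput(outputEdge(0), 1.0f);
        }
    }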
 
index efc0143..11cb5c6 100644 (file)
@@ -23,21 +23,19 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
             auto inputScale = inputScales[0];
 
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
 
             attrs().get<float>("min_value") *= inputScale;
             attrs().get<float>("max_value") *= inputScale;
         } else {
             // Clamp can only propagate scaling, not generate.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
index 993ab8b..dab1d97 100644 (file)
@@ -27,48 +27,43 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(!_inputEdges.empty());
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto output = _outputEdges[0]->output();
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
+        auto output = outputEdge(0)->output();
 
         if (step == ScalePropagationStep::Propagate) {
             // Keep the largest input scale factor.
             auto maxScale = std::numeric_limits<float>::lowest();
-            for (const auto& inEdge : _inputEdges) {
+            for (const auto& inEdge : inputEdges()) {
                 maxScale = std::max(maxScale, inputScales[inEdge->portInd()]);
             }
 
             IE_ASSERT(maxScale > 0.0f);
 
-            for (const auto& inEdge : _inputEdges) {
+            for (const auto& inEdge : inputEdges()) {
                 auto curScale = inputScales[inEdge->portInd()];
 
                 if (!isFloatEqual(curScale, maxScale)) {
-                    _scaleInfo.setInput(inEdge, maxScale / curScale);
+                    scaleInfo.setInput(inEdge, maxScale / curScale);
                 }
             }
 
-            _scaleInfo.setOutput(_outputEdges[0], maxScale);
+            scaleInfo.setOutput(outputEdge(0), maxScale);
         } else {
             // Concat can only propagate scaling.
-            for (const auto& inEdge : _inputEdges) {
-                _scaleInfo.setInput(inEdge, 1.0f);
+            for (const auto& inEdge : inputEdges()) {
+                scaleInfo.setInput(inEdge, 1.0f);
             }
 
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(!_inputEdges.empty());
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto output = outputEdge(0)->output();
 
         DimsOrderMap<int> dimsOrderVotes;
-        for (const auto& inEdge : _inputEdges) {
+        for (const auto& inEdge : inputEdges()) {
             dimsOrderVotes[inEdge->input()->desc().dimsOrder()]++;
         }
 
@@ -96,18 +91,15 @@ protected:
         IE_ASSERT(finalOrder.numDims() > 0);
         IE_ASSERT(curVotes > 0);
 
-        for (const auto& inEdge : _inputEdges) {
-            _orderInfo.setInput(inEdge, finalOrder);
+        for (const auto& inEdge : inputEdges()) {
+            orderInfo.setInput(inEdge, finalOrder);
         }
 
-        _orderInfo.setOutput(_outputEdges[0], finalOrder);
+        orderInfo.setOutput(outputEdge(0), finalOrder);
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(!_inputEdges.empty());
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto output = _outputEdges[0]->output();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto output = outputEdge(0)->output();
 
         auto dimsOrder = output->desc().dimsOrder();
 
@@ -117,7 +109,7 @@ protected:
 
         auto minConcatDimInd = dimsOrder.numDims() - 1;
 
-        for (const auto& inEdge : _inputEdges) {
+        for (const auto& inEdge : inputEdges()) {
             auto input = inEdge->input();
 
             for (const auto& p : output->desc().dims()) {
@@ -144,7 +136,7 @@ protected:
         // Merge input StridesRequirement.
         //
 
-        for (const auto& inEdge : _inputEdges) {
+        for (const auto& inEdge : inputEdges()) {
             auto curInput = inEdge->input();
             auto curInputReqs = curInput->requiredStrides();
 
@@ -183,19 +175,24 @@ protected:
         // Return merged StridesRequirement.
         //
 
-        for (const auto& inEdge : _inputEdges) {
-            _stridesInfo.setInput(inEdge, inputReqs);
+        for (const auto& inEdge : inputEdges()) {
+            stridesInfo.setInput(inEdge, inputReqs);
         }
-        _stridesInfo.setOutput(_outputEdges[0], outputReqs);
+        stridesInfo.setOutput(outputEdge(0), outputReqs);
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() > 0);
+        IE_ASSERT(numOutputs() == 1);
+
+        const auto& firstInputPrecision = input(0)->desc().type();
+        assertAllInputsOutputsTypes(this, {firstInputPrecision}, {firstInputPrecision});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
index 1698658..5a6d0b6 100644 (file)
@@ -31,8 +31,10 @@ void FrontEnd::parseConvolution(
     auto input = inputs[0];
     auto output = outputs[0];
 
-    if (!(input->desc().numDims() == 3 || input->desc().numDims() == 4)) {
-        VPU_THROW_EXCEPTION << "Convolution supports only 3D or 4D input";
+    bool is3D = input->desc().numDims() > 4;  // i.e. == 5
+
+    if (input->desc().numDims() < 3 || input->desc().numDims() > 5) {
+        VPU_THROW_EXCEPTION << "Convolution supports only 3D or 4D or 5D input";
     }
     if (output->desc().numDims() != input->desc().numDims()) {
         VPU_THROW_EXCEPTION << "Convolution supports only same num dims in input and output";
@@ -47,18 +49,23 @@ void FrontEnd::parseConvolution(
 
     int kernelSizeX = convLayer->_kernel_x;
     int kernelSizeY = convLayer->_kernel_y;
+    int kernelSizeZ = is3D ? convLayer->_kernel.at(ie::Z_AXIS) : 1;
 
     int kernelStrideX = convLayer->_stride_x;
     int kernelStrideY = convLayer->_stride_y;
+    int kernelStrideZ = is3D ? convLayer->_stride.at(ie::Z_AXIS) : 1;
 
     auto paddings = getPaddings(*convLayer);
     int padLeft = paddings.begin.exist(ie::X_AXIS) ? paddings.begin[ie::X_AXIS] : 0;
     int padRight = paddings.end.exist(ie::X_AXIS) ? paddings.end[ie::X_AXIS] : padLeft;
     int padTop = paddings.begin.exist(ie::Y_AXIS) ? paddings.begin[ie::Y_AXIS] : 0;
     int padBottom = paddings.end.exist(ie::Y_AXIS) ? paddings.end[ie::Y_AXIS] : padTop;
+    int padFront = paddings.begin.exist(ie::Z_AXIS) ? paddings.begin[ie::Z_AXIS] : 0;
+    int padBack = paddings.end.exist(ie::Z_AXIS) ? paddings.end[ie::Z_AXIS] : padFront;
 
     int dilationX = convLayer->_dilation_x;
     int dilationY = convLayer->_dilation_y;
+    int dilationZ = is3D ? convLayer->_dilation.at(ie::Z_AXIS) : 1;
 
     int groupSize = convLayer->_group;
 
@@ -73,11 +80,11 @@ void FrontEnd::parseConvolution(
     }
 
     // TODO: support dilated convolution
-    if ((dilationX != 1 || dilationY != 1) && (!env.config.hwDilation)) {
+    if ((dilationX != 1 || dilationY != 1 || dilationZ != 1) && (!env.config.hwDilation)) {
         tryHW = false;
     }
 
-    if (kernelSizeX > 15 || kernelSizeY > 15 || kernelStrideX > 8) {
+    if (kernelSizeX > 15 || kernelSizeY > 15 || kernelSizeZ > 1 || kernelStrideX > 8) {
         tryHW = false;
     }
 
@@ -85,7 +92,7 @@ void FrontEnd::parseConvolution(
         tryHW = false;
     }
 
-    if (output->desc().numDims() < 4) {
+    if (output->desc().numDims() < 4 || is3D) {
         tryHW = false;
     }
 
@@ -97,15 +104,25 @@ void FrontEnd::parseConvolution(
     std::tie(weights, biases) = getWeightsAndBiases(model, layer);
 
     IE_ASSERT(weights->desc().totalDimSize() >=
-              kernelSizeX * kernelSizeY * (input->desc().dim(Dim::C) / groupSize) * output->desc().dim(Dim::C));
-    weights = model->duplicateData(
-        weights,
-        "@conv",
+              kernelSizeX * kernelSizeY * kernelSizeZ * (input->desc().dim(Dim::C) / groupSize) * output->desc().dim(Dim::C));
+
+    auto weightsDesc = is3D ?
         DataDesc({
             kernelSizeX,
             kernelSizeY,
+            kernelSizeZ,
             input->desc().dim(Dim::C) / groupSize,
-            output->desc().dim(Dim::C)}));
+            output->desc().dim(Dim::C)}) :
+        DataDesc({
+            kernelSizeX,
+            kernelSizeY,
+            input->desc().dim(Dim::C) / groupSize,
+            output->desc().dim(Dim::C)});
+
+    weights = model->duplicateData(
+        weights,
+        "@conv",
+        weightsDesc);
 
     if (biases->usage() != DataUsage::Fake) {
         IE_ASSERT(biases->desc().totalDimSize() >= output->desc().dim(Dim::C));
@@ -140,6 +157,14 @@ void FrontEnd::parseConvolution(
     stage->attrs().set<int>("dilationX", dilationX);
     stage->attrs().set<int>("dilationY", dilationY);
 
+    if (is3D) {
+        stage->attrs().set<int>("kernelSizeZ", kernelSizeZ);
+        stage->attrs().set<int>("kernelStrideZ", kernelStrideZ);
+        stage->attrs().set<int>("padFront", padFront);
+        stage->attrs().set<int>("padBack", padBack);
+        stage->attrs().set<int>("dilationZ", dilationZ);
+    }
+
     stage->attrs().set<int>("groupSize", groupSize);
 
     stage->attrs().set<bool>("tryHW", tryHW);
index df9fdc3..e75175a 100644 (file)
@@ -33,58 +33,48 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
-            _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
+            scaleInfo.setOutput(outputEdge(0), inputScales[0]);
         } else {
             // Copy can only propagate scaling.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement().remove(0));
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement().remove(0));
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement().remove(0));
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement().remove(0));
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::NotNeeded;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         if (input->desc().dimsOrder() == DimsOrder::NC) {
             if (!input->checkStrides(StridesRequirement().add(0, DimStride::Compact)) ||
index 030d1d7..04d8d67 100644 (file)
@@ -19,62 +19,54 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() >= 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
             auto inputScale = inputScales[0];
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
         } else {
             // Crop can only propagate scaling, not generate.
 
-            for (const auto& inEdge : _inputEdges) {
-                _scaleInfo.setInput(inEdge, 1.0f);
+            for (const auto& inEdge : inputEdges()) {
+                scaleInfo.setInput(inEdge, 1.0f);
             }
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() >= 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
         auto inOrder = input->desc().dimsOrder();
 
         // HWC only
-        _orderInfo.setInput(_inputEdges[0], inOrder.createMovedDim(Dim::C, 0));
-        _orderInfo.setOutput(_outputEdges[0], inOrder.createMovedDim(Dim::C, 0));
+        orderInfo.setInput(inputEdge(0), inOrder.createMovedDim(Dim::C, 0));
+        orderInfo.setOutput(outputEdge(0), inOrder.createMovedDim(Dim::C, 0));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() >= 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() >= 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        for (const auto& inEdge : _inputEdges) {
-            _batchInfo.setInput(inEdge, BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        for (const auto& inEdge : inputEdges()) {
+            batchInfo.setInput(inEdge, BatchSupport::Split);
         }
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::NotNeeded;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() == 1 || numInputs() == 2);
+        IE_ASSERT(numOutputs() == 1);
+        assertAllInputsOutputsTypes(this, DataType::FP16, DataType::FP16);
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -86,12 +78,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() >= 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
@@ -124,12 +112,6 @@ void FrontEnd::parseCrop(
             << "] has invalid axis value. Expected: 0 <= axis < 4, Actual: " << cropAxis;
     }
 
-    if (cropAxis == 0) {
-        VPU_THROW_EXCEPTION
-            << "Layer " << layer->name << " [" << layer->type
-            << "] Can't crop batch channel";
-    }
-
     auto stage = model->addNewStage<CropStage>(
         layer->name,
         StageType::Crop,
index ca9ccf9..026105e 100644 (file)
@@ -18,54 +18,42 @@ private:
         return std::make_shared<CTCDecoderStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto cInd = input->desc().dimsOrder().dimInd(Dim::C);
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, cInd));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, cInd));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::OnlyOne;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         input0->serializeOldBuffer(handle_from_this(), serializer);
         input1->serializeOldBuffer(handle_from_this(), serializer);
index 89ac460..dd4490f 100644 (file)
@@ -41,66 +41,67 @@ private:
         return std::make_shared<CustomStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
         const auto& inputOrders = attrs().get<std::map<int, DimsOrder>>("inputOrders");
         const auto& outputOrders = attrs().get<std::map<int, DimsOrder>>("outputOrders");
 
-        for (const auto& inEdge : _inputEdges) {
+        for (const auto& inEdge : inputEdges()) {
             // last input is always OpenCL binary, so use it as is.
-            if (inEdge->portInd() == _inputEdges.size() - 1) {
+            if (inEdge->portInd() == numInputs() - 1) {
                 break;
             }
 
             auto it = inputOrders.find(inEdge->portInd());
             if (it != inputOrders.end()) {
                 auto requiredOrder = it->second;
-                _orderInfo.setInput(inEdge, requiredOrder);
+                orderInfo.setInput(inEdge, requiredOrder);
             }
         }
 
-        for (const auto& outEdge : _outputEdges) {
+        for (const auto& outEdge : outputEdges()) {
             auto it = outputOrders.find(outEdge->portInd());
             if (it != outputOrders.end()) {
                 auto requiredOrder = it->second;
-                _orderInfo.setOutput(outEdge, requiredOrder);
+                orderInfo.setOutput(outEdge, requiredOrder);
             }
         }
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        for (const auto& inEdge : _inputEdges) {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        for (const auto& inEdge : inputEdges()) {
             // last input is always OpenCL binary, so use it as is.
-            if (inEdge->portInd() == _inputEdges.size() - 1) {
+            if (inEdge->portInd() == numInputs() - 1) {
                 break;
             }
 
-            _stridesInfo.setInput(inEdge, StridesRequirement::compact());
+            stridesInfo.setInput(inEdge, StridesRequirement::compact());
         }
-        for (const auto& outEdge : _outputEdges) {
-            _stridesInfo.setOutput(outEdge, StridesRequirement::compact());
+        for (const auto& outEdge : outputEdges()) {
+            stridesInfo.setOutput(outEdge, StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        for (const auto& inEdge : _inputEdges) {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        std::vector<CustomDataFormat> formats = attrs().get<std::vector<CustomDataFormat>>("formats");
+
+        for (const auto& inEdge : inputEdges()) {
+            IE_ASSERT(inEdge->portInd() < formats.size());
+
             // last input is always OpenCL binary, so use it as is.
-            if (inEdge->portInd() == _inputEdges.size() - 1) {
+            if ((inEdge->portInd() == numInputs() - 1) || (formats[inEdge->portInd()] == CustomDataFormat::Any)) {
                 break;
             }
 
-            _batchInfo.setInput(inEdge, BatchSupport::Split);
+            batchInfo.setInput(inEdge, BatchSupport::Split);
         }
-        for (const auto& outEdge : _outputEdges) {
-            _batchInfo.setOutput(outEdge, BatchSupport::Split);
+        for (const auto& outEdge : outputEdges()) {
+            batchInfo.setOutput(outEdge, BatchSupport::Split);
         }
     }
 
-    void finalCheckImpl() const override {
-    }
-
     void serializeParamsImpl(BlobSerializer& serializer) const override {
         const auto& customLayer = attrs().get<CustomLayer::Ptr>("customLayer");
         const auto& gws = attrs().get<SmallVector<int, 3>>("gws");
@@ -136,7 +137,7 @@ private:
         // Total number of blobs
         //
 
-        serializer.append(static_cast<int32_t>(_inputEdges.size() + _outputEdges.size()));
+        serializer.append(static_cast<int32_t>(numInputs() + numOutputs()));
 
         //
         // Number of kernel parameters
@@ -200,15 +201,26 @@ private:
                             auto blob = parameter.irSource.substr(0, pos);
                             auto dim = parameter.irSource.substr(pos + 1, std::string::npos);
 
+                            IE_ASSERT(dim.length() == 1)
+                                    << "Unable to deduce parameter " << parameter.argName << " for "
+                                    << _origLayer->type <<" layer. Name is: " << _origLayer->name;
+                            char dimLetter = dim[0];
+
                             ie::DataPtr origData;
                             if (blob == "I") {
                                 origData = _origLayer->insData[parameter.portIndex].lock();
                             } else {
-                                origData = _origLayer->outData[0];
+                                origData = _origLayer->outData[parameter.portIndex];
                             }
                             IE_ASSERT(origData != nullptr);
 
                             auto dims = origData->getDims();
+                            int ndims = dims.size();
+
+                            if (ndims > 4)
+                                VPU_THROW_EXCEPTION
+                                    << "Unable to deduce parameter " << parameter.argName << " for "
+                                    << _origLayer->type <<" layer. Name is: " << _origLayer->name;
 
                             const std::map<char, int> vars = {
                                 { 'b', 0 }, { 'B', 0 },
@@ -217,8 +229,9 @@ private:
                                 { 'x', 3 }, { 'X', 3 },
                             };
 
-                            if (vars.find(dim[0]) != vars.end()) {
-                                auto res = dims.at(vars.at(dim[0]));
+                            auto var = vars.find(dimLetter);
+                            if (var != vars.end()) {
+                                auto res = dims.at(var->second-4+ndims);
 
                                 serializer.append(static_cast<uint32_t>(res));
                                 serializer.append(static_cast<int32_t>(-1));
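The dimension-letter lookup above now indexes from the end of the dims vector, so blobs with fewer than four dimensions resolve correctly. A worked example for an assumed 3D blob (the blob shape is an illustration, not taken from the patch):

    // vars: 'b' -> 0, 'f' -> 1, 'y' -> 2, 'x' -> 3; res = dims.at(index - 4 + ndims).
    // Assume dims = {C, H, W}, so ndims = 3:
    //   'f' -> 1 - 4 + 3 = 0 -> C
    //   'y' -> 2 - 4 + 3 = 1 -> H
    //   'x' -> 3 - 4 + 3 = 2 -> W
    //   'b' -> 0 - 4 + 3 = -1, which would index out of range, so the batch
    //   letter is only meaningful for 4D blobs (ndims > 4 is rejected above).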
@@ -258,15 +271,19 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_tempBufferEdges.empty());
+        IE_ASSERT(numTempBuffers() == 1);
 
-        for (const auto& inEdge : _inputEdges) {
+        for (const auto& inEdge : inputEdges()) {
             inEdge->input()->serializeOldBuffer(handle_from_this(), serializer);
         }
 
-        for (const auto& outEdge : _outputEdges) {
+        for (const auto& outEdge : outputEdges()) {
             outEdge->output()->serializeOldBuffer(handle_from_this(), serializer);
         }
+
+        for (const auto& tempEdge : tempBufferEdges()) {
+            tempEdge->tempBuffer()->serializeOldBuffer(handle_from_this(), serializer);
+        }
     }
 };
 
@@ -362,15 +379,18 @@ void FrontEnd::parseCustom(
         auto customLayer = customLayersForType[stage_num];
 
         std::map<std::string, int> ports;
+        std::vector<CustomDataFormat> formats;
 
         // Gather inputs
         DataVector stageInputs;
         for (auto& param : customLayer->bindings()) {
             if (param.type == CustomParamType::Input) {
                 ports[param.argName] = stageInputs.size();
+                formats.emplace_back(param.format);
                 stageInputs.emplace_back(inputs[param.portIndex]);
             } else if (param.type == CustomParamType::InputBuffer) {
                 ports[param.argName] = stageInputs.size();
+                formats.emplace_back(CustomDataFormat::BFYX);
                 stageInputs.emplace_back(tempBuffsMap[param.portIndex]);
             }
         }
@@ -386,12 +406,14 @@ void FrontEnd::parseCustom(
                         DataDesc({origBlob->size()}),
                         ieBlobContent(origBlob));
                     ports[param.argName] = stageInputs.size();
+                    formats.emplace_back(param.format);
                     stageInputs.emplace_back(std::move(customBlob));
                 }
             }
         }
 
         customLayer->setStageNumInputs(stageInputs.size());
+        formats.emplace_back(CustomDataFormat::Any);
 
         // Get kernel binary
         auto kernelNode = kernelNodes.find(customLayer->kernelBinary());
@@ -429,6 +451,7 @@ void FrontEnd::parseCustom(
 
         stage->attrs().set("customLayer", customLayer);
         stage->attrs().set("ports", ports);
+        stage->attrs().set("formats", formats);
 
         SmallVector<int, 3> gws;
         SmallVector<int, 3> lws;
@@ -447,25 +470,27 @@ void FrontEnd::parseCustom(
             b2b[kp.argName] = kp;
         }
 
-        const std::map<CustomDataFormat, DimsOrder> formats = {
+        const std::map<CustomDataFormat, DimsOrder> formatsMap = {
             { CustomDataFormat::BYXF, DimsOrder::NHWC },
-            { CustomDataFormat::BFYX, DimsOrder::NCHW }
+            { CustomDataFormat::BFYX, DimsOrder::NCHW },
+            { CustomDataFormat::YXF, DimsOrder::HWC },
+            { CustomDataFormat::FYX, DimsOrder::CHW }
         };
 
         for (const auto& kp : customLayer->parameters()) {
             const auto& parameter = b2b[kp];
 
             if (parameter.type == CustomParamType::Input) {
-                auto it = formats.find(parameter.format);
-                if (it != formats.end()) {
+                auto it = formatsMap.find(parameter.format);
+                if (it != formatsMap.end()) {
                     auto requiredOrder = it->second;
                     inputOrders[parameter.portIndex] = requiredOrder;
                 }
             }
 
             if (parameter.type == CustomParamType::Output) {
-                auto it = formats.find(parameter.format);
-                if (it != formats.end()) {
+                auto it = formatsMap.find(parameter.format);
+                if (it != formatsMap.end()) {
                     auto requiredOrder = it->second;
                     outputOrders[parameter.portIndex] = requiredOrder;
                 }
@@ -474,6 +499,11 @@ void FrontEnd::parseCustom(
 
         stage->attrs().set("inputOrders", std::move(inputOrders));
         stage->attrs().set("outputOrders", std::move(outputOrders));
+
+        int buffer_size = customLayer->kernelBinary().length() + 1024;
+        model->addTempBuffer(
+            stage,
+            DataDesc({buffer_size}));
     }
 }
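parseCustom() now records a per-input CustomDataFormat vector (with a trailing Any entry for the OpenCL binary input), maps the two added formats YXF and FYX onto the batchless HWC and CHW orders, and attaches a temp buffer of kernelBinary().length() + 1024 bytes to every custom stage, which CustomStage::serializeDataImpl() writes out after the inputs and outputs. A hedged sketch of the format-to-order lookup as used above; variable names follow the hunk and the snippet is illustrative, not a new API:

    // Assumed to run inside parseCustom(), with `parameter` and `inputOrders`
    // in scope as in the hunk above.
    static const std::map<CustomDataFormat, DimsOrder> formatsMap = {
        { CustomDataFormat::BYXF, DimsOrder::NHWC },   // with batch dimension
        { CustomDataFormat::BFYX, DimsOrder::NCHW },   // with batch dimension
        { CustomDataFormat::YXF,  DimsOrder::HWC  },   // no batch dimension
        { CustomDataFormat::FYX,  DimsOrder::CHW  },   // no batch dimension
    };

    auto it = formatsMap.find(parameter.format);
    if (it != formatsMap.end()) {
        inputOrders[parameter.portIndex] = it->second;   // pin the required layout
    }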
 
index afbf79d..7e3e021 100644 (file)
@@ -95,28 +95,28 @@ private:
         return std::make_shared<DetectionOutputStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3 || _inputEdges.size() == 5);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        for (const auto& inEdge : _inputEdges) {
-            _stridesInfo.setInput(inEdge, StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        for (const auto& inEdge : inputEdges()) {
+            stridesInfo.setInput(inEdge, StridesRequirement::compact());
         }
-        for (const auto& outEdge : _outputEdges) {
-            _stridesInfo.setOutput(outEdge, StridesRequirement::compact());
+        for (const auto& outEdge : outputEdges()) {
+            stridesInfo.setOutput(outEdge, StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() == 3 || numInputs() == 5);
+        IE_ASSERT(numOutputs() == 1);
+        assertAllInputsOutputsTypes(this, DataType::FP16, DataType::FP16);
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -126,25 +126,21 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3 || _inputEdges.size() == 5);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.size() == 1);
-
-        auto loc = _inputEdges[0]->input();
-        auto conf = _inputEdges[1]->input();
-        auto priors = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+        auto loc = inputEdge(0)->input();
+        auto conf = inputEdge(1)->input();
+        auto priors = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         loc->serializeNewBuffer(serializer);
         conf->serializeNewBuffer(serializer);
         priors->serializeNewBuffer(serializer);
-        if (_inputEdges.size() == 5) {
-            _inputEdges[3]->input()->serializeNewBuffer(serializer);
-            _inputEdges[4]->input()->serializeNewBuffer(serializer);
+        if (numInputs() == 5) {
+            inputEdge(3)->input()->serializeNewBuffer(serializer);
+            inputEdge(4)->input()->serializeNewBuffer(serializer);
         }
         output->serializeNewBuffer(serializer);
 
-        _tempBufferEdges[0]->tempBuffer()->serializeNewBuffer(serializer);
+        tempBuffer(0)->serializeNewBuffer(serializer);
     }
 };
 
index c4094fb..71494a3 100644 (file)
@@ -72,7 +72,6 @@ const std::map<ie::EltwiseLayer::eOperation, std::function<StageType(ie::Eltwise
         MAP_ELEMENTS(Logical_XOR,   moreThanOneInput),
         MAP_ELEMENTS(Pow,           onlyTwoInputs),
         MAP_ELEMENTS(Floor_mod,     onlyTwoInputs),
-        MAP_ELEMENTS(Select,        onlyThreeInputs),
 };
 
 class EltwiseStage final : public StageNode {
@@ -83,17 +82,15 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto output = _outputEdges[0]->output();
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
+        auto output = outputEdge(0)->output();
 
         if (_type != StageType::Prod &&
             step == ScalePropagationStep::Propagate) {
             // Keep the largest input scale factor.
             auto maxScale = std::numeric_limits<float>::lowest();
-            for (const auto& inEdge : _inputEdges) {
+            for (const auto& inEdge : inputEdges()) {
                 if (inEdge->input()->usage() == DataUsage::Fake) {
                     continue;
                 }
@@ -101,7 +98,7 @@ private:
                 maxScale = std::max(maxScale, inputScales[inEdge->portInd()]);
             }
 
-            for (const auto& inEdge : _inputEdges) {
+            for (const auto& inEdge : inputEdges()) {
                 if (inEdge->input()->usage() == DataUsage::Fake) {
                     continue;
                 }
@@ -109,29 +106,26 @@ private:
                 auto curScale = inputScales[inEdge->portInd()];
 
                 if (!isFloatEqual(curScale, maxScale)) {
-                    _scaleInfo.setInput(inEdge, maxScale / curScale);
+                    scaleInfo.setInput(inEdge, maxScale / curScale);
                 }
             }
 
-            _scaleInfo.setOutput(_outputEdges[0], maxScale);
+            scaleInfo.setOutput(outputEdge(0), maxScale);
         } else {
             // Eltwise can only propagate scaling for Sum and Max cases.
-            for (const auto& inEdge : _inputEdges) {
-                _scaleInfo.setInput(inEdge, 1.0f);
+            for (const auto& inEdge : inputEdges()) {
+                scaleInfo.setInput(inEdge, 1.0f);
             }
 
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto input2 = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto input2 = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         auto in0Desc = input0->desc();
         auto in1Desc = input1->desc();
@@ -159,26 +153,27 @@ private:
             finalOrder = outDesc.dimsOrder();
         }
 
-        _orderInfo.setInput(_inputEdges[0], finalOrder.numDims() == in0Desc.numDims() ? finalOrder : in0Desc.dimsOrder());
-        _orderInfo.setInput(_inputEdges[1], finalOrder.numDims() == in1Desc.numDims() ? finalOrder : in1Desc.dimsOrder());
-        _orderInfo.setInput(_inputEdges[2], finalOrder.numDims() == in2Desc.numDims() ? finalOrder : in2Desc.dimsOrder());
-        _orderInfo.setOutput(_outputEdges[0], finalOrder);
+        orderInfo.setInput(inputEdge(0), finalOrder.numDims() == in0Desc.numDims() ? finalOrder : in0Desc.dimsOrder());
+        orderInfo.setInput(inputEdge(1), finalOrder.numDims() == in1Desc.numDims() ? finalOrder : in1Desc.dimsOrder());
+        orderInfo.setInput(inputEdge(2), finalOrder.numDims() == in2Desc.numDims() ? finalOrder : in2Desc.dimsOrder());
+        orderInfo.setOutput(outputEdge(0), finalOrder);
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::CanBeLimited;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -198,14 +193,10 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto input2 = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto input2 = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         input0->serializeNewBuffer(serializer, output->desc().dimsOrder());
         output->serializeNewBuffer(serializer);
@@ -255,7 +246,7 @@ void FrontEnd::parseEltwise(
     auto output = outputs[0];
 
     auto tempOutput = output;
-    if ((stageType != StageType::Select) && (inputs.size() > 2)) {
+    if (inputs.size() > 2) {
         tempOutput = model->duplicateData(
             output,
             formatString("@temp@1/%d", inputs.size() - 2));
@@ -269,10 +260,7 @@ void FrontEnd::parseEltwise(
     else
         tempInputs[1] = inputs[1];
 
-    if (stageType == StageType::Select)
-        tempInputs[2] = inputs[2];
-    else
-        tempInputs[2] = model->addFakeData();
+    tempInputs[2] = model->addFakeData();
 
     auto stage = model->addNewStage<EltwiseStage>(
         layer->name,
@@ -298,33 +286,51 @@ void FrontEnd::parseEltwise(
     stage->attrs().set<float>("min_value", 0.0f);
     stage->attrs().set<float>("max_value", 1.0f);
 
-    if (stageType != StageType::Select) {
-        tempInputs[0] = tempOutput;
-        for (int ind = 2; ind < inputs.size(); ++ind) {
-            tempInputs[1] = inputs[ind];
-
-            if (ind + 1 == inputs.size()) {
-                tempOutput = output;
-            } else {
-                tempOutput = model->duplicateData(
-                    output,
-                    formatString("@temp@%d/%d", ind, inputs.size() - 2));
-            }
+    tempInputs[0] = tempOutput;
+    for (int ind = 2; ind < inputs.size(); ++ind) {
+        tempInputs[1] = inputs[ind];
 
-            stage = model->addNewStage<EltwiseStage>(
-                layer->name + "@" + std::to_string(ind - 1),
-                stageType,
-                layer,
-                tempInputs,
-                {tempOutput});
+        if (ind + 1 == inputs.size()) {
+            tempOutput = output;
+        } else {
+            tempOutput = model->duplicateData(
+                output,
+                formatString("@temp@%d/%d", ind, inputs.size() - 2));
+        }
 
-            if (layer->coeff.size() > ind) {
-                stage->attrs().set<float>("coeff2", layer->coeff[ind]);
-            }
+        stage = model->addNewStage<EltwiseStage>(
+            layer->name + "@" + std::to_string(ind - 1),
+            stageType,
+            layer,
+            tempInputs,
+            {tempOutput});
 
-            tempInputs[0] = tempOutput;
+        if (layer->coeff.size() > ind) {
+            stage->attrs().set<float>("coeff2", layer->coeff[ind]);
         }
+
+        tempInputs[0] = tempOutput;
+    }
+}
+
+void FrontEnd::parseSelect(
+        const Model::Ptr& model,
+        const ie::CNNLayerPtr& _layer,
+        const DataVector& inputs,
+        const DataVector& outputs) {
+    auto layer = std::dynamic_pointer_cast<ie::SelectLayer>(_layer);
+    IE_ASSERT(layer != nullptr);
+
+    if (inputs.size() != 3) {
+        VPU_THROW_EXCEPTION << "Select supports only three inputs";
     }
+
+    auto stage = model->addNewStage<EltwiseStage>(
+        layer->name,
+        StageType::Select,
+        layer,
+        inputs,
+        outputs);
 }
 
 Stage StageBuilder::addSumStage(
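
The parseEltwise changes above drop the Select special case (Select now gets its own parseSelect) and always fold an eltwise layer with more than two inputs into a chain of two-input EltwiseStage nodes, feeding each intermediate result back in as the first operand of the next stage. A minimal standalone sketch of that folding order, in plain C++ with a hypothetical foldEltwise helper (no Inference Engine or VPU types):

#include <cassert>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Fold N inputs into a chain of binary operations, mirroring how parseEltwise
// chains stages: acc = op(in[0], in[1]); acc = op(acc, in[2]); ...
float foldEltwise(const std::vector<float>& inputs,
                  const std::function<float(float, float)>& op) {
    assert(inputs.size() >= 2);
    float acc = op(inputs[0], inputs[1]);
    for (std::size_t ind = 2; ind < inputs.size(); ++ind) {
        acc = op(acc, inputs[ind]);  // previous result becomes the first operand
    }
    return acc;
}

int main() {
    const std::vector<float> inputs{1.0f, 2.0f, 3.0f, 4.0f};
    // A four-input sum realized as three binary additions, as the chained stages would compute it.
    std::cout << foldEltwise(inputs, [](float a, float b) { return a + b; }) << "\n";  // prints 10
}
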
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/exp.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/exp.cpp
new file mode 100644 (file)
index 0000000..9204767
--- /dev/null
@@ -0,0 +1,46 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vector>
+#include <memory>
+#include <set>
+
+#include <vpu/sw/post_op_stage.hpp>
+
+namespace vpu {
+
+namespace {
+
+class ExpStage final : public PostOpStage {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<ExpStage>(*this);
+    }
+
+    void serializeParamsImpl(BlobSerializer&) const override {
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseExp(
+        const Model::Ptr& model,
+        const ie::CNNLayerPtr& layer,
+        const DataVector& inputs,
+        const DataVector& outputs) {
+    IE_ASSERT(inputs.size() == 1);
+    IE_ASSERT(outputs.size() == 1);
+
+    model->addNewStage<ExpStage>(
+        layer->name,
+        StageType::Exp,
+        layer,
+        inputs,
+        outputs);
+}
+
+}  // namespace vpu
+
@@ -17,49 +17,44 @@ namespace vpu {
 
 namespace {
 
-class BroadcastStage final : public StageNode {
+class ExpandStage final : public StageNode {
 protected:
     StagePtr cloneImpl() const override {
-        return std::make_shared<BroadcastStage>(*this);
+        return std::make_shared<ExpandStage>(*this);
     }
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto dimsOrder = output->desc().dimsOrder();
 
         //
-        // Get smallest Dim over which Broadcast is done.
+        // Get the smallest Dim over which Expand is done.
         //
 
-        auto minBroadcastDimInd = dimsOrder.numDims();
+        auto minExpandDimInd = dimsOrder.numDims();
 
         for (const auto& p : output->desc().dims()) {
             if (input->desc().dim(p.first) != p.second) {
-                minBroadcastDimInd = std::min(minBroadcastDimInd, dimsOrder.dimInd(p.first));
+                minExpandDimInd = std::min(minExpandDimInd, dimsOrder.dimInd(p.first));
             }
         }
 
-        IE_ASSERT(minBroadcastDimInd < dimsOrder.numDims());
+        IE_ASSERT(minExpandDimInd < dimsOrder.numDims());
 
         //
         // Initial StridesRequirement for input and output.
@@ -68,7 +63,7 @@ protected:
         auto outputReqs = output->requiredStrides();
 
         auto inputReqs = outputReqs;
-        for (int i = minBroadcastDimInd + 1; i < dimsOrder.numDims(); ++i) {
+        for (int i = minExpandDimInd + 1; i < dimsOrder.numDims(); ++i) {
             inputReqs.remove(i);
         }
 
@@ -82,7 +77,7 @@ protected:
             if (consumerInfo.hasInput(consumerEdge)) {
                 const auto& consumerReqs = consumerInfo.getInput(consumerEdge);
 
-                for (int i = 0; i < minBroadcastDimInd + 1; ++i) {
+                for (int i = 0; i < minExpandDimInd + 1; ++i) {
                     if (outputReqs.get(i) == DimStride::Any) {
                         if (consumerReqs.get(i) != DimStride::Any) {
                             inputReqs.add(i, consumerReqs.get(i));
@@ -97,17 +92,19 @@ protected:
         // Return merged StridesRequirements.
         //
 
-        _stridesInfo.setInput(_inputEdges[0], inputReqs);
-        _stridesInfo.setOutput(_outputEdges[0], outputReqs);
+        stridesInfo.setInput(inputEdge(0), inputReqs);
+        stridesInfo.setOutput(outputEdge(0), outputReqs);
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        const auto& firstInputPrecision = input(0)->desc().type();
+        assertInputsOutputsTypes(this, {{firstInputPrecision}}, {{firstInputPrecision}});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
@@ -121,16 +118,16 @@ protected:
 
 }  // namespace
 
-Stage StageBuilder::addBroadcastStage(
+Stage StageBuilder::addExpandStage(
         const Model::Ptr& model,
         const std::string& name,
         const ie::CNNLayerPtr& layer,
         const Data& input,
         const Data& output,
         const DimValues& offset) {
-    auto stage = model->addNewStage<BroadcastStage>(
+    auto stage = model->addNewStage<ExpandStage>(
         name,
-        StageType::Broadcast,
+        StageType::Expand,
         layer,
         {input},
         {output});
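
The strides logic in the renamed Expand stage above keys off the innermost dimension whose extent differs between input and output (minExpandDimInd). A self-contained sketch of that search over plain shape vectors listed from innermost to outermost dimension, with a hypothetical helper name (not the VPU DimsOrder API):

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Given input and output extents listed from innermost to outermost dimension,
// return the index of the smallest (innermost) dimension that Expand enlarges.
std::size_t minExpandDimInd(const std::vector<int>& inDims,
                            const std::vector<int>& outDims) {
    assert(inDims.size() == outDims.size());
    for (std::size_t i = 0; i < outDims.size(); ++i) {
        if (inDims[i] != outDims[i]) {
            return i;  // first mismatch in memory order is the smallest expanded dim
        }
    }
    return outDims.size();  // nothing is expanded
}

int main() {
    // W and H match, C is expanded from 3 to 8 -> index 2.
    std::cout << minExpandDimInd({16, 16, 3}, {16, 16, 8}) << "\n";
}
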
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/floor.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/floor.cpp
new file mode 100644 (file)
index 0000000..bbc5869
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vector>
+#include <memory>
+#include <set>
+
+#include <vpu/sw/post_op_stage.hpp>
+
+namespace vpu {
+
+namespace {
+
+class FloorStage final : public PostOpStage {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<FloorStage>(*this);
+    }
+
+    void serializeParamsImpl(BlobSerializer&) const override {
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseFloor(
+        const Model::Ptr& model,
+        const ie::CNNLayerPtr& layer,
+        const DataVector& inputs,
+        const DataVector& outputs) {
+    IE_ASSERT(inputs.size() == 1);
+    IE_ASSERT(outputs.size() == 1);
+
+    model->addNewStage<FloorStage>(
+            layer->name,
+            StageType::Floor,
+            layer,
+            inputs,
+            outputs);
+}
+
+}  // namespace vpu
index d5dea79..29e704e 100644 (file)
@@ -1,17 +1,5 @@
-//
-// Copyright 2019 Intel Corporation.
-//
-// This software and the related documents are Intel copyrighted materials,
-// and your use of them is governed by the express license under which they
-// were provided to you (End User License Agreement for the Intel(R) Software
-// Development Products (Version May 2017)). Unless the License provides
-// otherwise, you may not use, modify, copy, publish, distribute, disclose or
-// transmit this software or the related documents without Intel's prior
-// written permission.
-//
-// This software and the related documents are provided as is, with no
-// express or implied warranties, other than those that are expressly
-// stated in the License.
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
 //
 
 #include <vpu/frontend/frontend.hpp>
@@ -55,69 +43,60 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        if (step == ScalePropagationStep::Propagate) {
-            _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
-        } else {
-            // Gather can only propagate scaling.
-            for (const auto& inEdge : _inputEdges) {
-                _scaleInfo.setInput(inEdge, 1.0f);
-            }
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
-        }
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
+         if (step == ScalePropagationStep::Propagate) {
+             scaleInfo.setOutput(outputEdge(0), inputScales[0]);
+         } else {
+             // Gather can only propagate scaling.
+             for (const auto& inEdge : inputEdges()) {
+                 scaleInfo.setInput(inEdge, 1.0f);
+             }
+             scaleInfo.setOutput(outputEdge(0), 1.0f);
+         }
     }
 
-    void propagateDataOrderImpl() const override {
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        for (const auto& inEdge : _inputEdges) {
-            _stridesInfo.setInput(inEdge, StridesRequirement::compact());
-        }
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+         for (const auto& inEdge : inputEdges()) {
+             stridesInfo.setInput(inEdge, StridesRequirement::compact());
+         }
+         stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::NotNeeded;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-
-        auto input = _inputEdges[0]->input();
+         auto input = inputEdge(0)->input();
 
-        auto axis = attrs().get<Dim>("axis");
-        auto axisInd = input->desc().dimsOrder().dimInd(axis);
+         auto axis = attrs().get<Dim>("axis");
+         auto axisInd = input->desc().dimsOrder().dimInd(axis);
 
-        serializer.append(static_cast<int>(axisInd));
+         serializer.append(static_cast<int>(axisInd));
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+         auto input0 = inputEdge(0)->input();
+         auto input1 = inputEdge(1)->input();
+         auto output = outputEdge(0)->output();
 
-        input0->serializeNewBuffer(serializer);
-        output->serializeNewBuffer(serializer);
-        input1->serializeNewBuffer(serializer);
+         input0->serializeNewBuffer(serializer);
+         output->serializeNewBuffer(serializer);
+         input1->serializeNewBuffer(serializer);
     }
 };
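
The Gather stage above serializes only the axis index (in the input's memory order); the operation itself picks entries of the first input along that axis according to the second, indices, input. A minimal one-dimensional sketch of that semantics in plain C++ (hypothetical gather1D helper, illustrative only):

#include <cstddef>
#include <iostream>
#include <vector>

// 1-D gather: out[i] = data[indices[i]].
std::vector<float> gather1D(const std::vector<float>& data,
                            const std::vector<std::size_t>& indices) {
    std::vector<float> out;
    out.reserve(indices.size());
    for (auto idx : indices) {
        out.push_back(data.at(idx));  // .at() traps out-of-range indices
    }
    return out;
}

int main() {
    const std::vector<float> data{10.f, 20.f, 30.f, 40.f};
    for (auto v : gather1D(data, {3, 0, 2})) {
        std::cout << v << " ";  // prints: 40 10 30
    }
    std::cout << "\n";
}
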
 
index d297d01..fdf3c39 100644 (file)
@@ -1,17 +1,5 @@
-//
-// Copyright (C) 2019 Intel Corporation.
-//
-// This software and the related documents are Intel copyrighted materials,
-// and your use of them is governed by the express license under which they
-// were provided to you (End User License Agreement for the Intel(R) Software
-// Development Products (Version May 2017)). Unless the License provides
-// otherwise, you may not use, modify, copy, publish, distribute, disclose or
-// transmit this software or the related documents without Intel's prior
-// written permission.
-//
-// This software and the related documents are provided as is, with no
-// express or implied warranties, other than those that are expressly
-// stated in the License.
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
 //
 
 #include <vpu/frontend/frontend.hpp>
@@ -33,14 +21,11 @@ private:
         return std::make_shared<GEMMStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto inputDimsOrder0 = inputEdge(0)->input()->desc().dimsOrder();
+        auto inputDimsOrder1 = inputEdge(1)->input()->desc().dimsOrder();
 
-        auto inputDimsOrder0 = _inputEdges[0]->input()->desc().dimsOrder();
-        auto inputDimsOrder1 = _inputEdges[1]->input()->desc().dimsOrder();
-        auto inputDimsOrder2 = _inputEdges[2]->input()->desc().dimsOrder();
-        auto outputDimsOrder = _outputEdges[0]->output()->desc().dimsOrder();
+        auto outputDimsOrder = outputEdge(0)->output()->desc().dimsOrder();
 
         if (inputDimsOrder0.numDims() >= 3) {
             inputDimsOrder0.moveDim(Dim::C, 2);  // ->...CHW
@@ -48,61 +33,60 @@ private:
         if (inputDimsOrder1.numDims() >= 3) {
             inputDimsOrder1.moveDim(Dim::C, 2);  // ->...CHW
         }
-        if (inputDimsOrder2.numDims() >= 3) {
-            inputDimsOrder2.moveDim(Dim::C, 2);  // ->...CHW
-        }
         if (outputDimsOrder.numDims() >= 3) {
             outputDimsOrder.moveDim(Dim::C, 2);  // ->...CHW
         }
 
-        _orderInfo.setInput(_inputEdges[0], inputDimsOrder0);
-        _orderInfo.setInput(_inputEdges[1], inputDimsOrder1);
-        _orderInfo.setInput(_inputEdges[2], inputDimsOrder2);
-        _orderInfo.setOutput(_outputEdges[0], outputDimsOrder);
+        orderInfo.setInput(inputEdge(0), inputDimsOrder0);
+        orderInfo.setInput(inputEdge(1), inputDimsOrder1);
+        orderInfo.setOutput(outputEdge(0), outputDimsOrder);
+
+        if (numInputs() == 3) {
+            auto inputDimsOrder2 = inputEdge(2)->input()->desc().dimsOrder();
+            if (inputDimsOrder2.numDims() >= 3) {
+                inputDimsOrder2.moveDim(Dim::C, 2);  // ->...CHW
+            }
+            orderInfo.setInput(inputEdge(2), inputDimsOrder2);
+        }
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() == 2 || numInputs() == 3);
+        IE_ASSERT(numOutputs() == 1);
+        assertAllInputsOutputsTypes(this, DataType::FP16, DataType::FP16);
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
         auto alpha = attrs().get<float>("alpha");
         auto beta = attrs().get<float>("beta");
         auto transposeA = attrs().get<bool>("transposeA");
         auto transposeB = attrs().get<bool>("transposeB");
+        auto hasThreeInputs = numInputs() == 3;
 
         serializer.append(static_cast<float>(alpha));
         serializer.append(static_cast<float>(beta));
+        serializer.append(static_cast<int>(hasThreeInputs));
         serializer.append(static_cast<int>(transposeA));
         serializer.append(static_cast<int>(transposeB));
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input1 = _inputEdges[0]->input();
-        auto input2 = _inputEdges[1]->input();
-        auto input3 = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
-
-        input1->serializeNewBuffer(serializer);
-        input2->serializeNewBuffer(serializer);
-        input3->serializeNewBuffer(serializer);
-        output->serializeNewBuffer(serializer);
+        inputEdge(0)->input()->serializeNewBuffer(serializer);
+        inputEdge(1)->input()->serializeNewBuffer(serializer);
+        if (numInputs() == 3) {
+            inputEdge(2)->input()->serializeNewBuffer(serializer);
+        }
+        outputEdge(0)->output()->serializeNewBuffer(serializer);
     }
 };
 
@@ -113,7 +97,7 @@ void FrontEnd::parseGEMM(
         const ie::CNNLayerPtr& _layer,
         const DataVector& inputs,
         const DataVector& outputs) {
-    IE_ASSERT(inputs.size() == 3);
+    IE_ASSERT(inputs.size() == 2 || inputs.size() == 3);
     IE_ASSERT(outputs.size() == 1);
 
     auto layer = std::dynamic_pointer_cast<ie::GemmLayer>(_layer);
@@ -127,9 +111,7 @@ void FrontEnd::parseGEMM(
         layer->beta,
         layer->transpose_a,
         layer->transpose_b,
-        inputs[0],
-        inputs[1],
-        inputs[2],
+        inputs,
         outputs[0]);
 }
 
@@ -141,15 +123,13 @@ Stage StageBuilder::addGemmStage(
         const float beta,
         const bool transposeA,
         const bool transposeB,
-        const Data& inputA,
-        const Data& inputB,
-        const Data& inputC,
+        const DataVector& inputs,
         const Data& output) {
     auto stage = model->addNewStage<GEMMStage>(
         name,
         StageType::GEMM,
         layer,
-        {inputA, inputB, inputC},
+        inputs,
         {output});
 
     stage->attrs().set<float>("alpha", alpha);
index 633d457..e608895 100644 (file)
@@ -19,30 +19,25 @@ private:
         return std::make_shared<GRNStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -52,12 +47,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
index 40e6e95..8a56b4b 100644 (file)
@@ -19,30 +19,25 @@ private:
         return std::make_shared<InterpStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -52,12 +47,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index a3f886d..7363df5 100644 (file)
@@ -1,17 +1,5 @@
-//
-// Copyright 2019 Intel Corporation.
-//
-// This software and the related documents are Intel copyrighted materials,
-// and your use of them is governed by the express license under which they
-// were provided to you (End User License Agreement for the Intel(R) Software
-// Development Products (Version May 2017)). Unless the License provides
-// otherwise, you may not use, modify, copy, publish, distribute, disclose or
-// transmit this software or the related documents without Intel's prior
-// written permission.
-//
-// This software and the related documents are provided as is, with no
-// express or implied warranties, other than those that are expressly
-// stated in the License.
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
 //
 
 #include <vpu/frontend/frontend.hpp>
index e202b58..d9d69b8 100644 (file)
@@ -32,37 +32,31 @@ private:
         return std::make_shared<MTCNNStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
-
-        _orderInfo.setInput(_inputEdges[0], input->desc().dimsOrder().createMovedDim(Dim::C, 2));
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 0));
+        orderInfo.setInput(inputEdge(0), input->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, 0));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+            {{DataType::U8, DataType::FP16}, {DataType::U8, DataType::FP16}},
+            {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -85,13 +79,9 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         input0->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index 37736e2..63e3e4c 100644 (file)
@@ -9,6 +9,7 @@
 #include <unordered_set>
 #include <memory>
 #include <set>
+#include <precision_utils.h>
 
 namespace vpu {
 
@@ -20,47 +21,40 @@ private:
         return std::make_shared<MVNStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
         auto normalize = attrs().get<int>("normalize");
         auto across_channels = attrs().get<int>("across_channels");
+        auto eps = attrs().get<float>("eps");
 
         serializer.append(static_cast<int32_t>(normalize));
         serializer.append(static_cast<int32_t>(across_channels));
+        serializer.append(static_cast<float>(eps));
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
@@ -80,15 +74,6 @@ void FrontEnd::parseMVN(
     auto layer = std::dynamic_pointer_cast<ie::MVNLayer>(_layer);
     IE_ASSERT(layer != nullptr);
 
-    float def_eps = 1e-9f;
-    float eps = layer->GetParamAsFloat("eps", def_eps);
-
-    if (eps > 1e-7f) {
-        VPU_THROW_EXCEPTION
-            << "Layer " << layer->name << " [" << layer->type
-            <<  "] in our kernel we use const value 1e-9f. Actual = " << eps;
-    }
-
     auto stage = model->addNewStage<MVNStage>(
         layer->name,
         StageType::MVN,
@@ -98,6 +83,7 @@ void FrontEnd::parseMVN(
 
     stage->attrs().set<int>("normalize", layer->normalize);
     stage->attrs().set<int>("across_channels", layer->across_channels);
+    stage->attrs().set<float>("eps", layer->GetParamAsFloat("eps", 0.0f));
 }
 
 }  // namespace vpu
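
The MVN change above stops rejecting non-default eps values (the kernel previously assumed a constant 1e-9f) and instead serializes eps alongside the normalize and across_channels parameters. For reference, a minimal sketch of mean-variance normalization over one channel, (x - mean) / sqrt(variance + eps); the eps placement inside the square root is an assumption here, chosen for illustration only:

#include <cmath>
#include <iostream>
#include <vector>

// Mean-variance normalization of one channel: subtract the mean, then divide by
// the standard deviation, with eps guarding against division by zero.
std::vector<float> mvn(const std::vector<float>& x, float eps) {
    float mean = 0.0f;
    for (float v : x) mean += v;
    mean /= x.size();

    float var = 0.0f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= x.size();

    std::vector<float> out;
    out.reserve(x.size());
    for (float v : x) out.push_back((v - mean) / std::sqrt(var + eps));
    return out;
}

int main() {
    for (float v : mvn({1.0f, 2.0f, 3.0f, 4.0f}, 1e-9f)) std::cout << v << " ";
    std::cout << "\n";
}
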
index 7489d98..819fd14 100644 (file)
@@ -21,31 +21,29 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
-        for (const auto& outEdge : _outputEdges) {
-            _scaleInfo.setOutput(outEdge, 1.0f);
+            ScalePropagationStep,
+            StageDataInfo<float>& scaleInfo) override {
+        for (const auto& outEdge : outputEdges()) {
+            scaleInfo.setOutput(outEdge, 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::NotNeeded;
     }
 
-    void finalCheckImpl() const override {
-    }
-
     void serializeParamsImpl(BlobSerializer&) const override {
     }
 
index 9ba01e3..465d9b0 100644 (file)
@@ -21,20 +21,14 @@ private:
         return std::make_shared<LRNStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
 
         // LRN supports both HWC and CHW orders, but requires that input and output have the same stride
 
@@ -44,22 +38,20 @@ private:
             reqs.add(1, DimStride::Aligned);
         }
 
-        _stridesInfo.setInput(_inputEdges[0], reqs);
-        _stridesInfo.setOutput(_outputEdges[0], reqs);
+        stridesInfo.setInput(inputEdge(0), reqs);
+        stridesInfo.setOutput(outputEdge(0), reqs);
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -76,12 +68,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index 68cb5b1..b9309bd 100644 (file)
@@ -20,37 +20,29 @@ private:
         return std::make_shared<NormalizeStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        if (_inputEdges[0]->input()->desc().dimsOrder().dimInd(Dim::C) == 0) {
-            _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-            _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        if (input(0)->desc().dimsOrder().dimInd(Dim::C) == 0) {
+            stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+            stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -64,16 +56,11 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto scales = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto scales = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         auto inputDesc  = input->desc();
-        auto outputDesc = input->desc();
         auto iDimsOrder = inputDesc.dimsOrder();
 
         if (iDimsOrder == DimsOrder::NC || iDimsOrder == DimsOrder::C) {
index 88d8f1e..3c5f646 100644 (file)
@@ -23,54 +23,45 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
-            _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
+            scaleInfo.setOutput(outputEdge(0), inputScales[0]);
         } else {
             // Copy can only propagate scaling.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
         // TODO: try merge with last dimension
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::CanBeLimited;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+        auto input = inputEdge(0)->input();
 
         auto perm = input->desc().dimsOrder().toPermutation();
         IE_ASSERT(perm.size() <= 4);
@@ -95,12 +86,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index 58a79ca..e33d15a 100644 (file)
@@ -15,22 +15,6 @@ namespace vpu {
 
 namespace {
 
-template <class Cont1, class Cont2>
-SmallVector<typename Cont1::value_type, MAX_DIMS_64> permuteArray(const Cont1& src, const Cont2& permutation) {
-    SmallVector<typename Cont1::value_type, MAX_DIMS_64> out(permutation.size());
-
-    for (int i = 0; i < out.size(); i++) {
-        auto newInd = static_cast<int>(permutation[i]);
-
-        IE_ASSERT(newInd >= 0);
-        IE_ASSERT(newInd < src.size());
-
-        out[i] = src[newInd];
-    }
-
-    return out;
-}
-
 class PermuteStage final : public StageNode {
 private:
     StagePtr cloneImpl() const override {
@@ -39,73 +23,55 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
-            _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
+            scaleInfo.setOutput(outputEdge(0), inputScales[0]);
         } else {
             // Copy can only propagate scaling.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        orderInfo.setOutput(outputEdge(0), input(0)->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>&) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>&) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::CanBeLimited;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-
-        const auto& order = attrs().get<SmallVector<int, MAX_DIMS_64>>("order");
+        const auto& permutation = attrs().get<DimValues_<Dim>>("permutation");
 
-        auto perm = input->desc().dimsOrder().toPermutation();
-        auto ind = input->desc().dimsOrder().toIndices();
-
-        auto dimPerm = permuteArray(order, perm);
-        auto memoryOrderPerm = permuteArray(ind.toVector(-1), dimPerm);
-
-        int i = 0;
-        for (i = 0; i < memoryOrderPerm.size(); i++) {
-            serializer.append(static_cast<uint32_t>(memoryOrderPerm[i]));
+        for (auto dstDim : output(0)->desc().dimsOrder().toPermutation()) {
+            const auto srcDim = permutation[dstDim];
+            const auto srcDimInd = input(0)->desc().dimsOrder().dimInd(srcDim);
+            serializer.append(static_cast<uint32_t>(srcDimInd));
         }
-        for (; i < MAX_DIMS_32; i++) {
+
+        for (int i = output(0)->desc().numDims(); i < MAX_DIMS_32; i++) {
             serializer.append(static_cast<uint32_t>(-1));
         }
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
@@ -122,55 +88,33 @@ void FrontEnd::parsePermute(
     IE_ASSERT(inputs.size() == 1);
     IE_ASSERT(outputs.size() == 1);
 
-    auto ieOrder = layer->GetParamAsInts("order");
+    const auto ieOrder = layer->GetParamAsUInts("order");
+    const auto perm = DimsOrder::fromNumDims(checked_cast<int>(ieOrder.size())).toPermutation();
 
-    auto maxIeOrder = *std::max_element(ieOrder.begin(), ieOrder.end());
-
-    SmallVector<int, MAX_DIMS_64> vpuOrder(MAX_DIMS_64, -1);
+    DimValues_<Dim> permutation;
     for (size_t i = 0; i < ieOrder.size(); i++) {
-        vpuOrder[i] = maxIeOrder - ieOrder[ieOrder.size() - 1 - i];
+        const auto srcDim = perm[ieOrder.size() - ieOrder[i] - 1];
+        const auto dstDim = perm[ieOrder.size() - i - 1];
+        permutation.set(dstDim, srcDim);
     }
 
-    auto input = inputs[0];
-    auto output = outputs[0];
-
-    auto stage = model->addNewStage<PermuteStage>(
-        layer->name,
-        StageType::Permute,
-        layer,
-        inputs,
-        outputs);
-
-    stage->attrs().set<SmallVector<int, MAX_DIMS_64>>("order", vpuOrder);
+    _stageBuilder->addPermuteStage(model, layer->name, layer, inputs[0], outputs[0], permutation);
 }
 
 Stage StageBuilder::addPermuteStage(
         const Model::Ptr& model,
         const std::string& name,
         const ie::CNNLayerPtr& layer,
-        const DataVector& inputs,
-        const DataVector& outputs,
-        const SmallVector<int, MAX_DIMS_64>& ieOrder) {
-    IE_ASSERT(inputs.size() == 1);
-    IE_ASSERT(outputs.size() == 1);
-
-    auto maxIeOrder = *std::max_element(ieOrder.begin(), ieOrder.end());
-
-    SmallVector<int, MAX_DIMS_64> vpuOrder(MAX_DIMS_64, -1);
-    for (size_t i = 0; i < ieOrder.size(); i++) {
-        vpuOrder[i] = maxIeOrder - ieOrder[ieOrder.size() - 1 - i];
-    }
-
-    auto input = inputs[0];
-    auto output = outputs[0];
-
+        const Data& input,
+        const Data& output,
+        const DimValues_<Dim>& permutation) {
     auto stage = model->addNewStage<PermuteStage>(
-        layer->name,
+        name,
         StageType::Permute,
         layer,
-        inputs,
-        outputs);
-    stage->attrs().set<SmallVector<int, MAX_DIMS_64>>("order", vpuOrder);
+        {input},
+        {output});
+    stage->attrs().set("permutation", permutation);
 
     return stage;
 }
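
parsePermute above now converts the IE Permute "order" attribute, which lists for every output dimension the input dimension it is taken from (in the layer's outermost-first notation), into a per-dimension permutation attribute. A standalone sketch of what that attribute means for a plain shape vector (hypothetical permuteShape helper, no VPU Dim types):

#include <cstddef>
#include <iostream>
#include <vector>

// Apply an IE-style Permute order to a shape given in the same outermost-first
// notation: outShape[i] = inShape[order[i]].
std::vector<int> permuteShape(const std::vector<int>& inShape,
                              const std::vector<std::size_t>& order) {
    std::vector<int> outShape(order.size());
    for (std::size_t i = 0; i < order.size(); ++i) {
        outShape[i] = inShape[order[i]];
    }
    return outShape;
}

int main() {
    // NCHW -> NHWC corresponds to order = {0, 2, 3, 1}.
    for (int d : permuteShape({1, 3, 224, 224}, {0, 2, 3, 1})) {
        std::cout << d << " ";  // prints: 1 224 224 3
    }
    std::cout << "\n";
}
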
index 2f83e63..9504c73 100644 (file)
@@ -48,21 +48,19 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         auto power = attrs().get<float>("power");
         auto& scale = attrs().get<float>("scale");
         auto& bias = attrs().get<float>("bias");
 
         if (power != 1.0f) {
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         } else {
             auto inputScale = inputScales[0];
 
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
 
             if (step == ScalePropagationStep::ScaleInput) {
                 scale *= inputScale;
index 6f292b9..746ad53 100644 (file)
@@ -22,34 +22,29 @@ private:
         return std::make_shared<ProposalStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
 
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-
-        _orderInfo.setInput(_inputEdges[0], input0->desc().dimsOrder().createMovedDim(Dim::C, 2));
-        _orderInfo.setInput(_inputEdges[1], input1->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setInput(inputEdge(0), input0->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setInput(inputEdge(1), input1->desc().dimsOrder().createMovedDim(Dim::C, 2));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setInput(_inputEdges[1], StridesRequirement::compact());
-        _stridesInfo.setInput(_inputEdges[2], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setInput(inputEdge(1), StridesRequirement::compact());
+        stridesInfo.setInput(inputEdge(2), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -104,20 +99,16 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.size() == 1);
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto input2 = _inputEdges[2]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto input2 = inputEdge(2)->input();
+        auto output = outputEdge(0)->output();
 
         input0->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
         input1->serializeNewBuffer(serializer);
         input2->serializeNewBuffer(serializer);
-        _tempBufferEdges[0]->tempBuffer()->serializeNewBuffer(serializer);
+        tempBuffer(0)->serializeNewBuffer(serializer);
     }
 };
 
index 67d45d7..a77d536 100644 (file)
@@ -19,33 +19,28 @@ private:
         return std::make_shared<PSROIPoolingStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input0 = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
-        auto input0 = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
-
-        _orderInfo.setInput(_inputEdges[0], input0->desc().dimsOrder().createMovedDim(Dim::C, 2));
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setInput(inputEdge(0), input0->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, 2));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setInput(_inputEdges[1], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setInput(inputEdge(1), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -59,13 +54,9 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         input0->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
index d889fd9..a00fdfe 100644 (file)
@@ -1,21 +1,10 @@
-//
-// Copyright 2019 Intel Corporation.
-//
-// This software and the related documents are Intel copyrighted materials,
-// and your use of them is governed by the express license under which they
-// were provided to you (End User License Agreement for the Intel(R) Software
-// Development Products (Version May 2017)). Unless the License provides
-// otherwise, you may not use, modify, copy, publish, distribute, disclose or
-// transmit this software or the related documents without Intel's prior
-// written permission.
-//
-// This software and the related documents are provided as is, with no
-// express or implied warranties, other than those that are expressly
-// stated in the License.
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
 //
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <algorithm>
 #include <memory>
 #include <set>
 
@@ -29,36 +18,30 @@ private:
         return std::make_shared<ReduceStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
-        auto in0Desc = input0->desc();
-        auto in1Desc = input1->desc();
-        auto outDesc = output->desc();
+        auto in0Desc = input0->desc();
+        auto in1Desc = input1->desc();
+        auto outDesc = output->desc();
 
-        auto in0Order = DimsOrder::fromNumDims(in0Desc.numDims());
-        auto in1Order = DimsOrder::fromNumDims(in1Desc.numDims());
-        auto outOrder = DimsOrder::fromNumDims(outDesc.numDims());
+        auto in0Order = DimsOrder::fromNumDims(in0Desc.numDims());
+        auto in1Order = DimsOrder::fromNumDims(in1Desc.numDims());
+        auto outOrder = DimsOrder::fromNumDims(outDesc.numDims());
 
-        _orderInfo.setInput(_inputEdges[0], in0Order);
-        _orderInfo.setInput(_inputEdges[1], in1Order);
-        _orderInfo.setOutput(_outputEdges[0], outOrder);
+        orderInfo.setInput(inputEdge(0), in0Order);
+        orderInfo.setInput(inputEdge(1), in1Order);
+        orderInfo.setOutput(outputEdge(0), outOrder);
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
 
         auto in0Desc = input0->desc();
         auto in1Desc = input1->desc();
@@ -68,7 +51,7 @@ private:
         size_t ndims = in0Desc.numDims();
         IE_ASSERT(in1Desc.numDims() == 1);
         size_t indicesSize = in1Desc.totalDimSize();
-        IE_ASSERT(indicesSize < ndims);
+        IE_ASSERT(indicesSize <= ndims);
 
         const auto oldIndices = input1->content()->get<int32_t>();
 
@@ -89,6 +72,7 @@ private:
             index = static_cast<int32_t>(perm[ndims - 1 - index]);
             newIndices[i] = index;
         }
+        std::sort(newIndices, newIndices + indicesSize);
 
         auto newList = _model->duplicateData(
             input1,
@@ -96,17 +80,18 @@ private:
             DataDesc(),
             ieBlobContent(newIndicesBlob));
 
-        _model->replaceStageInput(_inputEdges[1], newList);
+        _model->replaceStageInput(inputEdge(1), newList);
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
         return StageSHAVEsRequirements::CanBeLimited;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::S32}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -116,16 +101,13 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
-        input0->serializeNewBuffer(serializer);
-        output->serializeNewBuffer(serializer);
-        input1->serializeNewBuffer(serializer);
+        input0->serializeNewBuffer(serializer);
+        output->serializeNewBuffer(serializer);
+        input1->serializeNewBuffer(serializer);
     }
 };
 
@@ -145,6 +127,8 @@ void FrontEnd::parseReduce(
     auto stageType = StageType::None;
     if (layer->type == "ReduceAnd") {
         stageType = StageType::ReduceAnd;
+    } else if (layer->type == "ReduceMin") {
+        stageType = StageType::ReduceMin;
     } else {
         VPU_THROW_EXCEPTION << "Reduce operation: " << layer->type << " is not supported";
     }
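
Note: `finalizeDataLayoutImpl` in the Reduce hunk above remaps each IE reduction axis through the reversed permutation (`perm[ndims - 1 - index]`) and the new `std::sort` hands the remapped axes to the runtime in ascending order. A self-contained sketch of that index arithmetic follows; it uses plain integers instead of the VPU Dim/DimsOrder types, and the permutation values are an assumed example for illustration only.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Assumed example: a 4D tensor whose permutation lists dims from minor to major,
    // encoded here as indices {0, 1, 2, 3}. This mirrors the shape of
    // DimsOrder::fromNumDims(ndims).toPermutation(), not its actual values.
    const std::vector<int32_t> perm = {0, 1, 2, 3};
    const size_t ndims = perm.size();

    // IE reduction axes count from the opposite end, so each axis index is flipped
    // before the permutation lookup, exactly as perm[ndims - 1 - index] does above.
    std::vector<int32_t> ieAxes = {2, 1};          // reduce over IE axes 2 and 1
    std::vector<int32_t> newIndices(ieAxes.size());
    for (size_t i = 0; i < ieAxes.size(); ++i) {
        newIndices[i] = perm[ndims - 1 - ieAxes[i]];
    }

    // The added std::sort guarantees the remapped axes come out in ascending order.
    std::sort(newIndices.begin(), newIndices.end());

    for (auto idx : newIndices) {
        std::printf("remapped axis: %d\n", idx);
    }
}
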
index cda718c..9563497 100644 (file)
@@ -19,39 +19,26 @@ private:
         return std::make_shared<RegionYoloStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto output = _outputEdges[0]->output();
-
-        if (!attrs().get<bool>("doSoftMax")) {
-            _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 2));  // CHW
-        }
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
         if (attrs().get<bool>("doSoftMax")) {
             // Major dimension must be compact.
-            _stridesInfo.setInput(_inputEdges[0], StridesRequirement().add(2, DimStride::Compact));
+            stridesInfo.setInput(inputEdge(0), StridesRequirement().add(2, DimStride::Compact));
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -69,12 +56,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index ff4cd9a..d99c5c5 100644 (file)
@@ -26,24 +26,22 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
             auto inputScale = inputScales[0];
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
         } else {
             // ReLU can only propagate scaling, not generate.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
         auto negativeSlope = attrs().get<float>("negativeSlope");
 
-        serializer.append(static_cast<uint32_t>(_inputEdges.size() == 2));
+        serializer.append(static_cast<uint32_t>(numInputs() == 2));
         serializer.append(negativeSlope);
     }
 };
index b6176e5..03b6763 100644 (file)
@@ -21,54 +21,44 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
-            _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
+            scaleInfo.setOutput(outputEdge(0), inputScales[0]);
         } else {
             // ReorgYolo can only propagate scaling.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
 
         auto inOrder = input->desc().dimsOrder();
 
         if (inOrder.dimInd(Dim::C) == 0) {
-            _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-            _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+            stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+            stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -78,12 +68,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index 2f6ab2d..d3e6bd4 100644 (file)
@@ -25,30 +25,25 @@ private:
         return std::make_shared<ResampleStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -62,12 +57,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeOldBuffer(handle_from_this(), serializer);
         output->serializeOldBuffer(handle_from_this(), serializer);
index a9a3c45..7e35592 100644 (file)
@@ -22,53 +22,41 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
-            _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
+            scaleInfo.setOutput(outputEdge(0), inputScales[0]);
         } else {
             // Reshape can only propagate scaling.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         // Only default order is supported
-        _orderInfo.setInput(_inputEdges[0], DimsOrder::fromNumDims(input->desc().numDims()));
-        _orderInfo.setOutput(_outputEdges[0], DimsOrder::fromNumDims(output->desc().numDims()));
+        orderInfo.setInput(inputEdge(0), DimsOrder::fromNumDims(input->desc().numDims()));
+        orderInfo.setOutput(outputEdge(0), DimsOrder::fromNumDims(output->desc().numDims()));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
-
-        IE_ASSERT(input->desc().totalDimSize() == output->desc().totalDimSize());
+    void initialCheckImpl() const override {
+        const auto& firstInputPrecision = input(0)->desc().type();
+        assertInputsOutputsTypes(this, {{firstInputPrecision}}, {{firstInputPrecision}});
+        IE_ASSERT(input(0)->desc().totalDimSize() == output(0)->desc().totalDimSize());
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
index 66b19a4..886b700 100644 (file)
@@ -1,17 +1,5 @@
-//
-// Copyright 2019 Intel Corporation.
-//
-// This software and the related documents are Intel copyrighted materials,
-// and your use of them is governed by the express license under which they
-// were provided to you (End User License Agreement for the Intel(R) Software
-// Development Products (Version May 2017)). Unless the License provides
-// otherwise, you may not use, modify, copy, publish, distribute, disclose or
-// transmit this software or the related documents without Intel's prior
-// written permission.
-//
-// This software and the related documents are provided as is, with no
-// express or implied warranties, other than those that are expressly
-// stated in the License.
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
 //
 
 #include <vpu/frontend/frontend.hpp>
@@ -30,52 +18,44 @@ private:
         return std::make_shared<ReverseSequenceStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto seq_lengths = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto seq_lengths = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
-        auto seq_axis = input->desc().dimsOrder().dimInd(attrs().get<Dim>("seq_axis"));
-        auto batch_axis = input->desc().dimsOrder().dimInd(attrs().get<Dim>("batch_axis"));
+        auto seq_axis = input->desc().dimsOrder().dimInd(attrs().get<Dim>("seq_axis"));
+        auto batch_axis = input->desc().dimsOrder().dimInd(attrs().get<Dim>("batch_axis"));
 
-        serializer.append(static_cast<int32_t>(seq_axis));
-        serializer.append(static_cast<int32_t>(batch_axis));
+        serializer.append(static_cast<int32_t>(seq_axis));
+        serializer.append(static_cast<int32_t>(batch_axis));
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto seq_lengths = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto seq_lengths = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
-        input->serializeNewBuffer(serializer);
-        seq_lengths->serializeNewBuffer(serializer);
-        output->serializeNewBuffer(serializer);
+        input->serializeNewBuffer(serializer);
+        seq_lengths->serializeNewBuffer(serializer);
+        output->serializeNewBuffer(serializer);
     }
 };
 
index 0a9e10f..78779a8 100644 (file)
@@ -21,12 +21,9 @@ private:
         return std::make_shared<LSTMCellStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 5);
-        IE_ASSERT(_outputEdges.size() == 2);
-
-        auto output = _outputEdges[0]->output();
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto output = outputEdge(0)->output();
+        auto input = inputEdge(0)->input();
 
         auto inputDimsOrder = input->desc().dimsOrder();
         auto outputDimsOrder = output->desc().dimsOrder();
@@ -38,29 +35,29 @@ private:
             outputDimsOrder.moveDim(Dim::C, 2);  // ->...CHW
         }
 
-        _orderInfo.setInput(_inputEdges[0], inputDimsOrder);
-        _orderInfo.setOutput(_outputEdges[0], outputDimsOrder);
+        orderInfo.setInput(inputEdge(0), inputDimsOrder);
+        orderInfo.setOutput(outputEdge(0), outputDimsOrder);
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 5);
-        IE_ASSERT(_outputEdges.size() == 2);
-
-        for (const auto& inEdge : _inputEdges) {
-            _stridesInfo.setInput(inEdge, StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        for (const auto& inEdge : inputEdges()) {
+            stridesInfo.setInput(inEdge, StridesRequirement::compact());
         }
-        for (const auto& outEdge : _outputEdges) {
-            _stridesInfo.setOutput(outEdge, StridesRequirement::compact());
+        for (const auto& outEdge : outputEdges()) {
+            stridesInfo.setOutput(outEdge, StridesRequirement::compact());
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() == 5);
+        IE_ASSERT(numOutputs() > 0);
+        assertAllInputsOutputsTypes(this, DataType::FP16, DataType::FP16);
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -73,23 +70,21 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 5);
-        IE_ASSERT(_outputEdges.size() == 2);
-
         int nCells = attrs().get<int>("nCells");
 
         bool useTempBuffer = (nCells > 1);
-        IE_ASSERT((_tempBufferEdges.size() == 1 && useTempBuffer) || !useTempBuffer);
+        IE_ASSERT((numTempBuffers() == 1 && useTempBuffer) || !useTempBuffer);
 
-        for (const auto& inEdge : _inputEdges) {
+        for (const auto& inEdge : inputEdges()) {
             inEdge->input()->serializeNewBuffer(serializer);
         }
-        for (const auto& outEdge : _outputEdges) {
+        for (const auto& outEdge : outputEdges()) {
             outEdge->output()->serializeNewBuffer(serializer);
         }
 
-        if (useTempBuffer)
-            _tempBufferEdges[0]->tempBuffer()->serializeNewBuffer(serializer);
+        if (useTempBuffer) {
+            tempBuffer(0)->serializeNewBuffer(serializer);
+        }
     }
 };
 
@@ -139,7 +134,7 @@ void FrontEnd::parseRNN(
     size_t input_size = inputs[0]->desc().dim(Dim::W);
     IE_ASSERT(input_size == inputs[0]->desc().totalDimSize() / nCells / nBatches);
 
-    size_t state_size = inputs[1]->desc().totalDimSize() / nBatches;
+    size_t state_size = outputs[0]->desc().totalDimSize() / nCells / nBatches;
     size_t cell_state_size = inputs[2]->desc().totalDimSize() / nBatches;
     IE_ASSERT(state_size == cell_state_size);
 
@@ -201,14 +196,46 @@ void FrontEnd::parseLSTMCell(
     auto layer = std::dynamic_pointer_cast<ie::LSTMCell>(_layer);
     IE_ASSERT(layer != nullptr);
 
-    Data weights, biases;
-    std::tie(weights, biases) = getWeightsAndBiases(model, layer);
+    DataVector stageInputs = inputs;
+    auto origWeights = layer->_weights;
+
+    IE_ASSERT(origWeights != nullptr) << "weights are empty for layer: " << layer->name;
+
+    if (lstmWeights.count(origWeights) != 0) {
+        stageInputs.emplace_back(lstmWeights[origWeights]);
+    } else {
+        auto weights = model->addConstData(
+                layer->name + "@weights",
+                DataDesc({origWeights->size()}),
+                ieBlobContent(origWeights));
+        lstmWeights[origWeights] = weights;
+        stageInputs.emplace_back(weights);
+    }
+
+    auto origBiases = layer->_biases;
+
+    Data biases;
+    if (origBiases == nullptr) {
+        biases = model->addFakeData();
+    } else {
+        if (lstmBiases.count(origBiases) != 0) {
+            biases = lstmBiases[origBiases];
+        } else {
+            biases = model->addConstData(
+                    layer->name + "@biases",
+                    DataDesc({origBiases->size()}),
+                    ieBlobContent(origBiases));
+            lstmBiases[origBiases] = biases;
+        }
+    }
+
+    stageInputs.emplace_back(biases);
 
     auto stage = model->addNewStage<LSTMCellStage>(
             layer->name,
             StageType::LSTMCell,
             layer,
-            {inputs[0], inputs[1], inputs[2], weights, biases},
+            stageInputs,
             outputs);
     stage->attrs().set<bool>("RNNForward", true);
     stage->attrs().set<int>("nCells", 1);
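
Note: parseLSTMCell above now keys const weights and biases by the original IE blob (the `lstmWeights`/`lstmBiases` maps), so several LSTMCell layers sharing one blob reuse a single const Data instead of duplicating it. The following is a minimal stand-alone sketch of that caching idea; the blob and Data handles are replaced with plain strings, and the real code calls model->addConstData() where the placeholder string is built.

#include <cstdio>
#include <map>
#include <memory>
#include <string>

// Stand-ins for ie::Blob::Ptr and vpu::Data, only to show the caching shape.
using BlobPtr = std::shared_ptr<std::string>;
using ConstData = std::string;

std::map<BlobPtr, ConstData> weightsCache;    // plays the role of lstmWeights

// Returns a cached const-data handle for the blob, creating it only on first use.
ConstData getOrAddConstData(const BlobPtr& blob, const std::string& layerName) {
    auto it = weightsCache.find(blob);
    if (it != weightsCache.end()) {
        return it->second;                    // shared blob -> shared const data
    }
    ConstData data = layerName + "@weights";  // real code: model->addConstData(...)
    weightsCache.emplace(blob, data);
    return data;
}

int main() {
    auto sharedBlob = std::make_shared<std::string>("fp16 weights");
    auto a = getOrAddConstData(sharedBlob, "lstm_cell_1");
    auto b = getOrAddConstData(sharedBlob, "lstm_cell_2");
    std::printf("same const data reused: %s\n", a == b ? "yes" : "no");  // prints "yes"
}
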
index 96cdc1f..0b40214 100644 (file)
@@ -27,33 +27,28 @@ private:
         return std::make_shared<ROIPoolingStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input0 = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
-        auto input0 = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
-
-        _orderInfo.setInput(_inputEdges[0], input0->desc().dimsOrder().createMovedDim(Dim::C, 2));
-        _orderInfo.setOutput(_outputEdges[0], output->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setInput(inputEdge(0), input0->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setOutput(outputEdge(0), output->desc().dimsOrder().createMovedDim(Dim::C, 2));
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
-        _stridesInfo.setInput(_inputEdges[1], StridesRequirement::compact());
-        _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());
+        stridesInfo.setInput(inputEdge(1), StridesRequirement::compact());
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
@@ -69,13 +64,9 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 2);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input0 = _inputEdges[0]->input();
-        auto input1 = _inputEdges[1]->input();
-        auto output = _outputEdges[0]->output();
+        auto input0 = inputEdge(0)->input();
+        auto input1 = inputEdge(1)->input();
+        auto output = outputEdge(0)->output();
 
         input0->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
index eec2694..af07f88 100644 (file)
@@ -23,17 +23,15 @@ private:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 2 || _inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         auto inputScale = inputScales[0];
 
-        _scaleInfo.setInput(_inputEdges[1], step == ScalePropagationStep::Propagate ? 1.0f : inputScale);
-        if (_inputEdges.size() == 3) {
-            _scaleInfo.setInput(_inputEdges[2], inputScale);
+        scaleInfo.setInput(inputEdge(1), step == ScalePropagationStep::Propagate ? 1.0f : inputScale);
+        if (numInputs() == 3) {
+            scaleInfo.setInput(inputEdge(2), inputScale);
         }
-        _scaleInfo.setOutput(_outputEdges[0], inputScale);
+        scaleInfo.setOutput(outputEdge(0), inputScale);
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
index 450842e..ebb33e4 100644 (file)
@@ -25,25 +25,20 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>&,
-            ScalePropagationStep) override {
+            ScalePropagationStep,
+            StageDataInfo<float>&) override {
         VPU_THROW_EXCEPTION << "Must never be called";
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto dimsOrder = input->desc().dimsOrder();
 
@@ -100,17 +95,19 @@ protected:
         // Return merged StridesRequirements.
         //
 
-        _stridesInfo.setInput(_inputEdges[0], inputReqs);
-        _stridesInfo.setOutput(_outputEdges[0], outputReqs);
+        stridesInfo.setInput(inputEdge(0), inputReqs);
+        stridesInfo.setOutput(outputEdge(0), outputReqs);
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        const auto& firstInputPrecision = input(0)->desc().type();
+        assertInputsOutputsTypes(this, {{firstInputPrecision}}, {{firstInputPrecision}});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
index 5847be1..01ff1c3 100644 (file)
@@ -20,31 +20,27 @@ private:
         return std::make_shared<SoftMaxStage>(*this);
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        auto input = _inputEdges[0]->input();
-
-        _orderInfo.setOutput(_outputEdges[0], input->desc().dimsOrder());
+        orderInfo.setOutput(outputEdge(0), input->desc().dimsOrder());
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+        auto input = inputEdge(0)->input();
 
         auto axis = attrs().get<Dim>("axis");
         auto axisInd = input->desc().dimsOrder().dimInd(axis);
@@ -53,12 +49,8 @@ private:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
index fdd9706..aff1f7f 100644 (file)
@@ -23,42 +23,34 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(!_outputEdges.empty());
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
             auto inputScale = inputScales[0];
 
-            for (const auto& outEdge : _outputEdges) {
-                _scaleInfo.setOutput(outEdge, inputScale);
+            for (const auto& outEdge : outputEdges()) {
+                scaleInfo.setOutput(outEdge, inputScale);
             }
         } else {
             // Split can only propagate scaling.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
 
-            for (const auto& outEdge : _outputEdges) {
-                _scaleInfo.setOutput(outEdge, 1.0f);
+            for (const auto& outEdge : outputEdges()) {
+                scaleInfo.setOutput(outEdge, 1.0f);
             }
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(!_outputEdges.empty());
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
-        for (const auto& outEdge : _outputEdges) {
-            _orderInfo.setOutput(outEdge, input->desc().dimsOrder());
+        for (const auto& outEdge : outputEdges()) {
+            orderInfo.setOutput(outEdge, input->desc().dimsOrder());
         }
     }
 
-    void getDataStridesRequirementsImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(!_outputEdges.empty());
-
-        auto input = _inputEdges[0]->input();
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto input = inputEdge(0)->input();
 
         auto dimsOrder = input->desc().dimsOrder();
 
@@ -68,7 +60,7 @@ protected:
 
         auto minSplitDimInd = dimsOrder.numDims();
 
-        for (const auto& outEdge : _outputEdges) {
+        for (const auto& outEdge : outputEdges()) {
             auto output = outEdge->output();
 
             for (const auto& p : input->desc().dims()) {
@@ -90,7 +82,7 @@ protected:
         // Merge output consumers StridesRequirement.
         //
 
-        for (const auto& outEdge : _outputEdges) {
+        for (const auto& outEdge : outputEdges()) {
             auto curOutput = outEdge->output();
 
             for (const auto& consumerEdge : curOutput->consumerEdges()) {
@@ -123,19 +115,23 @@ protected:
         // Return merged StridesRequirements.
         //
 
-        _stridesInfo.setInput(_inputEdges[0], inputReqs);
-        for (const auto& outEdge : _outputEdges) {
-            _stridesInfo.setOutput(outEdge, outputReqs);
+        stridesInfo.setInput(inputEdge(0), inputReqs);
+        for (const auto& outEdge : outputEdges()) {
+            stridesInfo.setOutput(outEdge, outputReqs);
         }
     }
 
     void finalizeDataLayoutImpl() override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& /*batchInfo*/) override {
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() == 1);
+        IE_ASSERT(numOutputs() > 0);
+        const auto& firstInputPrecision = input(0)->desc().type();
+        assertAllInputsOutputsTypes(this, {firstInputPrecision}, {firstInputPrecision});
     }
 
     void serializeParamsImpl(BlobSerializer&) const override {
@@ -165,6 +161,15 @@ void FrontEnd::parseSplit(
     auto inDesc = input->desc();
     auto perm = inDesc.dimsOrder().toPermutation();
 
+    // Detect unused data
+    DataVector onlyUsedOutputs;
+    for (const auto& output : outputs) {
+        if (!output->origData()->getInputTo().empty()) {
+            onlyUsedOutputs.push_back(output);
+        }
+    }
+    IE_ASSERT(!onlyUsedOutputs.empty());
+
     // Check whether it is Split(copy) or Slice Caffe layer
     // and we do not trust to IE layer type value.
     bool isSplit = true;
@@ -235,7 +240,7 @@ void FrontEnd::parseSplit(
             }
         }
 
-        _stageBuilder->addSplitStage(model, layer->name, layer, axis, input, outputs);
+        _stageBuilder->addSplitStage(model, layer->name, layer, axis, input, onlyUsedOutputs);
     }
 }
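
Note: parseSplit above now drops Split outputs that nothing consumes (their `getInputTo()` is empty) before the stage is built. A self-contained sketch of that filtering step follows; the `Output` struct is a trivial stand-in for the real output/consumer objects.

#include <cassert>
#include <string>
#include <vector>

// Stand-in for a Split output: a name plus the layers that read it
// (the real code checks output->origData()->getInputTo()).
struct Output {
    std::string name;
    std::vector<std::string> consumers;
};

std::vector<Output> keepOnlyUsedOutputs(const std::vector<Output>& outputs) {
    std::vector<Output> onlyUsed;
    for (const auto& output : outputs) {
        if (!output.consumers.empty()) {      // unused outputs are silently dropped
            onlyUsed.push_back(output);
        }
    }
    assert(!onlyUsed.empty());                // mirrors IE_ASSERT(!onlyUsedOutputs.empty())
    return onlyUsed;
}

int main() {
    std::vector<Output> outputs = {
        {"split.0", {"conv1"}},
        {"split.1", {}},                      // nothing reads this branch
    };
    auto used = keepOnlyUsedOutputs(outputs);
    assert(used.size() == 1 && used[0].name == "split.0");
}
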
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/strided_slice.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/strided_slice.cpp
new file mode 100644 (file)
index 0000000..dcb5d9c
--- /dev/null
@@ -0,0 +1,77 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vector>
+#include <memory>
+#include <set>
+
+#include <vpu/sw/post_op_stage.hpp>
+
+namespace vpu {
+
+namespace {
+
+class StridedSliceStage final : public StageNode {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<StridedSliceStage>(*this);
+    }
+
+    void propagateScaleFactorsImpl(
+        const SmallVector<float>& inputScales,
+        ScalePropagationStep step,
+        StageDataInfo<float>& scaleInfo) override {
+    }
+
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+    }
+
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+    }
+
+    void finalizeDataLayoutImpl() override {
+    }
+
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& /*batchInfo*/) override {
+    }
+
+    void initialCheckImpl() const override {
+        IE_ASSERT(numInputs() == 4);
+        IE_ASSERT(numOutputs() == 1);
+        assertInputsOutputsTypes(
+            this,
+            {{DataType::FP16}, {DataType::S32}, {DataType::S32}, {DataType::S32}},
+            {{DataType::FP16}});
+    }
+
+    void serializeParamsImpl(BlobSerializer&) const override {
+        VPU_THROW_EXCEPTION << "Must never be called";
+    }
+
+    void serializeDataImpl(BlobSerializer&) const override {
+        VPU_THROW_EXCEPTION << "Must never be called";
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseStridedSlice(
+        const Model::Ptr& model,
+        const ie::CNNLayerPtr& layer,
+        const DataVector& inputs,
+        const DataVector& outputs) {
+    IE_ASSERT(inputs.size() == 4);
+    IE_ASSERT(outputs.size() == 1);
+
+    model->addNewStage<StridedSliceStage>(
+        layer->name,
+        StageType::StridedSlice,
+        layer,
+        inputs,
+        outputs);
+}
+
+}  // namespace vpu
index c026692..64c091b 100644 (file)
@@ -21,37 +21,32 @@ protected:
 
     void propagateScaleFactorsImpl(
             const SmallVector<float>& inputScales,
-            ScalePropagationStep step) override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
+            ScalePropagationStep step,
+            StageDataInfo<float>& scaleInfo) override {
         if (step == ScalePropagationStep::Propagate) {
             auto inputScale = inputScales[0];
-            _scaleInfo.setOutput(_outputEdges[0], inputScale);
+            scaleInfo.setOutput(outputEdge(0), inputScale);
         } else {
             // Tile can only propagate scaling, not generate.
-            _scaleInfo.setInput(_inputEdges[0], 1.0f);
-            _scaleInfo.setOutput(_outputEdges[0], 1.0f);
+            scaleInfo.setInput(inputEdge(0), 1.0f);
+            scaleInfo.setOutput(outputEdge(0), 1.0f);
         }
     }
 
-    void propagateDataOrderImpl() const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        auto input = inputEdge(0)->input();
 
         auto inOrder = input->desc().dimsOrder();
         auto finalOrder = inOrder;
 
-        _orderInfo.setInput(_inputEdges[0], finalOrder);
-        _orderInfo.setOutput(_outputEdges[0], finalOrder);
+        orderInfo.setInput(inputEdge(0), finalOrder);
+        orderInfo.setOutput(outputEdge(0), finalOrder);
     }
 
-    void getDataStridesRequirementsImpl() const override {
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
     }
 
-    void getBatchSupportInfoImpl() const override {
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
     }
 
     void finalizeDataLayoutImpl() override {
@@ -61,15 +56,13 @@ protected:
         return StageSHAVEsRequirements::OnlyOne;
     }
 
-    void finalCheckImpl() const override {
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
     }
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         auto axis = attrs().get<Dim>("axis");
         auto tiles = attrs().get<int>("tiles");
@@ -82,12 +75,8 @@ protected:
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-        IE_ASSERT(_tempBufferEdges.empty());
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
         input->serializeNewBuffer(serializer);
         output->serializeNewBuffer(serializer);
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/topk.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/topk.cpp
new file mode 100644 (file)
index 0000000..463e630
--- /dev/null
@@ -0,0 +1,147 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <memory>
+#include <set>
+
+namespace vpu {
+
+static TopKMode getMode(const std::shared_ptr<ie::TopKLayer> layer) {
+    const auto& mode = layer->mode;
+    if (mode == "max")
+        return TopKMode::Max;
+    if (mode == "min")
+        return TopKMode::Min;
+    VPU_THROW_EXCEPTION << layer->name << " TopK can take only 'max' or 'min' for mode, but actually it has: " << mode;
+}
+
+static TopKSort getSort(const std::shared_ptr<ie::TopKLayer> layer) {
+    const auto& sort = layer->sort;
+    if (sort == "none")
+        return TopKSort::None;
+    if (sort == "value")
+        return TopKSort::Value;
+    if (sort == "index")
+        return TopKSort::Index;
+    VPU_THROW_EXCEPTION << layer->name << " TopK can take only 'value', 'index' or 'none' for sort, but actually it has: " << sort;
+}
+
+namespace {
+
+class TopKStage final : public StageNode {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<TopKStage>(*this);
+    }
+
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        IE_ASSERT(_inputEdges.size() == 2);
+        IE_ASSERT(_outputEdges.size() == 2);
+
+        auto inputValues = _inputEdges[0]->input();
+
+        auto outputOrder = inputValues->desc().dimsOrder();
+
+        orderInfo.setOutput(_outputEdges[0], outputOrder);
+        orderInfo.setOutput(_outputEdges[1], outputOrder);
+    }
+
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& /*stridesInfo*/) override {
+    }
+
+    void finalizeDataLayoutImpl() override {
+    }
+
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& /*batchInfo*/) override {
+    }
+
+    StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
+        return StageSHAVEsRequirements::CanBeLimited;
+    }
+
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+            {{DataType::FP16}, {DataType::S32}},
+            {{DataType::FP16}, {DataType::S32}});
+    }
+
+    void serializeParamsImpl(BlobSerializer& serializer) const override {
+        IE_ASSERT(_inputEdges.size() == 2);
+
+        auto inputValues = _inputEdges[0]->input();
+
+        auto axis = attrs().get<Dim>("axis");
+        auto axisInd = inputValues->desc().dimsOrder().dimInd(axis);
+
+        auto mode = attrs().get<TopKMode>("mode");
+        auto sort = attrs().get<TopKSort>("sort");
+
+        serializer.append(static_cast<int32_t>(axisInd));
+        serializer.append(static_cast<int32_t>(mode));
+        serializer.append(static_cast<int32_t>(sort));
+    }
+
+    void serializeDataImpl(BlobSerializer& serializer) const override {
+        IE_ASSERT(_inputEdges.size() == 2);
+        IE_ASSERT(_outputEdges.size() == 2);
+
+        auto inputValues = _inputEdges[0]->input();
+        auto inputK = _inputEdges[1]->input();
+        auto outputValues = _outputEdges[0]->output();
+        auto outputIndices = _outputEdges[1]->output();
+
+        inputValues->serializeNewBuffer(serializer);
+        outputValues->serializeNewBuffer(serializer);
+        inputK->serializeNewBuffer(serializer);
+        outputIndices->serializeNewBuffer(serializer);
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseTopK(
+        const Model::Ptr& model,
+        const ie::CNNLayerPtr& _layer,
+        const DataVector& inputs,
+        const DataVector& outputs) {
+    auto layer = std::dynamic_pointer_cast<ie::TopKLayer>(_layer);
+    IE_ASSERT(layer != nullptr);
+
+    IE_ASSERT(inputs.size() == 2);
+    IE_ASSERT(outputs.size() == 2);
+
+    auto inputValues = inputs[0];
+    auto inputK = inputs[1];
+    auto outputValues = outputs[0];
+    auto outputIndices = outputs[1];
+
+    const auto numDims = inputValues->desc().numDims();
+
+    IE_ASSERT(inputK->desc().numDims() == 1);
+    IE_ASSERT(outputValues->desc().numDims() == numDims);
+    IE_ASSERT(outputIndices->desc().numDims() == numDims);
+
+    IE_ASSERT(layer->axis < numDims);
+
+    auto perm = DimsOrder::fromNumDims(numDims).toPermutation();
+    auto axis = perm[numDims - 1 - layer->axis];
+
+    TopKMode mode = getMode(layer);
+    TopKSort sort = getSort(layer);
+
+    auto stage = model->addNewStage<TopKStage>(
+        layer->name,
+        StageType::TopK,
+        layer,
+        inputs,
+        outputs);
+
+    stage->attrs().set<Dim>("axis", axis);
+    stage->attrs().set<TopKMode>("mode", mode);
+    stage->attrs().set<TopKSort>("sort", sort);
+}
+
+}  // namespace vpu
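
Note: the new TopK stage above selects the k largest or smallest values along an axis together with their indices, as suggested by the TopKMode and TopKSort attributes. The device kernel itself is not part of this diff; the following is only a host-side sketch of the selection semantics for a single 1D row, using std::partial_sort as a simplification.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

// Returns the indices of the k largest elements of `values`, ordered by value,
// roughly matching TopKMode::Max with TopKSort::Value for one row.
std::vector<int32_t> topKMaxByValue(const std::vector<float>& values, int k) {
    std::vector<int32_t> indices(values.size());
    std::iota(indices.begin(), indices.end(), 0);
    std::partial_sort(indices.begin(), indices.begin() + k, indices.end(),
                      [&](int32_t a, int32_t b) { return values[a] > values[b]; });
    indices.resize(k);
    return indices;
}

int main() {
    std::vector<float> row = {0.1f, 0.9f, 0.4f, 0.7f};
    for (auto idx : topKMaxByValue(row, 2)) {          // prints index 1, then index 3
        std::printf("index %d, value %g\n", idx, row[idx]);
    }
}
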
index 70dd080..ef3c8b4 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include <vector>
+#include <utility>
 
 #include <vpu/model/edges.hpp>
 #include <vpu/model/data.hpp>
@@ -18,44 +19,39 @@ StagePtr StubStage::cloneImpl() const {
 
 void StubStage::propagateScaleFactorsImpl(
         const SmallVector<float>& inputScales,
-        ScalePropagationStep step) {
+        ScalePropagationStep step,
+        StageDataInfo<float>& scaleInfo) {
     if (_type == StageType::StubConv ||
         _type == StageType::StubFullyConnected ||
         _type == StageType::StubDeconv) {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
 
         IE_ASSERT(weights->usage() == DataUsage::Const);
         IE_ASSERT(biases->usage() == DataUsage::Const || biases->usage() == DataUsage::Fake);
 
         auto inputScale = inputScales[0];
 
-        _scaleInfo.setInput(_inputEdges[1], step == ScalePropagationStep::Propagate ? 1.0f : inputScale);
+        scaleInfo.setInput(inputEdge(1), step == ScalePropagationStep::Propagate ? 1.0f : inputScale);
         if (biases->usage() == DataUsage::Const) {
-            _scaleInfo.setInput(_inputEdges[2], inputScale);
+            scaleInfo.setInput(inputEdge(2), inputScale);
         }
-        _scaleInfo.setOutput(_outputEdges[0], inputScale);
+        scaleInfo.setOutput(outputEdge(0), inputScale);
     } else {
         IE_ASSERT(_type == StageType::StubMaxPool || _type == StageType::StubAvgPool);
 
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto input = _inputEdges[0]->input();
-        auto output = _outputEdges[0]->output();
+        auto input = inputEdge(0)->input();
+        auto output = outputEdge(0)->output();
 
-        _scaleInfo.setOutput(_outputEdges[0], inputScales[0]);
+        scaleInfo.setOutput(outputEdge(0), inputScales[0]);
     }
 }
 
-void StubStage::propagateDataOrderImpl() const {
+void StubStage::propagateDataOrderImpl(StageDataInfo<DimsOrder>&) {
     VPU_THROW_EXCEPTION << "Must be replaced with real stage";
 }
 
-void StubStage::getDataStridesRequirementsImpl() const {
+void StubStage::getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>&) {
     VPU_THROW_EXCEPTION << "Must be replaced with real stage";
 }
 
@@ -63,31 +59,37 @@ void StubStage::finalizeDataLayoutImpl() {
     VPU_THROW_EXCEPTION << "Must be replaced with real stage";
 }
 
-void StubStage::getBatchSupportInfoImpl() const {
+void StubStage::getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) {
     if (_type == StageType::StubConv ||
         _type == StageType::StubFullyConnected ||
         _type == StageType::StubDeconv) {
-        IE_ASSERT(_inputEdges.size() == 3);
-        IE_ASSERT(_outputEdges.size() == 1);
-
-        auto weights = _inputEdges[1]->input();
-        auto biases = _inputEdges[2]->input();
+        auto weights = inputEdge(1)->input();
+        auto biases = inputEdge(2)->input();
 
         IE_ASSERT(weights->usage() == DataUsage::Const);
         IE_ASSERT(biases->usage() == DataUsage::Const || biases->usage() == DataUsage::Fake);
 
-        _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
-        _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
+        batchInfo.setInput(inputEdge(0), BatchSupport::Split);
+        batchInfo.setOutput(outputEdge(0), BatchSupport::Split);
     } else {
         IE_ASSERT(_type == StageType::StubMaxPool || _type == StageType::StubAvgPool);
 
-        IE_ASSERT(_inputEdges.size() == 1);
-        IE_ASSERT(_outputEdges.size() == 1);
-
         // Pooling will support batch by merging it with the previous dimension.
     }
 }
 
+void StubStage::initialCheckImpl() const {
+    if (_type == StageType::StubConv || _type == StageType::StubFullyConnected || _type == StageType::StubDeconv) {
+        assertInputsOutputsTypes(this,
+            {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}},
+            {{DataType::FP16}});
+    } else if (_type == StageType::StubMaxPool || _type == StageType::StubAvgPool) {
+        assertInputsOutputsTypes(this, {{DataType::FP16}}, {{DataType::FP16}});
+    } else {
+        VPU_THROW_EXCEPTION << "unknown type";
+    }
+}
+
 void StubStage::finalCheckImpl() const {
     VPU_THROW_EXCEPTION << "Must never be called";
 }
index 1c3c567..cf1aa25 100644 (file)
 
 namespace vpu {
 
-void PostOpStage::propagateDataOrderImpl() const {
-    IE_ASSERT(!_inputEdges.empty());
-    IE_ASSERT(_outputEdges.size() == 1);
-
-    auto input = _inputEdges[0]->input();
+void PostOpStage::propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) {
+    auto input = inputEdge(0)->input();
 
     auto inDimsOrder = input->desc().dimsOrder();
 
-    _orderInfo.setOutput(_outputEdges[0], inDimsOrder);
+    orderInfo.setOutput(outputEdge(0), inDimsOrder);
 }
 
-void PostOpStage::getDataStridesRequirementsImpl() const {
-    IE_ASSERT(!_inputEdges.empty());
-    IE_ASSERT(_outputEdges.size() == 1);
-
-    auto input = _inputEdges[0]->input();
+void PostOpStage::getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) {
+    auto input = inputEdge(0)->input();
 
     StridesRequirement reqs;
     reqs.add(2, DimStride::Compact);
 
-    _stridesInfo.setInput(_inputEdges[0], reqs);
-    _stridesInfo.setOutput(_outputEdges[0], reqs);
+    stridesInfo.setInput(inputEdge(0), reqs);
+    stridesInfo.setOutput(outputEdge(0), reqs);
 }
 
 void PostOpStage::finalizeDataLayoutImpl() {
 }
 
-void PostOpStage::getBatchSupportInfoImpl() const {
-    IE_ASSERT(!_inputEdges.empty());
-    IE_ASSERT(_outputEdges.size() == 1);
+void PostOpStage::getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& /*batchInfo*/) {
 }
 
 StageSHAVEsRequirements PostOpStage::getSHAVEsRequirementsImpl() const {
@@ -48,22 +40,21 @@ StageSHAVEsRequirements PostOpStage::getSHAVEsRequirementsImpl() const {
     return StageSHAVEsRequirements::TwoOrOne;
 }
 
-void PostOpStage::finalCheckImpl() const {
+void PostOpStage::initialCheckImpl() const {
+    IE_ASSERT(numInputs() > 0);
+    IE_ASSERT(numOutputs() == 1);
+    assertAllInputsOutputsTypes(this, DataType::FP16, DataType::FP16);
 }
 
 void PostOpStage::serializeDataImpl(BlobSerializer& serializer) const {
-    IE_ASSERT(!_inputEdges.empty());
-    IE_ASSERT(_outputEdges.size() == 1);
-    IE_ASSERT(_tempBufferEdges.empty());
-
-    auto input = _inputEdges[0]->input();
-    auto output = _outputEdges[0]->output();
+    auto input = inputEdge(0)->input();
+    auto output = outputEdge(0)->output();
 
     input->serializeNewBuffer(serializer);
     output->serializeNewBuffer(serializer);
 
-    for (int i = 1; i < _inputEdges.size(); ++i) {
-        _inputEdges[i]->input()->serializeNewBuffer(serializer);
+    for (int i = 1; i < numInputs(); ++i) {
+        this->input(i)->serializeNewBuffer(serializer);
     }
 }
 
index b51f1a6..7023513 100644 (file)
@@ -11,6 +11,11 @@ ie_add_plugin(NAME ${TARGET_NAME}
               SOURCES ${SOURCES}
               VERSION_DEFINES_FOR api/myriad_api.cpp)
 
+add_dependencies(${TARGET_NAME} vpu_copy_firmware)
+if(TARGET vpu_compile_custom_kernels)
+    add_dependencies(${TARGET_NAME} vpu_compile_custom_kernels)
+endif()
+
 target_include_directories(${TARGET_NAME}
     PRIVATE
         "${CMAKE_CURRENT_SOURCE_DIR}"
@@ -31,8 +36,3 @@ endif()
 target_link_libraries(${TARGET_NAME}
     PRIVATE
         ${INTEL_ITT_LIBS} inference_engine vpu_graph_transformer mvnc)
-if (LINUX)
-    add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
-        COMMAND "${CMAKE_COMMAND}" -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/../vpu_custom_kernels
-                                                     $<TARGET_FILE_DIR:${TARGET_NAME}>/vpu_custom_kernels)
-endif()
index ad09790..1fa7983 100644 (file)
@@ -13,7 +13,7 @@ using namespace vpu::MyriadPlugin;
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
     try {
         auto mvnc = std::make_shared<Mvnc>();
-        plugin = make_ie_compatible_plugin({2, 0, CI_BUILD_NUMBER, "myriadPlugin"}, std::make_shared<Engine>(mvnc));
+        plugin = make_ie_compatible_plugin({{2, 1}, CI_BUILD_NUMBER, "myriadPlugin"}, std::make_shared<Engine>(mvnc));
         return OK;
     }
     catch (std::exception &ex) {
index 7a1733e..0c7d472 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <memory>
 #include "myriad_async_infer_request.h"
+#include <vpu/utils/profiling.hpp>
 
 using namespace vpu::MyriadPlugin;
 using namespace InferenceEngine;
@@ -21,6 +22,7 @@ MyriadAsyncInferRequest::MyriadAsyncInferRequest(MyriadInferRequest::Ptr request
 
 
 InferenceEngine::StagedTask::Ptr MyriadAsyncInferRequest::createAsyncRequestTask() {
+    VPU_PROFILE(createAsyncRequestTask);
     return std::make_shared<StagedTask>([this]() {
         auto asyncTaskCopy = _asyncTask;
         try {
index 3a9fb04..6e9edf0 100644 (file)
@@ -39,11 +39,19 @@ MyriadConfig::MyriadConfig(const std::map<std::string, std::string> &config, Con
         { CONFIG_VALUE(YES), std::chrono::milliseconds(1000) },
         { CONFIG_VALUE(NO), std::chrono::milliseconds(0) }
     };
+    static const std::unordered_map<std::string, PowerConfig> powerConfigs = {
+        { VPU_MYRIAD_CONFIG_VALUE(POWER_FULL),         PowerConfig::FULL },
+        { VPU_MYRIAD_CONFIG_VALUE(POWER_INFER),        PowerConfig::INFER },
+        { VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE),        PowerConfig::STAGE },
+        { VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE_SHAVES), PowerConfig::STAGE_SHAVES },
+        { VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE_NCES),   PowerConfig::STAGE_NCES },
+    };
 
     setOption(forceReset, boolSwitches, config, VPU_MYRIAD_CONFIG_KEY(FORCE_RESET));
     setOption(platform, platformSwitches, config, VPU_MYRIAD_CONFIG_KEY(PLATFORM));
     setOption(protocol, protocolSwitches, config, VPU_MYRIAD_CONFIG_KEY(PROTOCOL));
     setOption(watchdogInterval, watchdogSwitches, config, VPU_MYRIAD_CONFIG_KEY(WATCHDOG));
+    setOption(powerConfig, powerConfigs, config, VPU_MYRIAD_CONFIG_KEY(POWER_MANAGEMENT));
 
 IE_SUPPRESS_DEPRECATED_START
     static const std::unordered_map<std::string, ncDevicePlatform_t> platformSwitchesDepr = {
@@ -84,6 +92,10 @@ IE_SUPPRESS_DEPRECATED_START
         {VPU_MYRIAD_CONFIG_KEY(PROTOCOL),
                 { VPU_MYRIAD_CONFIG_VALUE(PCIE), VPU_MYRIAD_CONFIG_VALUE(USB), std::string()}},
         {VPU_MYRIAD_CONFIG_KEY(WATCHDOG),    {CONFIG_VALUE(YES), CONFIG_VALUE(NO)}},
+        {VPU_MYRIAD_CONFIG_KEY(POWER_MANAGEMENT),
+                { VPU_MYRIAD_CONFIG_VALUE(POWER_FULL), VPU_MYRIAD_CONFIG_VALUE(POWER_INFER),
+                  VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE), VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE_SHAVES),
+                  VPU_MYRIAD_CONFIG_VALUE(POWER_STAGE_NCES)}},
 
         {VPU_CONFIG_KEY(FORCE_RESET),        {CONFIG_VALUE(YES), CONFIG_VALUE(NO)}},
         {VPU_CONFIG_KEY(PLATFORM),
@@ -129,6 +141,7 @@ IE_SUPPRESS_DEPRECATED_START
         {VPU_MYRIAD_CONFIG_KEY(PROTOCOL)},
         {VPU_MYRIAD_CONFIG_KEY(WATCHDOG)},
         {VPU_MYRIAD_CONFIG_KEY(THROUGHPUT_STREAMS)},
+        {VPU_MYRIAD_CONFIG_KEY(POWER_MANAGEMENT)},
 
         {VPU_CONFIG_KEY(FORCE_RESET)},
         {VPU_CONFIG_KEY(PLATFORM)},
index a4c4437..61b2db4 100644 (file)
 namespace vpu {
 namespace MyriadPlugin {
 
+VPU_DECLARE_ENUM(PowerConfig,
+    FULL         = 0,
+    INFER        = 1,
+    STAGE        = 2,
+    STAGE_SHAVES = 3,
+    STAGE_NCES   = 4,
+)
+
 struct MyriadConfig final : ParsedConfig {
     bool forceReset = false;
+    PowerConfig powerConfig = PowerConfig::FULL;
     ncDevicePlatform_t platform = NC_ANY_PLATFORM;
     ncDeviceProtocol_t protocol = NC_ANY_PROTOCOL;
     std::chrono::milliseconds watchdogInterval = std::chrono::milliseconds(1000);
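
Taken together, the new POWER_MANAGEMENT option is plumbed from the plugin config down to ncDeviceSetOption(). A minimal sketch of how an application could select one of the new modes; the IR file names are placeholders, and the header exposing the VPU_MYRIAD_* macros is assumed to be vpu/vpu_plugin_config.hpp:

    #include <map>
    #include <string>

    #include <inference_engine.hpp>
    #include <vpu/vpu_plugin_config.hpp>  // assumed location of the VPU_MYRIAD_* macros

    int main() {
        InferenceEngine::Core ie;

        InferenceEngine::CNNNetReader reader;
        reader.ReadNetwork("model.xml");   // placeholder IR
        reader.ReadWeights("model.bin");

        // Restrict power gating to inference time only (PowerConfig::INFER above).
        const std::map<std::string, std::string> config = {
            {VPU_MYRIAD_CONFIG_KEY(POWER_MANAGEMENT), VPU_MYRIAD_CONFIG_VALUE(POWER_INFER)},
        };

        auto executableNetwork = ie.LoadNetwork(reader.getNetwork(), "MYRIAD", config);
        (void)executableNetwork;
        return 0;
    }
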
index 504337e..1b736d6 100644 (file)
@@ -6,8 +6,11 @@
 #include <utility>
 
 #include <ie_metric_helpers.hpp>
+#include "cnn_network_impl.hpp"
+#include "exec_graph_info.hpp"
 #include <myriad_executable_network.h>
 #include <vpu/blob_reader.hpp>
+#include <vpu/utils/profiling.hpp>
 #include <net_pass.h>
 
 using namespace InferenceEngine;
@@ -43,6 +46,7 @@ static void selectNumberOfExecutors(const ncDevicePlatform_t& platform,
 
 ExecutableNetwork::ExecutableNetwork(std::vector<DevicePtr> &devicePool,
     const std::map<std::string, std::string> &config, ConfigMode mode) {
+    VPU_PROFILE(ExecutableNetwork);
     _config = std::make_shared<MyriadConfig>(config, mode);
 
     _log = std::make_shared<Logger>("MyriadPlugin", _config->hostLogLevel, consoleOutput());
@@ -65,6 +69,7 @@ ExecutableNetwork::ExecutableNetwork(std::vector<DevicePtr> &devicePool,
 ExecutableNetwork::ExecutableNetwork(ICNNNetwork &network, std::vector<DevicePtr> &devicePool,
                                      const std::map<std::string, std::string> &config) :
                                      ExecutableNetwork(devicePool, config) {
+    VPU_PROFILE(ExecutableNetwork);
     bool ti_proc_ok = !NetPass::CombineRNNSeq(network) ? NetPass::UnrollTI(network) : true;
     if (!ti_proc_ok)
         THROW_IE_EXCEPTION << "Plugin doesn't support Tensor Iterator in pure form. "
@@ -80,7 +85,7 @@ ExecutableNetwork::ExecutableNetwork(ICNNNetwork &network, std::vector<DevicePtr
                             compiledGraph->numShaves, compiledGraph->numSlices, _config->numExecutors);
 
     _graphBlob = std::move(compiledGraph->blob);
-    _stagesMetaData = std::move(compiledGraph->stagesMeta);
+    _graphMetaData = std::move(compiledGraph->graphMeta);
 
     _inputInfo  = std::move(compiledGraph->inputInfo);
     _outputInfo = std::move(compiledGraph->outputInfo);
@@ -109,6 +114,7 @@ ExecutableNetwork::ExecutableNetwork(const std::string &blobFilename,
                            std::vector<DevicePtr> &devicePool,
                            const std::map<std::string, std::string> &config) :
                            ExecutableNetwork(devicePool, config, ConfigMode::RUNTIME_MODE) {
+    VPU_PROFILE(ExecutableNetwork);
     std::ifstream blobFile(blobFilename, std::ios::binary);
     std::ostringstream blobContentStream;
     blobContentStream << blobFile.rdbuf();
@@ -140,8 +146,8 @@ ExecutableNetwork::ExecutableNetwork(const std::string &blobFilename,
     _executor->allocateGraph(_device, _graphDesc, _graphBlob, blobHeader, numStages, networkName,
                              _config->numExecutors);
 
-    _stagesMetaData.resize(numStages);
-    for (auto &meta : _stagesMetaData) {
+    _graphMetaData.stagesMeta.resize(numStages);
+    for (auto &meta : _graphMetaData.stagesMeta) {
         meta.stageName = meta.stageType = meta.layerName = meta.layerType = "UNKNOWN";
         meta.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED;
     }
@@ -174,5 +180,138 @@ void ExecutableNetwork::GetMetric(const std::string &name, Parameter &result, Re
     }
 }
 
+void ExecutableNetwork::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
+    graphPtr = buildRuntimeGraph(_graphMetaData);
+}
+
+InferenceEngine::ICNNNetwork::Ptr ExecutableNetwork::buildRuntimeGraph(GraphMetaInfo& graphMetaInfo) {
+    auto net = std::make_shared<InferenceEngine::details::CNNNetworkImpl>();
+    net->setPrecision(Precision::FP16);
+    net->setName(graphMetaInfo.graphName);
+
+    std::map<size_t, CNNLayerPtr> stageMetaIndexToLayer;
+
+    auto createLayerFromMeta = [&](const StageMetaInfo &stageMetaInfo) -> CNNLayer::Ptr {
+        auto layer = std::make_shared<CNNLayer>(LayerParams{stageMetaInfo.stageName,
+                                          stageMetaInfo.layerType,
+                                          Precision::FP16});
+
+        layer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = stageMetaInfo.layerName;
+        layer->params[ExecGraphInfoSerialization::IMPL_TYPE] = stageMetaInfo.stageType;
+        layer->params[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(stageMetaInfo.execOrder);
+
+        std::stringstream layoutStream;
+        int ind = 0;
+        for (auto &outLayout : stageMetaInfo.outLayouts) {
+            if (ind == 0) {
+                layoutStream << outLayout;
+                ind++;
+                continue;
+            }
+            layoutStream << ',' << outLayout;
+        }
+        layer->params[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = layoutStream.str();
+
+        std::string outPrecisionsStr;
+        ind = 0;
+        for (auto &outPrecision : stageMetaInfo.outPrecisions) {
+            if (ind == 0) {
+                outPrecisionsStr += outPrecision.name();
+                ind++;
+                continue;
+            }
+            outPrecisionsStr += ',' + std::string(outPrecision.name());
+        }
+        layer->params[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outPrecisionsStr;
+
+        if (stageMetaInfo.execOrder < 0) {
+            layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = "not_executed";
+        } else {
+            layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = std::to_string(stageMetaInfo.execTime);
+        }
+
+        return layer;
+    };
+
+    //
+    // Write performance counts
+    //
+
+    auto perfInfo = _executor->getPerfTimeInfo(_graphDesc._graphHandle);
+
+    const auto deviceTimings = perfInfo.data();
+    auto deviceTimingsCount = perfInfo.size();
+
+    if (deviceTimingsCount > 0) {
+        std::size_t timeIndex = 0;
+
+        for (auto &stageMeta : graphMetaInfo.stagesMeta) {
+            if (stageMeta.status == ie::InferenceEngineProfileInfo::EXECUTED &&
+                timeIndex < deviceTimingsCount) {
+                stageMeta.execTime += deviceTimings[timeIndex];
+                timeIndex++;
+            }
+        }
+    }
+
+    //
+    // Add all stages to network
+    //
+
+    for (std::size_t i = 0; i < graphMetaInfo.stagesMeta.size(); i++) {
+        const auto stageMetaData = graphMetaInfo.stagesMeta[i];
+
+        if (stageMetaData.status == ie::InferenceEngineProfileInfo::LayerStatus::OPTIMIZED_OUT ||
+            stageMetaData.stageName == "<Receive-Tensor>" ||
+            stageMetaData.stageName == "<none>") {
+            continue;
+        }
+
+        auto layer = createLayerFromMeta(stageMetaData);
+        stageMetaIndexToLayer.insert(std::make_pair(i, layer));
+        net->addLayer(layer);
+    }
+
+    //
+    // Add all edges to network
+    //
+
+    for (const auto &dataMetaData : graphMetaInfo.datasMeta) {
+        DataPtr data;
+
+        auto parent = stageMetaIndexToLayer[dataMetaData.parentIndex];
+        data = std::make_shared<Data>(dataMetaData.name, dataMetaData.desc);
+        parent->outData.push_back(data);
+        data->getCreatorLayer() = parent;
+
+        for (auto &childMetaIndex : dataMetaData.childrenIndices) {
+            auto child = stageMetaIndexToLayer[childMetaIndex];
+            data->getInputTo()[child->name] = child;
+            child->insData.push_back(data);
+        }
+    }
+
+    //
+    // Specify inputs data
+    //
+
+    for (std::size_t i = 0; i < graphMetaInfo.stagesMeta.size(); i++) {
+        const auto stageMetaData = graphMetaInfo.stagesMeta[i];
+
+        if (stageMetaData.inputsNum != 0 ||
+            stageMetaData.stageName == "<Receive-Tensor>" ||
+            stageMetaData.stageName == "<none>") {
+            continue;
+        }
+
+        auto input = stageMetaIndexToLayer[i];
+        auto inputInfo = std::make_shared<InputInfo>();
+        inputInfo->setInputData(input->outData[0]);
+        net->setInputInfo(inputInfo);
+    }
+
+    return net;
+}
+
 }  // namespace MyriadPlugin
 }  // namespace vpu
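
With buildRuntimeGraph() in place, the MYRIAD plugin can report its execution graph the same way other plugins do. A hedged sketch of reading it back through the public wrapper; it assumes ExecutableNetwork::GetExecGraphInfo() and the exec_graph_info.hpp keys used above are available to the application:

    #include <iostream>

    #include <inference_engine.hpp>
    #include <exec_graph_info.hpp>  // ExecGraphInfoSerialization::* keys used above

    void dumpExecGraph(InferenceEngine::ExecutableNetwork &executableNetwork) {
        InferenceEngine::CNNNetwork runtimeGraph = executableNetwork.GetExecGraphInfo();

        // Each layer of the runtime graph corresponds to one executed stage.
        for (const auto &layer : runtimeGraph) {
            std::cout << layer->name
                      << " impl=" << layer->params[ExecGraphInfoSerialization::IMPL_TYPE]
                      << " order=" << layer->params[ExecGraphInfoSerialization::EXECUTION_ORDER]
                      << " us=" << layer->params[ExecGraphInfoSerialization::PERF_COUNTER]
                      << std::endl;
        }
    }
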
index 9de33ec..2aba485 100644 (file)
@@ -8,6 +8,7 @@
 #include <string>
 #include <vector>
 #include <map>
+#include <unordered_map>
 #include <queue>
 #include <sstream>
 #include <fstream>
@@ -55,7 +56,7 @@ public:
                                                                       InferenceEngine::OutputsDataMap networkOutputs) override {
         return std::make_shared<MyriadInferRequest>(_graphDesc, networkInputs, networkOutputs,
                                                     _inputInfo, _outputInfo,
-                                                    _stagesMetaData, _config, _log, _executor);
+                                                    _graphMetaData.stagesMeta, _config, _log, _executor);
     }
 
     void CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) override {
@@ -66,7 +67,7 @@ public:
 
         auto syncRequestImpl = std::make_shared<MyriadInferRequest>(_graphDesc, _networkInputs, _networkOutputs,
                                                                     _inputInfo, _outputInfo,
-                                                                    _stagesMetaData, _config, _log,
+                                                                    _graphMetaData.stagesMeta, _config, _log,
                                                                     _executor);
         syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
         auto taskExecutorGetResult = getNextTaskExecutor();
@@ -90,6 +91,8 @@ public:
 
     void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
 
+    void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) override;
+
     void GetMappedTopology(
             std::map<std::string, std::vector<InferenceEngine::PrimitiveInfo::Ptr>> &deployedTopology) override {
         THROW_IE_EXCEPTION << "GetMappedTopology is not implemented\n";
@@ -101,7 +104,7 @@ private:
     std::vector<char> _graphBlob;
     GraphDesc _graphDesc;
     DevicePtr _device;
-    std::vector<StageMetaInfo> _stagesMetaData;
+    GraphMetaInfo _graphMetaData;
     std::shared_ptr<MyriadConfig> _config;
     std::vector<std::string> _supportedMetrics;
 
@@ -126,6 +129,8 @@ private:
 
         return taskExecutor;
     }
+
+    InferenceEngine::ICNNNetwork::Ptr buildRuntimeGraph(GraphMetaInfo& graphMetaInfo);
 };
 
 }  // namespace MyriadPlugin
index f0d16d4..0f3bfca 100644 (file)
@@ -18,6 +18,7 @@
 #include <vpu/vpu_plugin_config.hpp>
 #include <vpu/utils/extra.hpp>
 #include <vpu/utils/logger.hpp>
+#include <vpu/utils/profiling.hpp>
 
 #include "myriad_executor.h"
 #include "myriad_config.h"
@@ -36,6 +37,7 @@ using namespace vpu;
 static std::mutex device_mutex;
 
 MyriadExecutor::MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log) {
+    VPU_PROFILE(MyriadExecutor);
     _mvnc = std::make_shared<Mvnc>();
     int ncResetAll = forceReset;
     auto status = ncGlobalSetOption(NC_RW_RESET_ALL, &ncResetAll, sizeof(ncResetAll));
@@ -75,7 +77,9 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector<DevicePtr> &devicePool,
                                           const std::string& configDevName,
                                           const ncDevicePlatform_t &configPlatform,
                                           const ncDeviceProtocol_t &configProtocol,
-                                          int watchdogInterval) {
+                                          int watchdogInterval,
+                                          PowerConfig powerConfig) {
+    VPU_PROFILE(bootNextDevice);
 // #-17972, #-16790
 #if defined(NO_BOOT)
     if (!devicePool.empty()) {
@@ -186,6 +190,14 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector<DevicePtr> &devicePool,
         device._name = deviceName;
     }
 
+    status = ncDeviceSetOption(device._deviceHandle, NC_RW_DEVICE_POWER_CONFIG, reinterpret_cast<void*>(&powerConfig), sizeof(dataLength));
+
+    if (status != NC_OK) {
+        _log->warning("Failed to set configuration for Power Manager");
+        ncDeviceClose(&device._deviceHandle);
+        return status;
+    }
+
     /* TODO: what should we do if we do not know the maximum number of available graphs? What if we get a number <= 0? */
     device._graphNum = 1;
     device._deviceIdx = lastDeviceIdx + 1;
@@ -195,6 +207,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector<DevicePtr> &devicePool,
 
 DevicePtr MyriadExecutor::openDevice(std::vector<DevicePtr> &devicePool,
                                      const std::shared_ptr<MyriadConfig> &config) {
+    VPU_PROFILE(openDevice);
     std::lock_guard<std::mutex> lock(device_mutex);
 
     auto firstBootedButEmptyDevice = std::find_if(devicePool.begin(), devicePool.end(),
@@ -227,7 +240,7 @@ DevicePtr MyriadExecutor::openDevice(std::vector<DevicePtr> &devicePool,
     }
 
     ncStatus_t booted = bootNextDevice(devicePool, config->deviceName,
-        config->platform, config->protocol, config->watchdogInterval.count());
+        config->platform, config->protocol, config->watchdogInterval.count(), config->powerConfig);
 
     // TODO Are there any tests for this case? #-19309
     // In case, then there is no another not booted device, use already booted with minimum number of executors
@@ -272,6 +285,7 @@ VPU_PACKED(bin_header {
 };)
 
 void MyriadExecutor::closeDevices(std::vector<DevicePtr> &devicePool) {
+    VPU_PROFILE(closeDevices);
     std::lock_guard<std::mutex> lock(device_mutex);
     for (auto &device : devicePool) {
         if (device->_deviceHandle != nullptr) {
@@ -287,6 +301,7 @@ void MyriadExecutor::allocateGraph(DevicePtr &device, GraphDesc &graphDesc,
                                    const std::vector<char> &graphFileContent,
                                    const std::pair<const char*, size_t> &graphHeaderDesc,
                                    size_t numStages, const char* networkName, int executors) {
+    VPU_PROFILE(allocateGraph);
     _numStages = numStages;
     graphDesc._name = networkName;
     if (device->_deviceHandle == nullptr) {
@@ -373,6 +388,7 @@ void MyriadExecutor::allocateGraph(DevicePtr &device, GraphDesc &graphDesc,
 
 void MyriadExecutor::queueInference(GraphDesc &graphDesc, void *input_data, size_t input_bytes,
                     void *result_data, size_t result_bytes) {
+    VPU_PROFILE(queueInference);
 #ifndef NDEBUG
     if (auto dumpFileName = std::getenv("IE_VPU_DUMP_INPUT_FILE_NAME")) {
         std::ofstream file(dumpFileName, std::ios_base::binary | std::ios_base::out);
@@ -410,6 +426,7 @@ void MyriadExecutor::getResult(GraphDesc &graphDesc, void *result_data, unsigned
 }
 
 void MyriadExecutor::deallocateGraph(DevicePtr &device, GraphDesc &graphDesc) {
+    VPU_PROFILE(deallocateGraph);
     std::lock_guard<std::mutex> lock(device_mutex);
 
     if (graphDesc._inputFifoHandle != nullptr) {
index daec8a5..af40fda 100644 (file)
@@ -132,7 +132,8 @@ private:
                               const std::string& configDevName,
                               const ncDevicePlatform_t &configPlatform,
                               const ncDeviceProtocol_t &configProtocol,
-                              int watchdogInterval);
+                              int watchdogInterval,
+                              PowerConfig powerConfig);
 };
 
 typedef std::shared_ptr<MyriadExecutor> MyriadExecutorPtr;
index 79530c8..65af2cb 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <vpu/utils/perf_report.hpp>
 #include <vpu/utils/ie_helpers.hpp>
+#include <vpu/utils/profiling.hpp>
 
 #include "myriad_executable_network.h"
 #include "myriad_infer_request.h"
@@ -35,23 +36,27 @@ MyriadInferRequest::MyriadInferRequest(GraphDesc &graphDesc,
         _log(log), _stagesMetaData(blobMetaData), _config(myriadConfig),
         _inputInfo(inputInfo), _outputInfo(outputInfo),
         _graphDesc(graphDesc) {
-    _deviceLayout = _config->compileConfig.hwOptimization ? NCHW : NHWC;
-    if (_config->compileConfig.forceLayout == ComputeLayout::NCHW)
-        _deviceLayout = NCHW;
-    if (_config->compileConfig.forceLayout == ComputeLayout::NHWC)
-        _deviceLayout = NHWC;
+    VPU_PROFILE(MyriadInferRequest);
+    _layoutPreference = _config->compileConfig.hwOptimization ?
+                            LayoutPreference::ChannelMajor :
+                            LayoutPreference::ChannelMinor;
+    if (_config->compileConfig.forceLayout == ComputeLayout::NCHW ||
+        _config->compileConfig.forceLayout == ComputeLayout::NCDHW)
+        _layoutPreference = LayoutPreference::ChannelMajor;
+    if (_config->compileConfig.forceLayout == ComputeLayout::NHWC ||
+        _config->compileConfig.forceLayout == ComputeLayout::NDHWC)
+        _layoutPreference = LayoutPreference::ChannelMinor;
+
+    const auto& ioStrides = _config->compileConfig.ioStrides;
     // allocate inputs
     for (auto &networkInput : _networkInputs) {
+        IE_ASSERT(ioStrides.find(networkInput.first) == ioStrides.end())
+            << " input blob with strides is not supported";
+
         SizeVector dims      = networkInput.second->getTensorDesc().getDims();
         Precision  precision = networkInput.second->getTensorDesc().getPrecision();
         Layout     layout    = networkInput.second->getTensorDesc().getLayout();
 
-        if (precision != Precision::FP32 &&
-            precision != Precision::FP16 &&
-            precision != Precision::U8) {
-            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported input precision: "
-                                   << precision << "! Supported precisions: FP32, FP16 and U8";
-        }
         Blob::Ptr inputBlob = make_blob_with_precision(TensorDesc(
             precision,
             dims,
@@ -64,15 +69,13 @@ MyriadInferRequest::MyriadInferRequest(GraphDesc &graphDesc,
     }
     // allocate outputs
     for (auto &networkOutput : _networkOutputs) {
+        IE_ASSERT(ioStrides.find(networkOutput.first) == ioStrides.end())
+            << " output blob with strides is not supported";
+
         SizeVector dims      = networkOutput.second->getTensorDesc().getDims();
         Precision  precision = networkOutput.second->getTensorDesc().getPrecision();
         Layout     layout    = networkOutput.second->getTensorDesc().getLayout();
 
-        if (precision != Precision::FP32 &&
-            precision != Precision::FP16) {
-            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported output precision: "
-                                << precision << "! Supported precisions: FP32, FP16";
-        }
         Blob::Ptr outputBlob = make_blob_with_precision(TensorDesc(
             precision,
             dims,
@@ -97,19 +100,7 @@ void MyriadInferRequest::InferImpl() {
 }
 
 void MyriadInferRequest::InferAsync() {
-    for (auto input : _inputs) {
-        auto const inputBlobPtr = input.second;
-        if (inputBlobPtr->getTensorDesc().getPrecision() != Precision::FP16
-            && inputBlobPtr->getTensorDesc().getPrecision() != Precision::FP32
-            && inputBlobPtr->getTensorDesc().getPrecision() != Precision::U8)
-            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported input blob precision";
-    }
-    for (auto output : _outputs) {
-        auto const outputBlobPtr = output.second;
-        if (outputBlobPtr->getTensorDesc().getPrecision() != Precision::FP16
-            && outputBlobPtr->getTensorDesc().getPrecision() != Precision::FP32)
-            THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported output blob precision";
-    }
+    VPU_PROFILE(InferAsync);
 
     // execute input pre-processing
     execDataPreprocessing(_inputs, true);  // "true" stands for serial preprocessing in case of OpenMP
@@ -124,14 +115,14 @@ void MyriadInferRequest::InferAsync() {
             auto inputBlob = input.second;
             size_t byteSize = inputBlob->byteSize();
             Layout layout = inputBlob->getTensorDesc().getLayout();
-            if (layout != _deviceLayout && (layout == NCHW || layout == NHWC)) {
-                // TODO copyBlob allocates new memory, but we already have allocated buffer of enough size
-                inputBlob = copyBlob(inputBlob, _deviceLayout);
+            Layout vpuLayout = deviceLayout(layout, _layoutPreference);
+            if (layout != vpuLayout) {
+                inputBlob = copyBlob(inputBlob, vpuLayout);
             }
 
             const auto input_offset_it = _inputInfo.offset.find(input.first);
             if (input_offset_it != _inputInfo.offset.end()) {
-                size_t required_buff_size = checked_cast<size_t>(input_offset_it->second) + byteSize;
+                size_t required_buff_size = vpu::checked_cast<size_t>(input_offset_it->second) + byteSize;
                 IE_ASSERT(required_buff_size <= inputBuffer.size());
                 MEMCPY(&inputBuffer[input_offset_it->second], inputBlob->buffer().as<uint8_t*>(), byteSize);
             }
@@ -146,9 +137,9 @@ void MyriadInferRequest::InferAsync() {
 
         tmpBlob = foundInputBlob->second;
         Layout layout = tmpBlob->getTensorDesc().getLayout();
-        if (layout != _deviceLayout && (layout == NCHW || layout == NHWC)) {
-            // TODO copyBlob allocates new memory, but we already have allocated buffer of enough size
-            tmpBlob = copyBlob(tmpBlob, _deviceLayout);
+        Layout vpuLayout = deviceLayout(layout, _layoutPreference);
+        if (layout != vpuLayout) {
+            tmpBlob = copyBlob(tmpBlob, vpuLayout);
         }
 
         inputPtr = tmpBlob->buffer();
@@ -158,13 +149,14 @@ void MyriadInferRequest::InferAsync() {
 }
 
 void MyriadInferRequest::GetResult() {
+    VPU_PROFILE(GetResult);
     _executor->getResult(_graphDesc, resultBuffer.data(), resultBuffer.size());
 
     for (auto pp : _outputs) {
         const auto offset_it = _outputInfo.offset.find(pp.first);
 
         if (offset_it !=  _outputInfo.offset.end()) {
-            size_t resultOffset = checked_cast<size_t>(offset_it->second);
+            size_t resultOffset = vpu::checked_cast<size_t>(offset_it->second);
             if (resultOffset > resultBuffer.size()) {
                 THROW_IE_EXCEPTION << "unexpected result data size";
             }
@@ -173,7 +165,7 @@ void MyriadInferRequest::GetResult() {
             auto outDesc = outputBlob->getTensorDesc();
 
             // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called
-            auto vpuLayout = (outDesc.getLayout() == NCHW || outDesc.getLayout() == NHWC) ? _deviceLayout : outDesc.getLayout();
+            auto vpuLayout = deviceLayout(outDesc.getLayout(), _layoutPreference);
             ie::TensorDesc tempTensorDesc(outDesc.getPrecision(), outDesc.getDims(), vpuLayout);
             auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset);
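
The device-side layout is no longer hard-coded to NCHW/NHWC; it is derived from the blob layout and the layout preference via deviceLayout(). The real helper lives in the VPU utilities; the sketch below is only an illustrative guess at the mapping these hunks rely on:

    #include <ie_common.h>  // InferenceEngine::Layout

    // Stand-in for vpu::LayoutPreference, mirroring the values used above.
    enum class LayoutPreference { ChannelMajor, ChannelMinor };

    // Illustrative only: planar layouts for ChannelMajor (NCHW/NCDHW),
    // interleaved layouts for ChannelMinor (NHWC/NDHWC), anything else untouched.
    InferenceEngine::Layout preferredDeviceLayout(InferenceEngine::Layout layout,
                                                  LayoutPreference preference) {
        using InferenceEngine::Layout;
        const bool channelMajor = (preference == LayoutPreference::ChannelMajor);
        switch (layout) {
        case Layout::NCHW:
        case Layout::NHWC:
            return channelMajor ? Layout::NCHW : Layout::NHWC;
        case Layout::NCDHW:
        case Layout::NDHWC:
            return channelMajor ? Layout::NCDHW : Layout::NDHWC;
        default:
            return layout;
        }
    }
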
 
index b9e56e9..9420c1b 100644 (file)
@@ -14,6 +14,7 @@
 #include <cpp_interfaces/impl/ie_executable_network_internal.hpp>
 
 #include <vpu/utils/logger.hpp>
+#include <vpu/utils/ie_helpers.hpp>
 
 #include "myriad_executor.h"
 #include "myriad_config.h"
@@ -23,7 +24,7 @@ namespace MyriadPlugin {
 
 class MyriadInferRequest : public InferenceEngine::InferRequestInternal {
     MyriadExecutorPtr _executor;
-    InferenceEngine::Layout _deviceLayout;
+    LayoutPreference _layoutPreference;
     Logger::Ptr _log;
     std::vector<StageMetaInfo> _stagesMetaData;
     std::shared_ptr<MyriadConfig> _config;
index 9ac80a6..7b86aaa 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <vpu/vpu_plugin_config.hpp>
 #include <vpu/parsed_config.hpp>
+#include <vpu/utils/profiling.hpp>
 
 #include "myriad_plugin.h"
 
@@ -23,6 +24,7 @@ using namespace vpu::MyriadPlugin;
 
 ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const ICore * /*core*/, ICNNNetwork &network,
                                                           const std::map<std::string, std::string> &config) {
+    VPU_PROFILE(LoadExeNetworkImpl);
     InputsDataMap networkInputs;
     OutputsDataMap networkOutputs;
 
@@ -76,6 +78,7 @@ void Engine::QueryNetwork(const ICNNNetwork& network, QueryNetworkResult& res) c
 
 void Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config,
                           QueryNetworkResult& res) const {
+    VPU_PROFILE(QueryNetwork);
     auto layerNames = getSupportedLayers(
         network,
         Platform::MYRIAD_2,
@@ -107,6 +110,7 @@ Engine::Engine(std::shared_ptr<IMvnc> mvnc) :
 // ImportNetwork gets a config provided by a user. LoadNetwork gets the plugin config and merges it with the user's config.
 // Need to find a common way to handle configs
 IExecutableNetwork::Ptr Engine::ImportNetwork(const std::string &modelFileName, const std::map<std::string, std::string> &config) {
+    VPU_PROFILE(ImportNetwork);
     std::ifstream blobFile(modelFileName, std::ios::binary);
 
     if (!blobFile.is_open()) {
index ab92e78..2c6ba6a 100644 (file)
@@ -27,8 +27,6 @@ enable_testing()
 
 add_subdirectory(helpers)
 
-disable_deprecated_warnings()
-
 if(ENABLE_TESTS)
   add_subdirectory(unit)
 endif()
index 9452fc3..aaf1aff 100644 (file)
@@ -115,25 +115,73 @@ void BufferWrapper::insert(size_t index, float value) {
     }
 }
 
-void CompareCommon(const Blob::Ptr& actual, const Blob::Ptr& expected, float tolerance) {
+void CompareCommonAbsolute(const Blob::Ptr& actual, const Blob::Ptr& expected, float tolerance) {
     ASSERT_NE(actual, nullptr);
     ASSERT_NE(expected, nullptr);
 
-    Layout res_layout = actual->getTensorDesc().getLayout();
-    Layout ref_layout = expected->getTensorDesc().getLayout();
-    SizeVector res_dims = actual->getTensorDesc().getDims();
+    BufferWrapper res_ptr(actual);
+    BufferWrapper ref_ptr(expected);
+    float max_abs_error = 0;
+    size_t actualMaxErrId = 0;
+    size_t expectedMaxErrId = 0;
+    std::function<void(size_t, size_t)> absoluteErrorUpdater = [&](size_t actualIdx, size_t expectedIdx) {
+        auto actual = res_ptr[actualIdx];
+        auto expected = ref_ptr[expectedIdx];
+        float abs_error = fabsf(actual - expected);
+        if (abs_error > max_abs_error) {
+            max_abs_error = abs_error;
+            actualMaxErrId = actualIdx;
+            expectedMaxErrId = expectedIdx;
+        }
+    };
+    CompareCommon(actual, expected, tolerance, absoluteErrorUpdater);
+
+    ASSERT_NEAR(ref_ptr[expectedMaxErrId], res_ptr[actualMaxErrId], tolerance)
+                        << "expectedMaxErrId = " << expectedMaxErrId
+                        << " actualMaxErrId = " << actualMaxErrId;
+}
+
+void CompareCommonRelative(const Blob::Ptr& actual, const Blob::Ptr& expected, float tolerance) {
+    ASSERT_NE(actual, nullptr);
+    ASSERT_NE(expected, nullptr);
 
     BufferWrapper res_ptr(actual);
     BufferWrapper ref_ptr(expected);
+    float max_rel_error = 0;
+    size_t actualMaxErrId = 0;
+    size_t expectedMaxErrId = 0;
+    std::function<void(size_t, size_t)> relatedErrorUpdater = [&](size_t actualIdx, size_t expectedIdx) {
+        auto actual = res_ptr[actualIdx];
+        auto expected = ref_ptr[expectedIdx];
+        float abs_error = fabsf(actual - expected);
+        float rel_error = expected != 0.0 ? fabsf(abs_error / expected) : abs_error;
+        if (rel_error > max_rel_error) {
+            max_rel_error = rel_error;
+            actualMaxErrId = actualIdx;
+            expectedMaxErrId = expectedIdx;
+        }
+    };
+    CompareCommon(actual, expected, tolerance, relatedErrorUpdater);
+
+    float abs_threshold = fabsf(ref_ptr[expectedMaxErrId]) * tolerance;
+    ASSERT_NEAR(ref_ptr[expectedMaxErrId], res_ptr[actualMaxErrId], abs_threshold)
+                        << "expectedMaxErrId = " << expectedMaxErrId
+                        << " actualMaxErrId = " << actualMaxErrId;
+}
+
+void CompareCommon(const Blob::Ptr& actual, const Blob::Ptr& expected, float tolerance,
+                   const std::function<void(size_t, size_t)>& errorUpdater) {
+    ASSERT_NE(actual, nullptr);
+    ASSERT_NE(expected, nullptr);
+
+    Layout res_layout = actual->getTensorDesc().getLayout();
+    Layout ref_layout = expected->getTensorDesc().getLayout();
+    SizeVector res_dims = actual->getTensorDesc().getDims();
 
     size_t res_size = actual->size();
     size_t ref_size = expected->size();
     ASSERT_EQ(res_size, ref_size);
 
-    float max_error = 0;
-    size_t actualMaxErrId = 0;
-    size_t expectedMaxErrId = 0;
-
     if (res_layout == NCHW || res_layout == NHWC) {
         size_t N = res_dims[0];
         size_t C = res_dims[1];
@@ -150,12 +198,7 @@ void CompareCommon(const Blob::Ptr& actual, const Blob::Ptr& expected, float tol
                         size_t expectedIdx = ref_layout == NCHW ?
                                              w + h * W + c * W * H + n * W * H * C : c + w * C + h * C * W +
                                                                                      n * C * W * H;
-                        float cur_diff = fabs(res_ptr[actualIdx] - ref_ptr[expectedIdx]);
-                        if (cur_diff > max_error) {
-                            max_error = cur_diff;
-                            actualMaxErrId = actualIdx;
-                            expectedMaxErrId = expectedIdx;
-                        }
+                        errorUpdater(actualIdx, expectedIdx);
                     }
                 }
             }
@@ -168,28 +211,15 @@ void CompareCommon(const Blob::Ptr& actual, const Blob::Ptr& expected, float tol
             for (size_t n = 0; n < N; n++) {
                 for (size_t c = 0; c < C; c++) {
                     size_t actualIdx =   c +  n * C;
-                    float cur_diff = fabs(res_ptr[actualIdx] - ref_ptr[actualIdx]);
-                    if (cur_diff > max_error) {
-                        max_error = cur_diff;
-                        actualMaxErrId = actualIdx;
-                        expectedMaxErrId = actualIdx;
-                    }
+                    errorUpdater(actualIdx, actualIdx);
                 }
             }
         } else {
             for (size_t i = 0; i < ref_size; i++) {
-                float cur_diff = fabs(res_ptr[i] - ref_ptr[i]);
-                if (cur_diff > max_error) {
-                    max_error = cur_diff;
-                    actualMaxErrId = expectedMaxErrId = i;
-                }
+                errorUpdater(i, i);
             }
         }
     }
-
-    ASSERT_NEAR(ref_ptr[expectedMaxErrId], res_ptr[actualMaxErrId], tolerance)
-                                << "expectedMaxErrId = " << expectedMaxErrId
-                                << " actualMaxErrId = " << actualMaxErrId;
 }
 
 void fill_data_common(BufferWrapper& data, size_t size, size_t duty_ratio) {
index e7a998c..bb89ae1 100644 (file)
@@ -11,6 +11,7 @@
 #include <xml_net_builder.hpp>
 #include <xml_helper.hpp>
 #include <common_layers_params.hpp>
+#include <tests_common.hpp>
 
 #ifndef USE_BOOST_RE
 
 #define FIND_STR(SRC, PATTERN) boost::regex_search(SRC, boost::regex(PATTERN))
 #endif
 
-#define REPLACE_WITH_NUM(SRC, PATTERN, NUM) REPLACE_WITH_STR(SRC, PATTERN, std::to_string(NUM))
+#define REPLACE_WITH_NUM(SRC, PATTERN, NUM) REPLACE_WITH_STR(SRC, PATTERN, to_string_c_locale(NUM))
 #define REPLACE_WITH_NUM_VECTOR(SRC, PATTERN, NUMS) \
        { std::string result; \
         if (NUMS.size() > 0) { \
-            result += std::to_string(NUMS[0]); \
+            result += to_string_c_locale(NUMS[0]); \
             for (int i = 1; i < NUMS.size(); i++) { \
-                    result += "," + std::to_string(NUMS[i]); \
+                    result += "," + to_string_c_locale(NUMS[i]); \
             } \
         } \
        REPLACE_WITH_STR(SRC, PATTERN, result); }
@@ -38,9 +39,9 @@
        { std::string result; \
         auto nums_size = NUMS.size(); \
         if (nums_size > 0) { \
-            result += std::to_string(NUMS[nums_size - 1]); \
+            result += to_string_c_locale(NUMS[nums_size - 1]); \
             for (int i = 2; i <= nums_size; i++) { \
-                    result += "," + std::to_string(NUMS[nums_size - i]); \
+                    result += "," + to_string_c_locale(NUMS[nums_size - i]); \
             } \
         } \
        REPLACE_WITH_STR(SRC, PATTERN, result); }
@@ -133,7 +134,17 @@ public:
     void insert(size_t index, float value);
 };
 
-void
-CompareCommon(const InferenceEngine::Blob::Ptr &actual, const InferenceEngine::Blob::Ptr &expected, float tolerance);
+void CompareCommon(const InferenceEngine::Blob::Ptr &actual,
+                   const InferenceEngine::Blob::Ptr &expected,
+                   float tolerance,
+                   const std::function<void(size_t, size_t)> &errorUpdater);
+
+void CompareCommonAbsolute(const InferenceEngine::Blob::Ptr &actual,
+                           const InferenceEngine::Blob::Ptr &expected,
+                           float tolerance);
+
+void CompareCommonRelative(const InferenceEngine::Blob::Ptr &actual,
+                           const InferenceEngine::Blob::Ptr &expected,
+                           float tolerance);
 
 void fill_data_common(BufferWrapper &data, size_t size, size_t duty_ratio = 10);
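
CompareCommon() is now a generic walker parameterized by an error functor, with two ready-made policies on top of it: CompareCommonAbsolute() bounds |actual - expected| by the tolerance, while CompareCommonRelative() bounds it by |expected| * tolerance, falling back to the absolute error when the reference value is zero. A small illustrative test, assuming this header is single_layer_common.hpp:

    #include <algorithm>

    #include <gtest/gtest.h>
    #include <inference_engine.hpp>

    #include "single_layer_common.hpp"  // assumed name of the header diffed above

    TEST(CompareHelpers, AbsoluteVsRelative) {  // hypothetical test, for illustration only
        using namespace InferenceEngine;

        TensorDesc desc(Precision::FP32, {1, 4}, Layout::NC);
        Blob::Ptr actual = make_shared_blob<float>(desc);
        Blob::Ptr expected = make_shared_blob<float>(desc);
        actual->allocate();
        expected->allocate();

        const float actualData[]   = {100.0f, 0.50f, 0.0f, -2.00f};
        const float expectedData[] = {100.5f, 0.51f, 0.0f, -2.01f};
        std::copy(actualData, actualData + 4, actual->buffer().as<float *>());
        std::copy(expectedData, expectedData + 4, expected->buffer().as<float *>());

        CompareCommonAbsolute(actual, expected, 0.6f);   // largest absolute error is 0.5
        CompareCommonRelative(actual, expected, 0.03f);  // largest relative error is just under 2%
    }
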
index 8d97b07..2895e9c 100644 (file)
 #include <cctype>
 #include <chrono>
 
-#ifdef WIN32
-#define UNUSED
+#ifdef _WIN32
+# define UNUSED
 #else
-#define UNUSED  __attribute__((unused))
+# define UNUSED  __attribute__((unused))
 #endif
 
 #include "stdlib.h"
 #include "stdio.h"
 #include "string.h"
 #ifdef _WIN32
-       #include "Psapi.h"
+include "Psapi.h"
 #endif
 
+template <class T>
+inline std::string to_string_c_locale(T value) {
+    std::stringstream val_stream;
+    val_stream.imbue(std::locale("C"));
+    val_stream << value;
+    return val_stream.str();
+}
+
 class BaseTestCreator {
 protected:
     std::string _type;
 public:
     explicit BaseTestCreator(const std::string& type) : _type(type) {}
+    virtual ~BaseTestCreator() = default;
 
     virtual InferenceEngine::CNNLayerPtr create(const std::string& type)  = 0;
 
@@ -104,6 +113,7 @@ private:
                 std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("Floor"),
                 std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("HardSigmoid"),
                 std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("Log"),
+                std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("Exp"),
                 std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("Reciprocal"),
                 std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("Selu"),
                 std::make_shared<LayerTestCreator<InferenceEngine::MathLayer>>("Sign"),
@@ -124,7 +134,9 @@ private:
                 std::make_shared<LayerTestCreator<InferenceEngine::ReduceLayer>>("ReduceProd"),
                 std::make_shared<LayerTestCreator<InferenceEngine::ReduceLayer>>("ReduceSum"),
                 std::make_shared<LayerTestCreator<InferenceEngine::ReduceLayer>>("ReduceSumSquare"),
-                std::make_shared<LayerTestCreator<InferenceEngine::TopKLayer>>("TopK")
+                std::make_shared<LayerTestCreator<InferenceEngine::TopKLayer>>("TopK"),
+                std::make_shared<LayerTestCreator<InferenceEngine::NonMaxSuppressionLayer>>("NonMaxSuppression"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ScatterLayer>>("ScatterUpdate")
         };
         return creators;
     }
@@ -365,23 +377,25 @@ public:
     }
 
     std::string replace(std::string& str, const std::string& from, const int& to) {
-        replace(str, from, std::to_string(to));
+        replace(str, from, to_string_c_locale(to));
         return str;
     }
 
     std::string replace(std::string& str, const std::string& from, const size_t& to) {
-        replace(str, from, std::to_string(to));
+        replace(str, from, to_string_c_locale(to));
         return str;
     }
 
     std::string replace(std::string& str, const std::string& from, const float& to) {
-        replace(str, from, std::to_string(to));
+        replace(str, from, to_string_c_locale(to));
         return str;
     }
     // trim from both ends (in place)
     static inline std::string &trim(std::string &s) {
-        s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));
-        s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());
+        s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c){
+            return !std::isspace(c);}));
+        s.erase(std::find_if(s.rbegin(), s.rend(), [](int c){
+            return !std::isspace(c);}).base(), s.end());
         return s;
     }
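
The switch from std::to_string() to to_string_c_locale() in the REPLACE_WITH_NUM macros matters because std::to_string() follows the current C locale, so a comma-decimal locale turns 0.5 into "0,500000" and breaks the generated IR XML. A short sketch, assuming a comma-decimal locale such as de_DE.UTF-8 is installed and that the helper above comes from tests_common.hpp:

    #include <iostream>
    #include <locale>
    #include <string>

    #include "tests_common.hpp"  // assumed name of the header providing to_string_c_locale()

    int main() {
        // Setting the global C++ locale also switches the C locale that
        // std::to_string() relies on (when the named locale is available).
        std::locale::global(std::locale("de_DE.UTF-8"));

        std::cout << std::to_string(0.5f) << std::endl;      // prints "0,500000" here
        std::cout << to_string_c_locale(0.5f) << std::endl;  // always prints "0.5"
        return 0;
    }
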
 
diff --git a/inference-engine/tests/helpers/tests_vpu_common.cpp b/inference-engine/tests/helpers/tests_vpu_common.cpp
new file mode 100644 (file)
index 0000000..0fed062
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_builders.hpp>
+#include <ie_icnn_network.hpp>
+
+#include "single_layer_common.hpp"
+#include "tests_vpu_common.hpp"
+
+using namespace InferenceEngine;
+
+/* this function assumes that the precision of a generated network is FP16 */
+std::shared_ptr<InferenceEngine::ICNNNetwork> createNetworkWithDesiredSize(std::size_t sizeInMB) {
+
+    Builder::Network builder("network");
+    Builder::FullyConnectedLayer fcBuilder("FullyConnected");
+
+    SizeVector inputDims = {1, 2, 16, 16}; // 1 KB
+
+    auto generateBlob = [](Precision precision,
+                           SizeVector dims, Layout layout) {
+        IE_ASSERT(precision == Precision::FP16);
+        Blob::Ptr blob = make_shared_blob<ie_fp16>(TensorDesc(precision, dims, layout));
+        blob->allocate();
+        GenRandomDataCommon(blob);
+        return blob;
+    };
+
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(inputDims)));
+
+    idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP16,
+                                                                                           {sizeInMB * 1024, 2, 16, 16}, Layout::OIHW)));
+
+    layerId = builder.addLayer({{layerId}, {weightsId}}, Builder::FullyConnectedLayer("FullyConnected").setOutputNum(1024 * sizeInMB));
+
+    builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer("output"));
+
+    INetwork::CPtr network = builder.build();
+    std::shared_ptr<ICNNNetwork> cnnNetwork = Builder::convertToICNNNetwork(network);
+
+    return cnnNetwork;
+}
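
A brief illustration of what the helper above is for: generating a network whose weights occupy roughly the requested number of megabytes, so tests can probe size limits without shipping large IR files. The test name and the 4 MB size below are arbitrary:

    #include <memory>

    #include <gtest/gtest.h>
    #include <ie_icnn_network.hpp>

    #include "tests_vpu_common.hpp"

    TEST(MyriadBlobSizeTest, CanGenerateFp16NetworkOfRequestedSize) {  // hypothetical test
        // Roughly 4 MB of FP16 weights, generated on the fly.
        std::shared_ptr<InferenceEngine::ICNNNetwork> network = createNetworkWithDesiredSize(4);

        ASSERT_NE(network, nullptr);
        ASSERT_GT(network->layerCount(), 0u);
    }
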
diff --git a/inference-engine/tests/helpers/tests_vpu_common.hpp b/inference-engine/tests/helpers/tests_vpu_common.hpp
new file mode 100644 (file)
index 0000000..eee97e4
--- /dev/null
@@ -0,0 +1,100 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include <ie_builders.hpp>
+#include <ie_precision.hpp>
+
+#include "single_layer_common.hpp"
+#include "vpu/vpu_plugin_config.hpp"
+#include <vpu/graph_transformer/include/vpu/private_plugin_config.hpp>
+
+
+using config_t = std::map<std::string, std::string>;
+
+static constexpr char ENV_MYRIADX[] = "IE_VPU_MYRIADX";
+static constexpr char ENV_HDDL_R[]  = "IE_VPU_ENABLE_PER_LAYER_TESTS_HDDL";
+
+#define DISABLE_IF(expression)                                   \
+{                                                                \
+    if (expression) {                                            \
+        SKIP() << "Disabled since " << #expression << std::endl; \
+    }                                                            \
+}
+
+#if defined(_WIN32) || defined(WIN32)
+    #define DISABLE_ON_WINDOWS_IF(expr) DISABLE_IF((expr))
+#else
+    #define DISABLE_ON_WINDOWS_IF(expr)
+#endif
+
+static bool hasPlatform(const std::string &environment_variable) {
+    auto env = std::getenv(environment_variable.c_str());
+    if (!env) {
+        return false;
+    }
+
+    int value;
+    try {
+        value = std::stoi(env);
+    } catch (...) {
+        return false;
+    }
+
+    return value != 0;
+}
+
+static bool hasMyriadX() {
+    return hasPlatform(ENV_MYRIADX);
+}
+
+static bool hasMyriad2() {
+    /* TODO: change with environment variable for MYRIAD-2 */
+    return !hasMyriadX();
+}
+
+static bool hasAppropriateStick(const config_t &config) {
+    bool suitsConfig;
+
+    auto platform = config.find(VPU_MYRIAD_CONFIG_KEY(PLATFORM));
+    if (platform == config.end() || platform->second.empty()) {
+        suitsConfig = hasMyriad2() || hasMyriadX();
+    } else {
+        bool hasRequestedMyriad2 =
+                platform->second == VPU_MYRIAD_CONFIG_VALUE(2450) && hasMyriad2();
+        bool hasRequestedMyriadX =
+                platform->second == VPU_MYRIAD_CONFIG_VALUE(2480) && hasMyriadX();
+        suitsConfig = hasRequestedMyriad2 || hasRequestedMyriadX;
+    }
+
+    bool suitsDeprecatedConfig;
+    // Deprecated api
+    IE_SUPPRESS_DEPRECATED_START
+    platform = config.find(VPU_CONFIG_KEY(PLATFORM));
+    if (platform == config.end() || platform->second.empty()) {
+        suitsDeprecatedConfig = hasMyriad2() || hasMyriadX();
+    } else {
+        bool hasRequestedMyriad2 =
+                platform->second == VPU_CONFIG_VALUE(2450) && hasMyriad2();
+        bool hasRequestedMyriadX =
+                platform->second == VPU_CONFIG_VALUE(2480) && hasMyriadX();
+        suitsDeprecatedConfig = hasRequestedMyriad2 || hasRequestedMyriadX;
+    }
+    IE_SUPPRESS_DEPRECATED_END
+
+    return suitsConfig && suitsDeprecatedConfig;
+}
+
+static bool hasHDDL_R() {
+    return hasPlatform(ENV_HDDL_R);
+}
+
+/* this function assumes that the precision of a generated network is FP16 */
+std::shared_ptr<InferenceEngine::ICNNNetwork> createNetworkWithDesiredSize(std::size_t sizeInMB);
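
And a sketch of how the platform helpers and the DISABLE_IF macros are meant to be used from a test body; the test name and the PLATFORM value below are only an example:

    #include <gtest/gtest.h>

    #include "tests_vpu_common.hpp"

    TEST(MyriadPerLayerTest, RunsOnlyOnRequestedStick) {  // hypothetical test
        config_t config = {
            {VPU_MYRIAD_CONFIG_KEY(PLATFORM), VPU_MYRIAD_CONFIG_VALUE(2480)},  // request Myriad X
        };

        // Skip (instead of failing) when the requested stick is not plugged in.
        DISABLE_IF(!hasAppropriateStick(config));
        // Example of the Windows-only guard; HDDL-R cases could be skipped there.
        DISABLE_ON_WINDOWS_IF(hasHDDL_R());

        // ... the actual inference and accuracy checks would follow here ...
    }
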
index d83e088..8180802 100644 (file)
@@ -14,14 +14,12 @@ SET (CMAKE_SKIP_RPATH OFF)
 file(GLOB
         TEST_SRC
         graph_tools/*.cpp
+        http_client/*.cpp
         inference_engine_tests/*.cpp
         inference_engine_tests/cpp_interfaces/*.cpp
         inference_engine_tests/normalization/*.cpp
-        mem_solver/*.cpp
         cnn_network/*.cpp
         builders/*.cpp
-        transformations/*.cpp
-        ie_class/*.cpp
         # TODO: apeskov: Please fix issue CVS
         # shape_infer/*.cpp
         shape_infer/built-in/*.cpp
@@ -76,7 +74,7 @@ source_group("include" FILES ${TEST_INCLUDE})
 
 # create target
 
-add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE} ${MKLDNN_TESTS} ${MKLDNN_TESTS_INCLUDE} ${DLAI_TESTS} transformations/sub_test.cpp transformations/tranformations_test.hpp)
+add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE} ${MKLDNN_TESTS} ${MKLDNN_TESTS_INCLUDE} ${DLIA_TESTS})
 set_ie_threading_interface_for(${TARGET_NAME})
 
 target_include_directories(${TARGET_NAME} PRIVATE
@@ -93,6 +91,10 @@ set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
 target_compile_options(${TARGET_NAME} PRIVATE $<$<CXX_COMPILER_ID:Clang>: -Wno-inconsistent-missing-override >)
 target_compile_options(${TARGET_NAME} PRIVATE $<$<CXX_COMPILER_ID:AppleClang>: -Wno-inconsistent-missing-override >)
 
+if (ENABLE_MYRIAD)
+    target_link_libraries(${TARGET_NAME} PRIVATE mvnc vpu_graph_transformer_test_static)
+endif ()
+
 target_link_libraries(${TARGET_NAME} PRIVATE
     gtest
     gflags
@@ -112,6 +114,10 @@ if (ENABLE_MKL_DNN)
             mkldnn)
 endif ()
 
+if (ENABLE_DLIA)
+    target_link_libraries(${TARGET_NAME} PRIVATE dliaPluginIOTransformations)
+endif ()
+
 add_test(NAME ${TARGET_NAME}
         COMMAND ${TARGET_NAME})
 
index f707c79..2c694c0 100644 (file)
@@ -190,12 +190,10 @@ public:
         }
     }
 
-    void compareICNNNetworks(const ICNNNetwork& newNetwork, const ICNNNetwork& oldNetwork) {
-        IE_SUPPRESS_DEPRECATED_START
-        CNNNetwork network((ICNNNetwork*)&newNetwork);
-        IE_SUPPRESS_DEPRECATED_END
+    void compareICNNNetworks(const ICNNNetwork::Ptr newNetwork, const ICNNNetwork& oldNetwork) {
+        CNNNetwork network(newNetwork);
 
-        if (newNetwork.layerCount() != oldNetwork.layerCount())
+        if (newNetwork->layerCount() != oldNetwork.layerCount())
             THROW_IE_EXCEPTION << "ICNNNetworks have different numbers of layers!";
         for (const auto& layer : network) {
             CNNLayerPtr oldLayer;
@@ -223,8 +221,8 @@ public:
 
         InputsDataMap newInput;
         OutputsDataMap newOutput;
-        newNetwork.getInputsInfo(newInput);
-        newNetwork.getOutputsInfo(newOutput);
+        newNetwork->getInputsInfo(newInput);
+        newNetwork->getOutputsInfo(newOutput);
         InputsDataMap oldInput;
         OutputsDataMap oldOutput;
         oldNetwork.getInputsInfo(oldInput);
@@ -724,7 +722,7 @@ TEST_F(NetworkBuilderTest, convertFromICNNNetworkToICNNNetwork) {
     std::shared_ptr<ICNNNetwork> network = Builder::convertToICNNNetwork(Builder::Network(net_reader.getNetwork()).build());
 
     try {
-        compareICNNNetworks(*network, net_reader.getNetwork());
+        compareICNNNetworks(network, net_reader.getNetwork());
     } catch (InferenceEngine::details::InferenceEngineException &ex) {
         FAIL() << ex.what();
     }
@@ -1148,7 +1146,7 @@ TEST_F(NetworkBuilderTest, CreateLSTMFromBuilder) {
     builder.addLayer({{lstm, 2}}, Builder::OutputLayer("output2"));
     const auto network = Builder::convertToICNNNetwork(builder.build());
     try {
-        compareICNNNetworks(*network, net_reader.getNetwork());
+        compareICNNNetworks(network, net_reader.getNetwork());
     } catch (InferenceEngine::details::InferenceEngineException &ex) {
         FAIL() << ex.what();
     }
@@ -1187,6 +1185,8 @@ TEST_F(NetworkBuilderTest, CheckPreProcessAlexNet) {
 }
 
 TEST_F(NetworkBuilderTest, ReshapeNetworkTest) {
+    Builder::ReshapeLayer("WA");
+
     std::string model = R"V0G0N(
 <net name="Reshape" version="2" batch="1">
     <layers>
@@ -1231,7 +1231,7 @@ TEST_F(NetworkBuilderTest, ReshapeNetworkTest) {
     network->getLayerByName("flatten", layer, nullptr);
     ASSERT_EQ(layer->outData[0]->getDims().size(), 2);
     try {
-        compareICNNNetworks(*network, net_reader.getNetwork());
+        compareICNNNetworks(network, net_reader.getNetwork());
     } catch (InferenceEngine::details::InferenceEngineException &ex) {
         FAIL() << ex.what();
     }
diff --git a/inference-engine/tests/unit/builders/transform_network_test.cpp b/inference-engine/tests/unit/builders/transform_network_test.cpp
deleted file mode 100644 (file)
index ebfce47..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <string.h>
-#include <transform/transform_network.hpp>
-#include <ie_builders.hpp>
-
-#include "builder_test.hpp"
-
-using namespace testing;
-using namespace InferenceEngine;
-
-class TransformNetworkTest: public BuilderTestCommon {};
-
-TEST_F(TransformNetworkTest, AddNewLayer) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    ASSERT_EQ(0, builder.size());
-    network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    ASSERT_EQ(1, builder.size());
-}
-
-TEST_F(TransformNetworkTest, RemoveLayer) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    ASSERT_EQ(0, builder.size());
-    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    ASSERT_EQ(1, builder.size());
-
-    network.removeLayer(layer);
-    ASSERT_EQ(0, builder.size());
-}
-
-TEST_F(TransformNetworkTest, GetIncorrectPort) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    ASSERT_THROW(layer.getInPort(), InferenceEngine::details::InferenceEngineException);
-    ASSERT_THROW(layer.getOutPort(1), InferenceEngine::details::InferenceEngineException);
-}
-
-
-TEST_F(TransformNetworkTest, GetCorrectPort) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    ASSERT_NO_THROW(layer.getOutPort());
-    ASSERT_NO_THROW(layer.getOutPort(0));
-}
-
-TEST_F(TransformNetworkTest, GetLayerById) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    ASSERT_NO_THROW(network.getLayer(layer.getId()));
-}
-
-TEST_F(TransformNetworkTest, GetLayerByName) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    ASSERT_NO_THROW(network.getLayer("in1"));
-}
-
-TEST_F(TransformNetworkTest, ConnectTwoLayers) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
-    ASSERT_EQ(2, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    network.connect(input, relu);
-    ASSERT_EQ(1, builder.getConnections().size());
-}
-
-TEST_F(TransformNetworkTest, ConnectTwoPorts) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
-    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
-    ASSERT_EQ(2, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    network.connect(inputPort, reluPort);
-    ASSERT_EQ(1, builder.getConnections().size());
-}
-
-TEST_F(TransformNetworkTest, DisconnectTwoLayers) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
-    ASSERT_EQ(2, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    network.connect(input, relu);
-    ASSERT_EQ(1, builder.getConnections().size());
-    network.disconnect(input, relu);
-    ASSERT_EQ(0, builder.getConnections().size());
-}
-
-TEST_F(TransformNetworkTest, DisonnectTwoPorts) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
-    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
-    ASSERT_EQ(2, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    network.connect(inputPort, reluPort);
-    ASSERT_EQ(1, builder.getConnections().size());
-    network.disconnect(inputPort, reluPort);
-    ASSERT_EQ(0, builder.getConnections().size());
-}
-
-TEST_F(TransformNetworkTest, RemoveLayerAndConnection) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
-    network.connect(input, relu);
-    ASSERT_EQ(1, builder.getConnections().size());
-    ASSERT_EQ(2, builder.size());
-    network.removeLayer(relu);
-    ASSERT_EQ(0, builder.getConnections().size());
-    ASSERT_EQ(1, builder.size());
-}
-
-TEST_F(TransformNetworkTest, GetInitializedConnection) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
-    network.connect(input, relu);
-    ASSERT_EQ(input.getOutPort(), relu.getInPort().getConnection().getSource());
-}
-
-TEST_F(TransformNetworkTest, GetIncorrectConnections) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
-    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
-    ASSERT_THROW(relu.getInPort().getConnection().getSource(), InferenceEngine::details::InferenceEngineException);
-    ASSERT_THROW(input.getOutPort().getConnection().getDestination(), InferenceEngine::details::InferenceEngineException);
-    ASSERT_NO_THROW(input.getOutPort().getConnection().getSource());
-    ASSERT_NO_THROW(relu.getInPort().getConnection().getDestination());
-}
-
-TEST_F(TransformNetworkTest, ConnectToSourcePortsFromConnection) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
-    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
-    ASSERT_EQ(2, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    ASSERT_NO_THROW(inputPort.getConnection().setDestination(reluPort));
-    ASSERT_EQ(1, builder.getConnections().size());
-}
-
-TEST_F(TransformNetworkTest, ConnectWithTwoDestinations) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
-    Transform::Port reluPort1 = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
-    Transform::Port reluPort2 = network.addLayer(Builder::ReLULayer("relu2")).getInPort();
-    ASSERT_EQ(3, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    ASSERT_NO_THROW(inputPort.getConnection().setDestination(reluPort1));
-    ASSERT_NO_THROW(inputPort.getConnection().addDestination(reluPort2));
-    ASSERT_THROW(inputPort.getConnection().addDestination(reluPort2), InferenceEngine::details::InferenceEngineException);
-    ASSERT_EQ(2, builder.getConnections().size());
-    ASSERT_THROW(inputPort.getConnection().setDestination(reluPort2), InferenceEngine::details::InferenceEngineException);
-    ASSERT_NO_THROW(inputPort.getConnection().setDestinations({reluPort2, reluPort1}));
-    ASSERT_EQ(2, builder.getConnections().size());
-}
-
-TEST_F(TransformNetworkTest, ConnectToDestinationPortsFromConnection) {
-    Builder::Network builder("test");
-    Transform::Network network(builder);
-    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
-    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
-    ASSERT_EQ(2, builder.size());
-    ASSERT_EQ(0, builder.getConnections().size());
-    reluPort.getConnection().setSource(inputPort);
-    ASSERT_EQ(1, builder.getConnections().size());
-}
\ No newline at end of file
index fc9b808..4803449 100644 (file)
@@ -10,7 +10,7 @@
 #include <xml_helper.hpp>
 #include <../shape_infer/built_in_shape_infer_general_test.hpp>
 #include <memory>
-#include <../include/ie_data.h>
+#include <ie_data.h>
 
 #include "layer_builder.h"
 #include "shapes.h"
index 4514225..30f109e 100644 (file)
@@ -214,13 +214,13 @@ public:
                 }
                 case ParametersValues::FLOAT_POSITIVE: {
                     for (int j = 0; j < magicNumber; ++j) {
-                        paramsValues.push_back(std::to_string(distFloatPositive(gen)));
+                        paramsValues.push_back(to_string_c_locale(distFloatPositive(gen)));
                     }
                     break;
                 }
                 case ParametersValues::FLOAT_NEGATIVE: {
                     for (int j = 0; j < magicNumber; ++j) {
-                        paramsValues.push_back(std::to_string(distFloatNegative(gen)));
+                        paramsValues.push_back(to_string_c_locale(distFloatNegative(gen)));
                     }
                     break;
                 }
index a020769..293f0c6 100644 (file)
@@ -26,6 +26,8 @@ struct Maps{
 
     std::map<std::string, std::pair<int, int>> mapOfUnequalShapes {
             // Layer name, Correct num of input, Correct num of output
+            { "Convolution", {3, 1}},
+            { "Deconvolution", {3, 1}},
             { "Crop", {2, 1}},
             { "DetectionOutput", {3, 1}},
             { "Interp", {2, 1}}
@@ -226,4 +228,4 @@ public:
     ~LayersWithNIO() override = default;
 };
 
-#endif // SHAPES_H
\ No newline at end of file
+#endif // SHAPES_H
index d3ce6e9..77e379d 100644 (file)
@@ -524,9 +524,7 @@ TEST_F(V2FormatParserTest, parsesNumberOfLayersCorrectly) {
     string content = MAKE_ALEXNET_FOR_MEAN_TESTS_V2();
 
     ASSERT_NO_FATAL_FAILURE(assertParseSucceed(content));
-    IE_SUPPRESS_DEPRECATED_START
-    CNNNetwork network(net.get());
-    IE_SUPPRESS_DEPRECATED_END
+    CNNNetwork network(net);
     ASSERT_EQ(network.layerCount(), LAYER_COUNT);
 }
 
index 95ec605..6f56466 100644 (file)
@@ -29,42 +29,10 @@ TEST_F(GNAConfigTest, reportAnErrorIfConfigNotFound) {
                {TargetDevice :: eCPU, {Precision::FP32}}});
 
     EXPECT_CALL(net, getPrecision()).WillRepeatedly(Return(Precision::FP32));
-    EXPECT_CALL(net, getTargetDevice()).WillRepeatedly(Return(TargetDevice::eGNA));
 
     ASSERT_ANY_THROW(c.find_configuration(net));
 }
 
-TEST_F(GNAConfigTest, canFindConfiguration) {
-
-    Config c ({{TargetDevice :: eGNA, {Precision::I16}},
-               {TargetDevice :: eCPU, {Precision::FP32}}});
-
-    EXPECT_CALL(net, getPrecision()).WillRepeatedly(Return(Precision::FP32));
-    EXPECT_CALL(net, getTargetDevice()).WillRepeatedly(Return(TargetDevice::eCPU));
-
-    auto match = c.find_configuration(net);
-
-    EXPECT_EQ(match.device, TargetDevice::eCPU);
-    auto matchNetPrec = std::find(match.networkPrec.begin(), match.networkPrec.end(), Precision::FP32) != match.networkPrec.end();
-    EXPECT_EQ(matchNetPrec, true);
-}
-
-TEST_F(GNAConfigTest, canPassTroughNetworkAfterFindConfiguration) {
-
-    Config c ({{TargetDevice :: eGNA, {Precision::I16}},
-               {TargetDevice :: eCPU, {Precision::FP32}}});
-
-    EXPECT_CALL(net, getPrecision()).WillRepeatedly(Return(Precision::FP32));
-    EXPECT_CALL(net, getTargetDevice()).WillRepeatedly(Return(TargetDevice::eCPU));
-
-    auto match = c.find_configuration(net);
-
-    auto net2 = match.convert(net);
-
-    EXPECT_EQ(net2->getTargetDevice(), TargetDevice::eCPU);
-    EXPECT_EQ(net2->getPrecision(), Precision::FP32);
-}
-
 TEST_F(GNAConfigTest, canNotMatchWithDefaultDevice) {
 
     Config c ({{TargetDevice :: eGNA, {Precision::I16}},
@@ -73,7 +41,6 @@ TEST_F(GNAConfigTest, canNotMatchWithDefaultDevice) {
     c.setDefaultDevice(TargetDevice::eGNA);
 
     EXPECT_CALL(net, getPrecision()).WillRepeatedly(Return(Precision::FP32));
-    EXPECT_CALL(net, getTargetDevice()).WillRepeatedly(Return(TargetDevice::eDefault));
 
     EXPECT_ANY_THROW(c.find_configuration(net).convert(net));
 }
@@ -86,12 +53,6 @@ TEST_F(GNAConfigTest, canMatchWithDefaultDevice) {
     c.setDefaultDevice(TargetDevice::eGNA);
 
     EXPECT_CALL(net, getPrecision()).WillRepeatedly(Return(Precision::I16));
-    EXPECT_CALL(net, getTargetDevice()).WillRepeatedly(Return(TargetDevice::eDefault));
-
-    auto net2 = c.find_configuration(net).convert(net);
-
-    EXPECT_EQ(net2->getTargetDevice(), TargetDevice::eDefault);
-    EXPECT_EQ(net2->getPrecision(), Precision::I16);
 }
 
 TEST_F(GNAConfigTest, canMatchWith1AsyncThread) {
index bb01872..4a7caab 100644 (file)
@@ -397,3 +397,214 @@ TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatThroughScaleShiftPropagateFor
             .called_with_input(input_data).equals_to(expected_result);
 }
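+// The tests below exercise Split -> Concat subgraphs with 2..10 inputs, with and
+// without a trailing FC layer and explicit Copy layers; the Aligned / NotAligned
+// naming presumably refers to whether the split parts hit GNA buffer alignment.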
 
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith2InputsNotAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(20);
+    assert_that().onInferModel(SplitToConcatWith2InputsNotAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith2By50InputsNotAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(100);
+    assert_that().onInferModel(SplitToConcatWith2By50InputsNotAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith2By50InputsNotAlignedNoFCWithInCopyWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(100);
+    assert_that().onInferModel(SplitToConcatWith2By50InputsNotAlignedNoFCWithInCopyWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith3InputsNotAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(30);
+    assert_that().onInferModel(SplitToConcatWith3InputsNotAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith4InputsNotAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(40);
+    assert_that().onInferModel(SplitToConcatWith4InputsNotAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith4InputsNotAlignedNoFCWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(40);
+    assert_that().onInferModel(SplitToConcatWith4InputsNotAlignedNoFCWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith10InputsNotAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(100);
+    assert_that().onInferModel(SplitToConcatWith10InputsNotAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith10InputsNotAlignedNoFCWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(100);
+    assert_that().onInferModel(SplitToConcatWith10InputsNotAlignedNoFCWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith10By1InputsNotAlignedNoFCWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(10);
+    assert_that().onInferModel(SplitToConcatWith10By1InputsNotAlignedNoFCWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith2InputsNotAlignedWithFC) {
+    std::vector<float> input_data = getRangeInput(20);
+    std::vector<float> expected_result(10, 211.0f);
+    assert_that().onInferModel(SplitToConcatWith2InputsNotAlignedWithFC())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith3InputsNotAlignedWithFC) {
+    std::vector<float> input_data = getRangeInput(30);
+    std::vector<float> expected_result(10, 466.0f);
+    assert_that().onInferModel(SplitToConcatWith3InputsNotAlignedWithFC())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith3By512InputsWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(1536);
+    assert_that().onInferModel(SplitToConcatWith3By512InputsWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith10InputsNotAlignedWithFC) {
+    std::vector<float> input_data = getRangeInput(100);
+    std::vector<float> expected_result(10, 5051.0f);
+    assert_that().onInferModel(SplitToConcatWith10InputsNotAlignedWithFC())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith2InputsAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(64);
+    assert_that().onInferModel(SplitToConcatWith2InputsAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith2By64InputsAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(128);
+    assert_that().onInferModel(SplitToConcatWith2By64InputsAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith2By64InputsAlignedNoFCWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(128);
+    assert_that().onInferModel(SplitToConcatWith2By64InputsAlignedNoFCWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith2InputsAlignedNoFCWithInCopyWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(64);
+    assert_that().onInferModel(SplitToConcatWith2InputsAlignedNoFCWithInCopyWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith3InputsAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(96);
+    assert_that().onInferModel(SplitToConcatWith3InputsAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith3InputsAlignedNoFCWithInCopyWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(96);
+    assert_that().onInferModel(SplitToConcatWith3InputsAlignedNoFCWithInCopyWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SplitToConcatWith10InputsAlignedNoFC) {
+    std::vector<float> input_data = getRangeInput(320);
+    assert_that().onInferModel(SplitToConcatWith10InputsAlignedNoFC())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith10InputsAlignedNoFCWithInCopyWithOutCopy) {
+    std::vector<float> input_data = getRangeInput(320);
+    assert_that().onInferModel(SplitToConcatWith10InputsAlignedNoFCWithInCopyWithOutCopy())
+            .inNotCompactMode().gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(input_data);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith2InputsAlignedWithFC) {
+    std::vector<float> input_data = getRangeInput(64);
+    std::vector<float> expected_result(32, 2081.0f);
+    assert_that().onInferModel(SplitToConcatWith2InputsAlignedWithFC())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith2InputsAlignedWithFCWithInCopy) {
+    std::vector<float> input_data = getRangeInput(64);
+    std::vector<float> expected_result(32, 2081.0f);
+    assert_that().onInferModel(SplitToConcatWith2InputsAlignedWithFCWithInCopy())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith3InputsAlignedWithFC) {
+    std::vector<float> input_data = getRangeInput(96);
+    std::vector<float> expected_result(32, 4657.0f);
+    assert_that().onInferModel(SplitToConcatWith3InputsAlignedWithFC())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith3InputsAlignedWithFCWithInCopy) {
+    std::vector<float> input_data = getRangeInput(96);
+    std::vector<float> expected_result(32, 4657.0f);
+    assert_that().onInferModel(SplitToConcatWith3InputsAlignedWithFCWithInCopy())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith10InputsAlignedWithFC) {
+    std::vector<float> input_data = getRangeInput(320);
+    std::vector<float> expected_result(32, 51361.0f);
+    assert_that().onInferModel(SplitToConcatWith10InputsAlignedWithFC())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitToConcatWith10InputsAlignedWithFCWithInCopy) {
+    std::vector<float> input_data = getRangeInput(320);
+    std::vector<float> expected_result(32, 51361.0f);
+    assert_that().onInferModel(SplitToConcatWith10InputsAlignedWithFCWithInCopy())
+            .inNotCompactMode().withWeigthsPattern({1}).gna().propagate_forward().onCPU()
+            .called_with_input(input_data).equals_to(expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, ReshapeConvolutionLessThan48Filters) {
+    std::vector<float> input_data(800, 1.f);
+    std::vector<float> expected_result(1600, 8.f);
+
+    assert_that().onInferModel(ReshapeConvolutionLessThan48Filters())
+            .inNotCompactMode()
+            .withWeigthsPattern({1})
+            .gna()
+            .propagate_forward()
+            .onCPU()
+            .called_with_input(input_data)
+            .equals_to(expected_result);
+}
index 3ff8aa5..a0321fd 100644 (file)
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#ifndef _WIN32
+#if !defined(_WIN32) || !defined(__INTEL_COMPILER)
 #include <mm_malloc.h>
 #endif
 #include "gna_api_wrapper.hpp"
index c64d068..1bb166d 100644 (file)
@@ -30,7 +30,7 @@ class GNAAOTTests : public GNATest {
     }
 };
 
-TEST_F(GNAAOTTests, AffineWith2AffineOutputs_canbe_export_imported) {
+TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_export_imported) {
 
     const std::string X = registerFileForRemove("unit_tests.bin");
 
@@ -44,7 +44,7 @@ TEST_F(GNAAOTTests, AffineWith2AffineOutputs_canbe_export_imported) {
 }
 
 
-TEST_F(GNAAOTTests, AffineWith2AffineOutputs_canbe_imported_verify_structure) {
+TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_imported_verify_structure) {
 
     auto & nnet_type = storage<intel_nnet_type_t>();
 
index e9eeba7..c912935 100644 (file)
@@ -156,7 +156,6 @@ void GNAPropagateMatcher :: match() {
             }
             net_reader.SetWeights(weights);
 
-            net_reader.getNetwork().setTargetDevice(_env.target_device);
 
             if (_env.cb) {
                 auto network = net_reader.getNetwork();
@@ -256,7 +255,7 @@ void GNAPropagateMatcher :: match() {
             std::unique_ptr<NNetComponentMatcher> combined(new NNetComponentMatcher());
 
             for (auto & matchWhat : _env.whatToMatch) {
-                switch(matchWhat) {
+                switch(matchWhat.type) {
                     case GnaPluginTestEnvironment::matchPrecision :
                         combined->add(new NNetPrecisionMatcher(_env.nnet_precision, INTEL_AFFINE));
                         break;
@@ -265,13 +264,13 @@ void GNAPropagateMatcher :: match() {
                             .WillOnce(Return(GNA_NOERROR));
                         break;
                     case GnaPluginTestEnvironment::matchPwlInserted :
-                        combined->add(new PWLMatcher(_env.matchInserted, _env.matchQuantity, _env.pwlsToMatchWith));
+                        combined->add(new PWLMatcher(_env.matchInserted, matchWhat.matchQuantity, _env.pwlsToMatchWith));
                         break;
                     case GnaPluginTestEnvironment::matchConvInserted:
-                        combined->add(new ConvoluionLayerMatcher(_env.matchInserted, _env.matchQuantity));
+                        combined->add(new ConvoluionLayerMatcher(_env.matchInserted, matchWhat.matchQuantity));
                         break;
                     case GnaPluginTestEnvironment::matchMaxPoolingInserted:
-                        combined->add(new PoolingLayerMatcher(_env.matchInserted, _env.matchQuantity, true));
+                        combined->add(new PoolingLayerMatcher(_env.matchInserted, matchWhat.matchQuantity, true));
                         break;
                     case GnaPluginTestEnvironment::matchPwlQuantizeMetrics :
                         combined->add(new PWLQuantizationMetricsMatcher(_env.type,
@@ -279,10 +278,10 @@ void GNAPropagateMatcher :: match() {
                                                                         _env.quantization_segments_threshold));
                         break;
                     case GnaPluginTestEnvironment::matchCopyInserted :
-                        combined->add(new CopyLayerMatcher(_env.matchInserted, _env.matchQuantity));
+                        combined->add(new CopyLayerMatcher(_env.matchInserted, matchWhat.matchQuantity));
                         break;
                     case GnaPluginTestEnvironment::matchDiagonalInserted :
-                        combined->add(new DiagLayerMatcher(_env.matchInserted, _env.matchQuantity));
+                        combined->add(new DiagLayerMatcher(_env.matchInserted, matchWhat.matchQuantity));
                         break;
                     case GnaPluginTestEnvironment::saveArgs :
                         EXPECT_CALL(mockApi, GNAPropagateForward(_, _, _, _, _, _))
@@ -405,8 +404,6 @@ void GNAPluginAOTMatcher :: match() {
     TBlob<float> output({ Precision::FP32, {1, 10}, Layout::NC });
     output.allocate();
 
-    net_reader.getNetwork().setTargetDevice(TargetDevice::eGNA);
-
     if (_env.cb) {
         auto network = net_reader.getNetwork();
         _env.cb(network);
@@ -439,8 +436,6 @@ void GNADumpXNNMatcher::load(GNAPlugin & plugin) {
         GNATest::fillWeights(weights);
         net_reader.SetWeights(weights);
 
-        net_reader.getNetwork().setTargetDevice(TargetDevice::eGNA);
-
         if (_env.cb) {
             auto network = net_reader.getNetwork();
             _env.cb(network);
@@ -516,8 +511,6 @@ void GNAQueryStateMatcher :: match() {
         GNATest::fillWeights(weights);
         net_reader.SetWeights(weights);
 
-        net_reader.getNetwork().setTargetDevice(TargetDevice::eGNA);
-
         if (_env.cb) {
             auto network = net_reader.getNetwork();
             _env.cb(network);
index 3399937..4f0329a 100644 (file)
@@ -55,14 +55,19 @@ class GnaPluginTestEnvironment {
         matchAffineWeights,
         saveAffineWeights
     };
-    std::vector<MatchWhat> whatToMatch;
     enum {
         kUnset = -1,
         kAnyNotNull= -2
     };
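+    // Each requested matcher now carries its own expected quantity, replacing the
+    // single shared matchQuantity field this class used to have.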
+    struct MatcherData {
+        MatchWhat type = matchNone;
+        int matchQuantity = kUnset;
+    };
+    std::vector<MatcherData> whatToMatch;
+
     InferenceEngine::TargetDevice target_device =
                             InferenceEngine::TargetDevice::eGNA;
-    int matchQuantity = kUnset;
+
     int numberOfStates = kUnset;
     bool matchInserted = true;
     NnetPrecision nnet_precision;
@@ -103,7 +108,7 @@ class GNATestConfigurability : public GNATestBase{
  protected:
     bool needNextMatcher = true;
     GnaPluginTestEnvironment _env;
-    GnaPluginTestEnvironment::MatchWhat & getMatcher() {
+    GnaPluginTestEnvironment::MatcherData& getMatcher() {
         if (needNextMatcher) {
             needNextMatcher = false;
             _env.whatToMatch.push_back({});
@@ -197,7 +202,7 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
      */
     GNAPropagateMatcher & filledWith(int16_t valueToFill) {
         _env.fillValue = valueToFill;
-        getMatcher() = GnaPluginTestEnvironment::fillOutputValues;
+        getMatcher().type = GnaPluginTestEnvironment::fillOutputValues;
         return *this;
     }
 
@@ -238,14 +243,15 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
 
 
     GNAPropagateMatcher & once() {
-        IE_ASSERT(_env.matchPwlInserted && _env.pwlsToMatchWith.empty());
-        _env.matchQuantity = 1;
-        return *this;
+        return times(1);
     }
 
     GNAPropagateMatcher & twice() {
-        IE_ASSERT(_env.matchPwlInserted && _env.pwlsToMatchWith.empty());
-        _env.matchQuantity = 2;
+        return times(2);
+    }
+
+    GNAPropagateMatcher & times(int n) {
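+    // n is interpreted by the individual matchers; for DiagLayerMatcher it is the
+    // exact number of diagonal layers expected.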
+        getMatcher().matchQuantity = n;
         return *this;
     }
 
@@ -255,24 +261,24 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
 
     GNAPropagateMatcher & exact_nnet_structure(intel_nnet_type_t * pNet) {
 
-        getMatcher() = GnaPluginTestEnvironment::exactNNetStructure;
+        getMatcher().type = GnaPluginTestEnvironment::exactNNetStructure;
         original_nnet = pNet;
         return *this;
     }
 
     GNAPropagateMatcher & pwl_inserted_into_nnet() {
-        getMatcher() = GnaPluginTestEnvironment::matchPwlInserted;
+        getMatcher().type = GnaPluginTestEnvironment::matchPwlInserted;
         return *this;
     }
 
     GNAPropagateMatcher & pwls_inserted_into_nnet(const std::vector<DnnActivationType> &pwls) {
-        getMatcher() = GnaPluginTestEnvironment::matchPwlInserted;
+        getMatcher().type = GnaPluginTestEnvironment::matchPwlInserted;
         _env.pwlsToMatchWith = pwls;
         return *this;
     }
 
     GNAPropagateMatcher & max_pooling_inserted_into_nnet() {
-        getMatcher() = GnaPluginTestEnvironment::matchMaxPoolingInserted;
+        getMatcher().type = GnaPluginTestEnvironment::matchMaxPoolingInserted;
         return *this;
     }
 
@@ -281,37 +287,37 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
     }
 
     GNAPropagateMatcher & convolution_inserted_into_nnet() {
-        getMatcher() = GnaPluginTestEnvironment::matchConvInserted;
+        getMatcher().type = GnaPluginTestEnvironment::matchConvInserted;
         return *this;
     }
 
 
     GNAPropagateMatcher & pwl_quantization_activation(uint32_t activation_type) {
-        getMatcher() = GnaPluginTestEnvironment::matchPwlQuantizeMetrics;
+        getMatcher().type = GnaPluginTestEnvironment::matchPwlQuantizeMetrics;
         _env.type = activation_type;
         return *this;
     }
 
     GNAPropagateMatcher & pwl_quantization_precision_threshold(float threshold) {
-        getMatcher() = GnaPluginTestEnvironment::matchPwlQuantizeMetrics;
+        getMatcher().type = GnaPluginTestEnvironment::matchPwlQuantizeMetrics;
         _env.quantization_presicion_threshold = threshold;
         return *this;
     }
 
     GNAPropagateMatcher & pwl_quantization_segments_threshold(uint16_t threshold) {
-        getMatcher() = GnaPluginTestEnvironment::matchPwlQuantizeMetrics;
+        getMatcher().type = GnaPluginTestEnvironment::matchPwlQuantizeMetrics;
         _env.quantization_segments_threshold = threshold;
         return *this;
     }
 
     GNAPropagateMatcher & diagonal_inserted_into_nnet() {
-        getMatcher() = GnaPluginTestEnvironment::matchDiagonalInserted;
+        getMatcher().type = GnaPluginTestEnvironment::matchDiagonalInserted;
         return *this;
     }
 
     GNAPropagateMatcher &preprocessed_input_data(std::vector<float> input_init, std::vector<int16_t> input_processed,
                                                  InferenceEngine::Precision inputPrecision) {
-        getMatcher() = GnaPluginTestEnvironment::matchInputData;
+        getMatcher().type = GnaPluginTestEnvironment::matchInputData;
         _env.input_processed = std::move(input_processed);
         _env.input_init["placeholder"] = std::move(input_init);
         _env.input_precision = inputPrecision;
@@ -319,60 +325,60 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
     }
 
     GNAPropagateMatcher & copy_inserted_into_nnet() {
-        getMatcher() = GnaPluginTestEnvironment::matchCopyInserted;
+        getMatcher().type = GnaPluginTestEnvironment::matchCopyInserted;
         return *this;
     }
 
 
     GNAPropagateMatcher & affine_weights_transpozed(std::pair<int, int> &&transpozedArgs) {
-        getMatcher() = GnaPluginTestEnvironment::saveAffineWeights;
+        getMatcher().type = GnaPluginTestEnvironment::saveAffineWeights;
         _env.transposedArgsForSaving = std::move(transpozedArgs);
 
         return *this;
     }
 
     GNAPropagateMatcher & affine_weights() {
-        getMatcher() = GnaPluginTestEnvironment::saveAffineWeights;
+        getMatcher().type = GnaPluginTestEnvironment::saveAffineWeights;
         return *this;
     }
 
     GNAPropagateMatcher & affine_weights_eq(std::vector<uint16_t> & sourceWeights) {
-        getMatcher() = GnaPluginTestEnvironment::matchAffineWeights;
+        getMatcher().type = GnaPluginTestEnvironment::matchAffineWeights;
         _env.transposedData = &sourceWeights;
         return *this;
     }
 
 
     GNAPropagateMatcher & affine_weights_transposed(std::vector<uint16_t> & sourceWeights, std::pair<int,int> transposeData) {
-        getMatcher() = GnaPluginTestEnvironment::matchAffineWeightsTranspose;
+        getMatcher().type = GnaPluginTestEnvironment::matchAffineWeightsTranspose;
         _env.transposeArgs = transposeData;
         _env.transposedData = &sourceWeights;
         return *this;
     }
 
     GNAPropagateMatcher & nnet_input_precision(const InferenceEngine::Precision &precision) {
-        getMatcher() = GnaPluginTestEnvironment::matchPrecision;
+        getMatcher().type = GnaPluginTestEnvironment::matchPrecision;
         _env.nnet_precision.input_precision = precision;
         return *this;
     }
     GNAPropagateMatcher & nnet_ouput_precision(const InferenceEngine::Precision &precision) {
-        getMatcher() = GnaPluginTestEnvironment::matchPrecision;
+        getMatcher().type = GnaPluginTestEnvironment::matchPrecision;
         _env.nnet_precision.output_precision = precision;
         return *this;
     }
     GNAPropagateMatcher & nnet_weights_precision(const InferenceEngine::Precision &precision) {
-        getMatcher() = GnaPluginTestEnvironment::matchPrecision;
+        getMatcher().type = GnaPluginTestEnvironment::matchPrecision;
         _env.nnet_precision.weights_precision = precision;
         return *this;
     }
     GNAPropagateMatcher & nnet_biases_precision(const InferenceEngine::Precision &precision) {
-        getMatcher() = GnaPluginTestEnvironment::matchPrecision;
+        getMatcher().type = GnaPluginTestEnvironment::matchPrecision;
         _env.nnet_precision.biases_precision = precision;
         return *this;
     }
 
     GNAPropagateMatcher & proc_type(uint32_t proc_type) {
-        getMatcher() = GnaPluginTestEnvironment::matchProcType;
+        getMatcher().type = GnaPluginTestEnvironment::matchProcType;
         _env.proc_type = proc_type;
         return * this;
     }
@@ -392,6 +398,7 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         _env.target_device = InferenceEngine::TargetDevice::eCPU;
         return *this;
     }
+
  protected:
     void match();
     intel_nnet_type_t * original_nnet = nullptr;
@@ -528,7 +535,7 @@ class GNATest : public ::testing::Test, public GNATestConfigurability<GNATest>
         return *this;
     }
     GNATest & save_args() {
-        getMatcher() = GnaPluginTestEnvironment::saveArgs;
+        getMatcher().type = GnaPluginTestEnvironment::saveArgs;
         return *this;
     }
     GNATest & save() {
@@ -635,4 +642,15 @@ class GNATest : public ::testing::Test, public GNATestConfigurability<GNATest>
             }
         }
     }
+
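+    // Helper used by the new SplitToConcat tests: returns the ramp {1, 2, ..., max} as floats.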
+ protected:
+    std::vector<float> getRangeInput(std::size_t max) {
+        std::vector<float> result(max);
+        float value = 1.0f;
+        for(std::size_t i = 0; i < result.size(); i++) {
+            result[i] = value;
+            value++;
+        }
+        return result;
+    }
 };
index 207c782..5708d27 100644 (file)
@@ -272,10 +272,10 @@ TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
         .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
 }
 
-TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInDiagonalInsertion) {
+TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) {
     assert_that().onInferModel(eltwiseWithMemoryAndActivationInputModel())
         .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().once();
+        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
 }
 
 TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
@@ -362,7 +362,9 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiago
     // extra identity inserted for affine
     assert_that().onInferModel(AffineWithReluSigmoid())
         .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActRelu, kActSigmoid});
+        .gna().propagate_forward().called_with()
+        // 3 diagonals in total: the extra identity for the affine (see above), plus 1 for the second activation and 1 for the eltwise
+        .pwls_inserted_into_nnet({kActRelu, kActSigmoid}).diagonal_inserted_into_nnet().times(3);
 }
 
 // TODO: build a regression test on top of it using real quantisation accuracy checking
@@ -411,3 +413,9 @@ TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) {
         .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
         .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}).And().diagonal_inserted_into_nnet();
 }
+
+TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) {
+    assert_that().onInferModel(ConcatWithDiffScaleFactor())
+            .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+            .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity});
+}
index b39813d..2c17865 100644 (file)
@@ -9,12 +9,14 @@
 
 class DiagLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
     bool matchInserted;
-    int matchQuantity;
+    int  matchQuantity;
+    mutable int actualQuantity = 0;  // filled in by MatchAndExplain(), reported by DescribeTo()
 public:
     DiagLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
     bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
+        actualQuantity = 0;
         for(int i = 0; i < foo->nLayers; i++) {
             if (foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL) continue;
             // diagonal layer has to have 1 for weights and 0 for biases
@@ -45,13 +47,25 @@ public:
 
             // if all weights are zero, or zero value doesn't look like padding
             if (!bWeightsOK && beforePadding == -1) continue;
-
-            return matchInserted;
+            actualQuantity++;
+        }
+        // matchQuantity == -1 means any positive quantity is acceptable
+        if (matchQuantity == -1) {
+            if (actualQuantity > 0)
+                return matchInserted;
+            else
+                return !matchInserted;
         }
-        return !matchInserted;
+        if (actualQuantity == matchQuantity)
+            return matchInserted;
+        else
+            return !matchInserted;
+
     };
     void DescribeTo(::std::ostream *os) const override {
-        *os << "should "<< (matchInserted ? "" : "not ") << "have Identity Diagonal Primitive primitive as part of nnet structure";
+        *os << "should "<< (matchInserted ? "" : "not ") << "have "
+            << (matchQuantity == -1 ? "any" : std::to_string(matchQuantity))
+            << " Identity Diagonal Primitive primitive as part of nnet structure, but was " << actualQuantity;
     }
 };
 
index 4ede25f..18e8554 100644 (file)
@@ -3509,7 +3509,7 @@ std::string AffineWithReluSigmoid() {
         </layer>
 
         <layer name="Eltwise_4" type="Eltwise" id="4" precision="FP32">
-                       <data operation="sum" />
+                       <data operation="mul" />
                        <input>
                                <port id="0">
                                        <dim>1</dim>
@@ -3777,6 +3777,8 @@ std::string LSTMCellOnlyModel() {
     )V0G0N";
 };
 
+
+
 std::string TIModelWithLSTMCell1() {
     return R"V0G0N(
 <?xml version="1.0" ?>
@@ -5764,6 +5766,2988 @@ std::string SplitToConcatThroughScaleShift() {
     )V0G0N";
     }
 
+std::string ConcatWithDiffScaleFactor() {
+        return R"V0G0N(
+<net Name="concatinationWithDiffScaleFactor" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input1" type="input" id="0" precision="FP32">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>20</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="1" name="Split" precision="FP32" type="Split">
+            <data axis="1" />
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>20</dim>
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="2">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="identity_activation" id="2" type="Activation" precision="FP32">
+            <data type="sigmoid" />
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="tanh_activation" id="3" type="Activation" precision="FP32">
+            <data type="tanh" />
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="4" name="concat" precision="FP32" type="Concat">
+            <input>
+                <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+            </input>
+            <output>
+                <port id="2">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </output>
+               </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="0" />
+        <edge from-layer="1" from-port="2" to-layer="3" to-port="0" />
+        <edge from-layer="2" from-port="1" to-layer="4" to-port="0" />
+        <edge from-layer="3" from-port="1" to-layer="4" to-port="1" />
+    </edges>
+</net>
+)V0G0N";
+    }
+
+    std::string SplitToConcatWith2InputsNotAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+       </edges>
+</net>
+        )V0G0N";
+    }
+
+    std::string SplitToConcatWith2By50InputsNotAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+       </edges>
+</net>
+        )V0G0N";
+    }
+
+    std::string SplitToConcatWith2By50InputsNotAlignedNoFCWithInCopyWithOutCopy  () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>100</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>100</dim>
+                    </port>
+                </output>
+        </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>50</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>100</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>100</dim>
+                    </port>
+                </output>
+        </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+        <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
 
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+        <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
+    std::string SplitToConcatWith2By64InputsAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+       </edges>
+</net>
+        )V0G0N";
+    }
+
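+    // Same 1x128 -> 2x(1x64) -> 1x128 topology as above, with a Copy layer on the Concat output.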
+    std::string SplitToConcatWith2By64InputsAlignedNoFCWithOutCopy () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
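+    // IR v2 model: 1x64 input -> Split (axis 1) into two 1x32 parts -> Concat back to 1x64; no Copy layers, no FullyConnected.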
+    std::string SplitToConcatWith2InputsAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
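+    // Same 1x64 -> 2x(1x32) -> 1x64 topology, with Copy layers both before the Split and after the Concat.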
+    std::string SplitToConcatWith2InputsAlignedNoFCWithInCopyWithOutCopy () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+               <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
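+    // IR v2 model: 1x20 input -> Split into two 1x10 parts -> Concat to 1x20 -> FullyConnected with out-size 10.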
+    std::string SplitToConcatWith2InputsNotAlignedWithFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="10"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>20</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="840"/>
+                               <biases offset="800" size="40"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
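+    // IR v2 model: 1x64 input -> Split into two 1x32 parts -> Concat to 1x64 -> FullyConnected with out-size 32.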
+    std::string SplitToConcatWith2InputsAlignedWithFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="32"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="8320"/>
+                               <biases offset="8192" size="128"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
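+    // Same 1x64 -> 2x(1x32) -> 1x64 -> FullyConnected topology, with a Copy layer between the input and the Split.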
+    std::string SplitToConcatWith2InputsAlignedWithFCWithInCopy () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_2_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="32"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>64</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="8320"/>
+                               <biases offset="8192" size="128"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+               <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
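+    // IR v2 model: 1x30 input -> Split into three 1x10 parts -> Concat back to 1x30; no FullyConnected.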
+    std::string SplitToConcatWith3InputsNotAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+       </edges>
+</net>
+        )V0G0N";
+    }
+
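+    // Same 1x30 -> 3x(1x10) -> 1x30 topology, followed by a FullyConnected with out-size 10.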
+    std::string SplitToConcatWith3InputsNotAlignedWithFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="10"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>30</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="1240"/>
+                               <biases offset="1200" size="40"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
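+    // IR v2 model: 1x96 input -> Split into three 1x32 parts -> Concat back to 1x96; no FullyConnected.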
+    std::string SplitToConcatWith3InputsAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs_align" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+       </edges>
+</net>
+        )V0G0N";
+    }
+
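+    // Same 1x96 -> 3x(1x32) -> 1x96 topology, with Copy layers both before the Split and after the Concat.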
+    std::string SplitToConcatWith3InputsAlignedNoFCWithInCopyWithOutCopy () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs_align" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+               <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+
+       </edges>
+</net>
+        )V0G0N";
+    }
+
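+    // Same 1x96 -> 3x(1x32) -> 1x96 topology, followed by a FullyConnected producing 1x32.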
+    std::string SplitToConcatWith3InputsAlignedWithFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs_align" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="32"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="12416"/>
+                               <biases offset="12288" size="128"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
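+    // Same 1x96 -> 3x(1x32) -> 1x96 -> FullyConnected topology, with a Copy layer between the input and the Split.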
+    std::string SplitToConcatWith3InputsAlignedWithFCWithInCopy () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs_align" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="32"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>96</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="12416"/>
+                               <biases offset="12288" size="128"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+               <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+               <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
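+    // IR v2 model: 1x40 input -> Split into four 1x10 parts -> Concat back to 1x40; no FullyConnected.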
+    std::string SplitToConcatWith4InputsNotAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_4_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
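+    // Same 1x40 -> 4x(1x10) -> 1x40 topology, with a Copy layer on the Concat output.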
+    std::string SplitToConcatWith4InputsNotAlignedNoFCWithOutCopy () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_4_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>40</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+
+               <edge from-layer="2" from-port="5" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
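+    // IR v2 model: 1x100 input -> Split into ten 1x10 parts -> Concat back to 1x100; no FullyConnected.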
+    std::string SplitToConcatWith10InputsNotAlignedNoFC () {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
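+    // Test IR: 1x100 input -> Split (axis 1) into 10 x 1x10 -> Concat back to 1x100 -> Copy to output.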
+    std::string SplitToConcatWith10InputsNotAlignedNoFCWithOutCopy() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>100</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>100</dim>
+                    </port>
+                </output>
+        </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+
+        <edge from-layer="2" from-port="11" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
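+    // Test IR: 1x10 input -> Split (axis 1) into 10 x 1x1 -> Concat back to 1x10 -> Copy to output.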
+    std::string SplitToConcatWith10By1InputsNotAlignedNoFCWithOutCopy() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </output>
+        </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+
+        <edge from-layer="2" from-port="11" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
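+    // Test IR: 1x320 input -> Split (axis 1) into 10 x 1x32 -> Concat back to 1x320; no FC or Copy layers.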
+    std::string SplitToConcatWith10InputsAlignedNoFC() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
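+    // Test IR: 1x320 input -> Copy -> Split (axis 1) into 10 x 1x32 -> Concat back to 1x320 -> Copy to output.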
+    std::string SplitToConcatWith10InputsAlignedNoFCWithInCopyWithOutCopy() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>320</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>320</dim>
+                    </port>
+                </output>
+        </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>320</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>320</dim>
+                    </port>
+                </output>
+        </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+        <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+
+        <edge from-layer="2" from-port="11" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
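+    // Test IR: 1x100 input -> Split (axis 1) into 10 x 1x10 -> Concat back to 1x100 -> FullyConnected (out-size 10).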
+    std::string SplitToConcatWith10InputsNotAlignedWithFC() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="10"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="4040"/>
+                               <biases offset="4000" size="40"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+
+        <edge from-layer="2" from-port="11" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
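+    // Test IR: 1x320 input -> Split (axis 1) into 10 x 1x32 -> Concat back to 1x320 -> FullyConnected (out-size 32).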
+    std::string SplitToConcatWith10InputsAlignedWithFC() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+       <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="32"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="41088"/>
+                               <biases offset="40960" size="128"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+
+        <edge from-layer="2" from-port="11" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
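+    // Test IR: 1x320 input -> Copy -> Split (axis 1) into 10 x 1x32 -> Concat back to 1x320 -> FullyConnected (out-size 32).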
+    std::string SplitToConcatWith10InputsAlignedWithFCWithInCopy() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_10_inputs" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="input_copy" id="4" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>320</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>320</dim>
+                    </port>
+                </output>
+        </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="5">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="6">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="7">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="8">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="9">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="10">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="11">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </output>
+               </layer>
+       <layer id="3" name="fc" precision="FP32" type="FullyConnected">
+                       <data out-size="32"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>320</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="41088"/>
+                               <biases offset="40960" size="128"/>
+                       </blobs>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+
+        <edge from-layer="4" from-port="1" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+               <edge from-layer="1" from-port="4" to-layer="2" to-port="4"/>
+               <edge from-layer="1" from-port="5" to-layer="2" to-port="5"/>
+               <edge from-layer="1" from-port="6" to-layer="2" to-port="6"/>
+               <edge from-layer="1" from-port="7" to-layer="2" to-port="7"/>
+               <edge from-layer="1" from-port="8" to-layer="2" to-port="8"/>
+               <edge from-layer="1" from-port="9" to-layer="2" to-port="9"/>
+               <edge from-layer="1" from-port="10" to-layer="2" to-port="10"/>
+
+        <edge from-layer="2" from-port="11" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
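+    // Test IR: 1x1536 input -> Split (axis 1) into 3 x 1x512 -> Concat back to 1x1536 -> Copy to output.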
+    std::string SplitToConcatWith3By512InputsWithOutCopy() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model_split_to_concat_with_3_inputs_align" version="2">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1536</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="split" precision="FP32" type="Split">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1536</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>512</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>512</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>512</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="concat" precision="FP32" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>512</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>512</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>512</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="4">
+                                       <dim>1</dim>
+                                       <dim>1536</dim>
+                               </port>
+                       </output>
+               </layer>
+        <layer name="output_copy" id="3" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>1536</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>1536</dim>
+                    </port>
+                </output>
+        </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="1" from-port="2" to-layer="2" to-port="2"/>
+               <edge from-layer="1" from-port="3" to-layer="2" to-port="3"/>
+
+        <edge from-layer="2" from-port="4" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+        )V0G0N";
+    }
+
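+    // Test IR: 1x800 input -> Reshape to 1x4x1x200 -> Convolution (16 filters, kernel 1x2, stride 1x2) -> Reshape to 1x1600 -> Copy to output.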
+    std::string ReshapeConvolutionLessThan48Filters() {
+        return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="frozen_model" version="4">
+       <layers>
+               <layer id="0" name="input" precision="FP32" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>800</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="conv1d_1/convolution/ExpandDims" precision="FP32" type="Reshape">
+                       <data dim="1,4,1,200"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>800</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>4</dim>
+                                       <dim>1</dim>
+                                       <dim>200</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="conv1d_1/convolution/Conv1D" precision="FP32" type="Convolution">
+                       <data auto_pad="valid" dilations="1,1" group="1" kernel="1,2" output="16" pads_begin="0,0" pads_end="0,0" strides="1,2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>4</dim>
+                                       <dim>1</dim>
+                                       <dim>200</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <weights offset="0" size="512"/>
+                       </blobs>
+               </layer>
+               <layer id="3" name="conv1d_1/convolution/RevertDims" precision="FP32" type="Reshape">
+                       <data dim="1,1600"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                                       <dim>1</dim>
+                                       <dim>100</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1600</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer name="output_copy" id="4" type="Copy" precision="FP32">
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>1600</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>1600</dim>
+                    </port>
+                </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
+               <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
+               <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
+       </edges>
+</net>
+    )V0G0N";
+    }
 
 }  // namespace GNATestIRs
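For reference, the IR snippets returned by these GNATestIRs helpers are parsed in-memory by the GNA unit tests rather than read from disk; a minimal sketch of how such a string can be turned into a network with the 2019 R3 reader API (not the tests' actual fixture code, and the weight-blob size is an assumed placeholder) looks like this:

    #include <string>
    #include <cpp/ie_cnn_net_reader.h>
    #include <ie_blob.h>

    // Parse an in-memory IR string (e.g. GNATestIRs::ReshapeConvolutionLessThan48Filters())
    // and attach a weights blob big enough to cover the offsets referenced in <blobs>.
    void loadTestIR(const std::string& xml, size_t weightsBytes) {
        InferenceEngine::CNNNetReader reader;
        reader.ReadNetwork(xml.data(), xml.size());          // parse XML from memory, not from a file
        auto weights = InferenceEngine::make_shared_blob<uint8_t>(
            {InferenceEngine::Precision::U8, {weightsBytes}, InferenceEngine::Layout::C});
        weights->allocate();                                 // contents are irrelevant for shape-only tests
        reader.SetWeights(weights);                          // bind the <blobs> offsets/sizes
        InferenceEngine::CNNNetwork network = reader.getNetwork();
        // ... hand `network` to the test graph / plugin while `reader` is still alive
    }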
index 5449607..8383419 100644
@@ -67,4 +67,39 @@ std::string InputSplitConcatReshapeModelUnaligned();
 std::string LSTMCellOnlyModelUnaligned();
 std::string SplitToConcatThroughScaleShift();
 std::string PowerWithScaleFactor1();
+std::string ConcatWithDiffScaleFactor();
+
+std::string SplitToConcatWith2InputsNotAlignedNoFC();
+std::string SplitToConcatWith2InputsAlignedNoFC();
+std::string SplitToConcatWith2InputsAlignedNoFCWithInCopyWithOutCopy();
+std::string SplitToConcatWith2InputsNotAlignedWithFC();
+std::string SplitToConcatWith2InputsAlignedWithFC();
+std::string SplitToConcatWith2InputsAlignedWithFCWithInCopy();
+
+std::string SplitToConcatWith3InputsNotAlignedNoFC();
+std::string SplitToConcatWith3InputsAlignedNoFC();
+std::string SplitToConcatWith3InputsAlignedNoFCWithInCopyWithOutCopy();
+std::string SplitToConcatWith3InputsNotAlignedWithFC();
+std::string SplitToConcatWith3InputsAlignedWithFC();
+std::string SplitToConcatWith3InputsAlignedWithFCWithInCopy();
+
+std::string SplitToConcatWith4InputsNotAlignedNoFC();
+std::string SplitToConcatWith4InputsNotAlignedNoFCWithOutCopy();
+
+std::string SplitToConcatWith10InputsNotAlignedNoFC();
+std::string SplitToConcatWith10InputsNotAlignedNoFCWithOutCopy();
+std::string SplitToConcatWith10InputsAlignedNoFC();
+std::string SplitToConcatWith10InputsAlignedNoFCWithInCopyWithOutCopy();
+std::string SplitToConcatWith10InputsNotAlignedWithFC();
+std::string SplitToConcatWith10InputsAlignedWithFC();
+std::string SplitToConcatWith10InputsAlignedWithFCWithInCopy();
+
+std::string SplitToConcatWith10By1InputsNotAlignedNoFCWithOutCopy();
+std::string SplitToConcatWith2By50InputsNotAlignedNoFC();
+std::string SplitToConcatWith2By50InputsNotAlignedNoFCWithInCopyWithOutCopy();
+std::string SplitToConcatWith2By64InputsAlignedNoFC();
+std::string SplitToConcatWith2By64InputsAlignedNoFCWithOutCopy();
+std::string SplitToConcatWith3By512InputsWithOutCopy();
+
+std::string ReshapeConvolutionLessThan48Filters();
 }  // namespace GNATestIRs
index c1e0985..21cb455 100644
@@ -19,6 +19,7 @@ using namespace std;
 using namespace mkldnn;
 
 struct broadcast_test_params {
+    std::string                 shape_precision;
     std::string                 precision;
     InferenceEngine::SizeVector in_shape;
     InferenceEngine::SizeVector out_shape;
@@ -33,6 +34,11 @@ void ref_broadcast(InferenceEngine::TBlob<data_t> &src, InferenceEngine::TBlob<d
     const data_t *src_data = src.data();
     InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
     InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+
+    if (!src_dims.size())
+        src_dims = InferenceEngine::SizeVector(1, 1);
+    if (!srcStrides.size())
+        srcStrides = InferenceEngine::SizeVector(1, 1);
     data_t* dst_data = dst.data();
     InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims();
     InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
@@ -86,7 +92,7 @@ class MKLDNNCPUExtBroadcastTests : public TestsCommon, public WithParamInterface
                 </port>
             </output>
         </layer>
-        <layer name="shape" type="Input" precision="I32" id="2">
+        <layer name="shape" type="Input" precision="_ISDXP_" id="2">
             <output>
                 <port id="2">
                     <dim>_DIM_SIZE_</dim>
@@ -119,10 +125,11 @@ class MKLDNNCPUExtBroadcastTests : public TestsCommon, public WithParamInterface
 
     std::string getModel(broadcast_test_params p) {
         std::string model = model_t;
-        std::string in_shape;
+        std::string in_shape = "";
         std::string out_shape;
 
         REPLACE_WITH_STR(model, "_IIDXP_", p.precision);
+        REPLACE_WITH_STR(model, "_ISDXP_", p.shape_precision);
         for (size_t i = 0; i < p.in_shape.size(); i++) {
             in_shape += "<dim>";
             in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
@@ -166,20 +173,31 @@ protected:
             // Input Data
             InferenceEngine::Blob::Ptr dims;
             InferenceEngine::SizeVector vector_dim(1, p.out_shape.size());
-            dims = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) });
-            dims->allocate();
-            for (size_t i = 0; i < p.out_shape.size(); i++) {
-                static_cast<int32_t*>(dims->buffer())[i] = static_cast<int32_t>(p.out_shape[i]);
+            if (p.shape_precision == "I32") {
+                dims = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) });
+                dims->allocate();
+                for (size_t i = 0; i < p.out_shape.size(); i++) {
+                    static_cast<int32_t*>(dims->buffer())[i] = static_cast<int32_t>(p.out_shape[i]);
+                }
+                auto * dimsPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(dims.get());
+                if (dimsPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+            }  else if (p.shape_precision == "FP32") {
+                dims = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) });
+                dims->allocate();
+                for (size_t i = 0; i < p.out_shape.size(); i++) {
+                    static_cast<float*>(dims->buffer())[i] = static_cast<float>(p.out_shape[i]);
+                }
+                auto * dimsPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(dims.get());
+                if (dimsPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
             }
-            auto * dimsPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(dims.get());
-            if (dimsPtr == nullptr)
-                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
 
             InferenceEngine::BlobMap srcs;
             InferenceEngine::Blob::Ptr src;
             std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
             if (p.precision == "I32") {
-                src = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+                src = InferenceEngine::make_shared_blob<int32_t>({InferenceEngine::Precision::I32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape)});
                 src->allocate();
                 for (size_t i = 0; i < src->size(); i++)
                     static_cast<int32_t*>(src->buffer())[i] = static_cast<int32_t>(i);
@@ -207,9 +225,8 @@ protected:
                     if (dst_ref.data()[i] != (*output).data()[i])
                         FAIL() << "The difference between res_ptr[i] and ref_ptr[i]";
                 }
-            }
-            else if (p.precision == "FP32") {
-                src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            } else if (p.precision == "FP32") {
+                src = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape)});
                 src->allocate();
                 fill_data_dbgval(src->buffer(), src->size());
                 auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
@@ -249,17 +266,18 @@ TEST_P(MKLDNNCPUExtBroadcastTests, TestsBroadcast) {}
 INSTANTIATE_TEST_CASE_P(
     TestsBroadcast, MKLDNNCPUExtBroadcastTests,
     ::testing::Values(
-        // Params: precision, in_shape, out_shape
-        broadcast_test_params{ "I32", { 1 }, { 2, 3, 4 } },
-        broadcast_test_params{ "I32", { 4, 1, 2 }, { 4, 2, 2 } },
-        broadcast_test_params{ "I32", { 4, 2, 1 }, { 4, 2, 2 } },
-        broadcast_test_params{ "I32", { 4, 2 }, { 2, 4, 2 } },
-        broadcast_test_params{ "I32", { 4, 1, 1 }, { 4, 2, 1 } },
-        broadcast_test_params{ "I32", { 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } },
-        broadcast_test_params{"FP32", { 1 }, { 2, 3, 4 } },
-        broadcast_test_params{"FP32", { 4, 1, 2 }, { 4, 2, 2 } },
-        broadcast_test_params{"FP32", { 4, 2, 1 }, { 4, 2, 2 } },
-        broadcast_test_params{"FP32", { 4, 2 }, { 2, 4, 2 } },
-        broadcast_test_params{"FP32", { 4, 1, 1 }, { 4, 2, 1 } },
-        broadcast_test_params{"FP32", { 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } }
+        // Params: shape_precision, precision, in_shape, out_shape
+        broadcast_test_params{ "I32", "I32",{},{ 2, 3, 4 } },
+        broadcast_test_params{ "I32", "I32",{ 4, 1, 2 },{ 4, 2, 2 } },
+        broadcast_test_params{ "I32", "I32",{ 4, 2, 1 },{ 4, 2, 2 } },
+        broadcast_test_params{ "I32", "I32",{ 4, 2 },{ 2, 4, 2 } },
+        broadcast_test_params{ "I32", "I32",{ 4, 1, 1 },{ 4, 2, 1 } },
+        broadcast_test_params{ "I32", "I32",{ 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } },
+        broadcast_test_params{ "I32","FP32",{},{ 2, 3, 4 } },
+        broadcast_test_params{ "I32","FP32",{ 4, 1, 2 },{ 4, 2, 2 } },
+        broadcast_test_params{ "I32","FP32",{ 4, 2, 1 },{ 4, 2, 2 } },
+        broadcast_test_params{ "I32","FP32",{ 4, 2 },{ 2, 4, 2 } },
+        broadcast_test_params{ "I32","FP32",{ 4, 1, 1 },{ 4, 2, 1 } },
+        broadcast_test_params{ "I32","FP32", { 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } },
+        broadcast_test_params{"FP32","FP32",{ 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } }
 ));
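The instantiations above now carry the precision of the auxiliary "shape" input (I32 or FP32) separately from the data precision, and they add degenerate scalar inputs ({} broadcast to {2, 3, 4}). What the reference checks is plain one-directional broadcasting: source dims are right-aligned against the output dims and any size-1 (or missing) source dim is repeated. A minimal standalone sketch of that index mapping (illustrative code, not the plugin's kernel):

    #include <cstddef>
    #include <vector>

    // Broadcast `src` of shape `src_dims` to shape `dst_dims` (row-major layout).
    // Source dims are right-aligned; a dim of size 1, or a missing leading dim,
    // contributes nothing to the source index, so that element is repeated.
    std::vector<float> broadcast_ref(const std::vector<float>& src,
                                     const std::vector<size_t>& src_dims,
                                     const std::vector<size_t>& dst_dims) {
        const size_t nd = dst_dims.size();
        const size_t offset = nd - src_dims.size();   // rank difference (src rank <= dst rank)
        size_t total = 1;
        for (size_t d : dst_dims) total *= d;

        std::vector<float> dst(total);
        for (size_t i = 0; i < total; ++i) {
            size_t rem = i, src_index = 0, src_stride = 1;
            for (size_t d = nd; d-- > 0;) {           // walk dims from innermost to outermost
                const size_t coord = rem % dst_dims[d];
                rem /= dst_dims[d];
                if (d >= offset) {
                    const size_t sd = src_dims[d - offset];
                    if (sd != 1) src_index += coord * src_stride;
                    src_stride *= sd;                 // size-1 dims leave the stride unchanged
                }
            }
            dst[i] = src[src_index];
        }
        return dst;
    }

With src_dims = {4, 1, 2} and dst_dims = {4, 2, 2}, every output element (a, b, c) maps back to source element (a, 0, c), which is the repetition the parameter table exercises; an empty src_dims degenerates to repeating the single scalar everywhere.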
index 8c2c369..fa0af69 100644
@@ -44,8 +44,8 @@ class FakeExtensions : public IExtension {
 
     void GetVersion(const Version *&versionInfo) const noexcept override {
         static Version ExtensionDescription = {
-            {2, 0},    // extension API version
-            "2.0",
+            {2, 1},    // extension API version
+            "2.1",
             "ie-cpu-ext"  // extension description message
         };
 
index d92a4f2..4976106 100644
@@ -21,8 +21,9 @@ using namespace mkldnn;
 
 struct gather_test_params {
     std::string inIdxPrecision;
-    InferenceEngine::SizeVector inIdx;
     InferenceEngine::SizeVector inDict;
+    InferenceEngine::SizeVector inIdx;
+
     int axis;
     InferenceEngine::SizeVector out;
 
@@ -40,34 +41,23 @@ void ref_gather(InferenceEngine::TBlob<data_t> &srcIdx, InferenceEngine::TBlob<f
     float *dst_data = dst.data();
     size_t src_size = srcIdx.size();
 
-    std::vector<size_t> dims = srcDct.getTensorDesc().getDims();
-    std::vector<size_t> dims_actual;
-
-    //  Remove redundant dimensions
-    for (size_t i = 0; i < dims.size(); i++) {
-        if (dims[i] > 1) {
-            for (size_t j = i; j < dims.size(); j++)
-                dims_actual.push_back(dims[j]);
-            break;
-        }
-    }
+    std::vector<size_t> dictionary_dims = srcDct.getTensorDesc().getDims();
 
     //  Find number of dictionaries, index range and data length
     size_t numDictionaries = 1;
     for (i = 0; i < axis; i++)
-        numDictionaries *= dims_actual[i];
-    size_t indexRange = dims_actual[axis];
+        numDictionaries *= dictionary_dims[i];
+    size_t indexRange = dictionary_dims[axis];
     size_t dataLength = 1;
-    for (i = axis + 1; i < dims_actual.size(); i++)
-        dataLength *= dims_actual[i];
+    for (i = axis + 1; i < dictionary_dims.size(); i++)
+        dataLength *= dictionary_dims[i];
 
     //  The gathering process
     for (i = 0; i < src_size; i++) {
         unsigned int idx = static_cast<unsigned int>(src_dataIdx[i]);
 
         //  Index clipping
-        if (idx < indexRange)
-        {
+        if (idx < indexRange) {
             //  Copying data to destination from Dictionary
             for (j = 0; j < numDictionaries; j++) {
                 memcpy(&dst_data[dataLength * (i + j * src_size)],
@@ -85,17 +75,17 @@ class MKLDNNCPUExtGatherTests: public TestsCommon, public WithParamInterface<gat
     std::string model_t = R"V0G0N(
 <net Name="Gather_net" version="2" precision="FP32" batch="1">
     <layers>
-        <layer name="InputText" type="Input" precision="_IIDXP_" id="1">
+        <layer name="InputDictionary" type="Input" precision="FP32" id="1">
             <output>
                 <port id="1">
-                    _IIDX_
+                    _IDICT_
                 </port>
             </output>
         </layer>
-        <layer name="InputDictionary" type="Input" precision="FP32" id="2">
+        <layer name="InputText" type="Input" precision="_IIDXP_" id="2">
             <output>
                 <port id="2">
-                    _IDICT_
+                    _IIDX_
                 </port>
             </output>
         </layer>
@@ -117,17 +107,17 @@ class MKLDNNCPUExtGatherTests: public TestsCommon, public WithParamInterface<gat
         </layer>
     </layers>
     <edges>
-        <edge from-layer="1" from-port="1" to-layer="3" to-port="2"/>
-        <edge from-layer="2" from-port="2" to-layer="3" to-port="1"/>
+        <edge from-layer="1" from-port="1" to-layer="3" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="3" to-port="2"/>
     </edges>
 </net>
 )V0G0N";
 
     std::string getModel(gather_test_params p) {
         std::string model = model_t;
-        std::string inIdx;
+        std::string inIdx = "";
         std::string inDict;
-        std::string out;
+        std::string out = "";
 
         for (auto& idx : p.inIdx) {
             inIdx += "<dim>";
@@ -193,7 +183,6 @@ protected:
                               node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType);
                 }
             }
-            ASSERT_EQ(4, nodes.size());
 
             // Input Dictionary
             InferenceEngine::Blob::Ptr srcDict = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.inDict, InferenceEngine::TensorDesc::getLayoutByDims(p.inDict) });
@@ -309,27 +298,32 @@ TEST_P(MKLDNNCPUExtGatherTests, TestsGather) {}
 INSTANTIATE_TEST_CASE_P(
         TestsGather, MKLDNNCPUExtGatherTests,
             ::testing::Values(
-                gather_test_params{ "FP32", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {12, 256}, {71, 16}, 0, {12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {3, 4}, {2, 5, 6}, 0, {3, 4, 5, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {3, 4}, {5, 1}, 0, {3, 4, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{ "FP32", {1, 1, 12, 256}, {1, 1, 71, 16}, 1, {1, 71, 12, 256}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {1, 1, 3, 4}, {1, 2, 5, 6}, 1, {2, 3, 4, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {1, 1, 3, 4}, {1, 2, 5, 6}, 2, {2, 5, 3, 4}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I32", {12, 4, 9, 8}, {6, 13, 10, 3}, 1, {6, 12, 4, 9, 8, 10, 3}, 1, MKLDNNPlugin::impl_desc_type::unknown }
+// Params: inIdxPrecision, inDict, inIdx, axis, out, num_prim_desc, selectedType
+                gather_test_params{  "I32",{ 31 },{}, 0,{}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{ "FP32",{ 31 },{}, 0,{}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{ "FP32",{ 1, 31, 4 },{ 10 }, 1,{ 1, 10, 4 }, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{ "FP32",{ 31, 7 },{ 1,12,1 }, 0,{ 1, 12, 1, 7 }, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{ "FP32", {71, 16}, {1, 12, 256}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {71, 16}, {1, 12, 256}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {71, 16}, {12, 256}, 0, {12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {2, 5, 6}, {3, 4}, 0, {3, 4, 5, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {5, 1}, {3, 4}, 0, {3, 4, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{ "FP32", {71, 16}, {1, 12, 256}, 1, {1, 71, 12, 256}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {2, 5, 6}, {1, 1, 3, 4}, 1, {2, 3, 4, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {2, 5, 6}, {1, 1, 3, 4}, 2, {2, 5, 3, 4}, 1, MKLDNNPlugin::impl_desc_type::unknown },
+                gather_test_params{  "I32", {6, 13, 10, 3}, {12, 4, 9, 8}, 1, {6, 12, 4, 9, 8, 10, 3}, 1, MKLDNNPlugin::impl_desc_type::unknown }
             ));
 
 
 
 
 struct gatherTF_test_params {
-    InferenceEngine::SizeVector in_dim;
-    std::vector<int32_t> in;
-
     InferenceEngine::SizeVector dct_dim;
     std::vector<float> dct;
 
+    InferenceEngine::SizeVector in_dim;
+    std::vector<int32_t> in;
+
     int axis;
 
     InferenceEngine::SizeVector ref_dim;
@@ -342,17 +336,17 @@ class MKLDNNCPUExtGatherTFTests : public TestsCommon, public WithParamInterface<
     std::string model_t = R"V0G0N(
 <net Name="Gather_net" version="2" precision="FP32" batch="1">
     <layers>
-        <layer name="InputText" type="Input" precision="I32" id="1">
+        <layer name="InputDictionary" type="Input" precision="FP32" id="1">
             <output>
                 <port id="1">
-                    _IIDX_
+                    _IDICT_
                 </port>
             </output>
         </layer>
-        <layer name="InputDictionary" type="Input" precision="FP32" id="2">
+        <layer name="InputText" type="Input" precision="I32" id="2">
             <output>
                 <port id="2">
-                    _IDICT_
+                    _IIDX_
                 </port>
             </output>
         </layer>
@@ -374,8 +368,8 @@ class MKLDNNCPUExtGatherTFTests : public TestsCommon, public WithParamInterface<
         </layer>
     </layers>
     <edges>
-        <edge from-layer="1" from-port="1" to-layer="3" to-port="2"/>
-        <edge from-layer="2" from-port="2" to-layer="3" to-port="1"/>
+        <edge from-layer="1" from-port="1" to-layer="3" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="3" to-port="2"/>
     </edges>
 </net>
 )V0G0N";
@@ -474,8 +468,6 @@ protected:
 TEST_P(MKLDNNCPUExtGatherTFTests, TestsGather) {}
 
 //  Test data vectors
-std::vector<int32_t> in0 = { 0, 1, 1, 0 };
-std::vector<int32_t> in1 = { 0, 1, 2, 1 };
 std::vector<float> dict = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f };
 std::vector<float> ref_in0_a0_d223 = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }; // 2x2x2x3
 std::vector<float> ref_in0_a2_d232 = { 1.f, 2.f, 2.f, 1.f, 3.f, 4.f, 4.f, 3.f, 5.f, 6.f, 6.f, 5.f, 7.f, 8.f, 8.f, 7.f, 9.f, 10.f, 10.f, 9.f, 11.f, 12.f, 12.f, 11.f }; // 2x3x2x2
@@ -486,34 +478,37 @@ std::vector<float> ref_in1_a2_d223 = { 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7
 INSTANTIATE_TEST_CASE_P(
         TestsGather, MKLDNNCPUExtGatherTFTests,
         ::testing::Values(
-        gatherTF_test_params{ { 2, 2 }, in0,{ 2, 2, 3 }, dict, 0, { 2, 2, 2, 3 }, ref_in0_a0_d223 },
-        gatherTF_test_params{ { 2, 2 }, in0,{ 2, 2, 3 }, dict,-3, { 2, 2, 2, 3 }, ref_in0_a0_d223 },
-        gatherTF_test_params{ { 2, 2 }, in0,{ 2, 3, 2 }, dict, 2, { 2, 3, 2, 2 }, ref_in0_a2_d232 },
-        gatherTF_test_params{ { 2, 2 }, in0,{ 2, 3, 2 }, dict,-1, { 2, 3, 2, 2 }, ref_in0_a2_d232 },
-        gatherTF_test_params{ { 2, 2 }, in1,{ 3, 2, 2 }, dict, 0, { 2, 2, 2, 2 }, ref_in1_a0_d322 },
-        gatherTF_test_params{ { 2, 2 }, in1,{ 3, 2, 2 }, dict,-3, { 2, 2, 2, 2 }, ref_in1_a0_d322 },
-        gatherTF_test_params{ { 2, 2 }, in1,{ 2, 3, 2 }, dict, 1, { 2, 2, 2, 2 }, ref_in1_a1_d232 },
-        gatherTF_test_params{ { 2, 2 }, in1,{ 2, 3, 2 }, dict,-2, { 2, 2, 2, 2 }, ref_in1_a1_d232 },
-        gatherTF_test_params{ { 2, 2 }, in1,{ 2, 2, 3 }, dict, 2, { 2, 2, 2, 2 }, ref_in1_a2_d223 },
-        gatherTF_test_params{ { 2, 2 }, in1,{ 2, 2, 3 }, dict,-1, { 2, 2, 2, 2 }, ref_in1_a2_d223 }));
+// Params: dct_dim, dct, in_dim, in, axis, ref_dim, ref
+        gatherTF_test_params{ { 3,2 }, {1.0, 1.2, 2.3, 3.4, 4.5, 5.7 }, { 2, 2 }, { 0, 1, 1, 2 },0, { 2, 2, 2 }, {1.0, 1.2, 2.3, 3.4,2.3, 3.4,4.5, 5.7 } },
+        gatherTF_test_params{ { 3,3 },{ 1.0, 1.2, 1.9,2.3, 3.4, 3.9,4.5, 5.7, 5.9 }, { 1, 2 }, { 0, 2 },1,{ 3, 2 },{ 1.0, 1.9,2.3, 3.9,4.5, 5.9 } },
+        gatherTF_test_params{ { 2, 2, 3 }, dict, { 2, 2 }, { 0, 1, 1, 0 },0, { 2, 2, 2, 3 }, ref_in0_a0_d223 },
+        gatherTF_test_params{ { 2, 2, 3 }, dict,{ 2, 2 }, { 0, 1, 1, 0 },-3, { 2, 2, 2, 3 }, ref_in0_a0_d223 },
+        gatherTF_test_params{ { 2, 3, 2 }, dict, { 2, 2 }, { 0, 1, 1, 0 },2, { 2, 3, 2, 2 }, ref_in0_a2_d232 },
+        gatherTF_test_params{ { 2, 3, 2 }, dict,{ 2, 2 }, { 0, 1, 1, 0 },-1, { 2, 3, 2, 2 }, ref_in0_a2_d232 },
+        gatherTF_test_params{ { 3, 2, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 }, 0, { 2, 2, 2, 2 }, ref_in1_a0_d322 },
+        gatherTF_test_params{ { 3, 2, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 },-3, { 2, 2, 2, 2 }, ref_in1_a0_d322 },
+        gatherTF_test_params{ { 2, 3, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 }, 1, { 2, 2, 2, 2 }, ref_in1_a1_d232 },
+        gatherTF_test_params{ { 2, 3, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 },-2, { 2, 2, 2, 2 }, ref_in1_a1_d232 },
+        gatherTF_test_params{ { 2, 2, 3 }, dict,{ 2, 2 }, { 0, 1, 2, 1 }, 2, { 2, 2, 2, 2 }, ref_in1_a2_d223 },
+        gatherTF_test_params{ { 2, 2, 3 }, dict,{ 2, 2 }, { 0, 1, 2, 1 },-1, { 2, 2, 2, 2 }, ref_in1_a2_d223 }));
 
 
 class MKLDNNCPUExtGatherHolesTests : public TestsCommon, public WithParamInterface<gatherTF_test_params> {
     std::string model_t = R"V0G0N(
 <net Name="Gather_net" version="2" precision="FP32" batch="1">
     <layers>
-        <layer name="InputText" type="Input" precision="I32" id="1">
+        <layer name="InputDictionary" type="Input" precision="FP32" id="1">
             <output>
                 <port id="1">
+                    <dim>3</dim>
                     <dim>2</dim>
                     <dim>2</dim>
                 </port>
             </output>
         </layer>
-        <layer name="InputDictionary" type="Input" precision="FP32" id="2">
+        <layer name="InputText" type="Input" precision="I32" id="2">
             <output>
                 <port id="2">
-                    <dim>3</dim>
                     <dim>2</dim>
                     <dim>2</dim>
                 </port>
@@ -578,8 +573,8 @@ class MKLDNNCPUExtGatherHolesTests : public TestsCommon, public WithParamInterfa
         </layer>
     </layers>
     <edges>
-        <edge from-layer="1" from-port="1" to-layer="4" to-port="2"/>
-        <edge from-layer="2" from-port="2" to-layer="4" to-port="1"/>
+        <edge from-layer="1" from-port="1" to-layer="4" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="4" to-port="2"/>
         <edge from-layer="4" from-port="3" to-layer="5" to-port="1"/>
         <edge from-layer="3" from-port="3" to-layer="5" to-port="2"/>
     </edges>
@@ -686,5 +681,6 @@ TEST_P(MKLDNNCPUExtGatherHolesTests, TestsGather) {}
 INSTANTIATE_TEST_CASE_P(
     TestsGather, MKLDNNCPUExtGatherHolesTests,
     ::testing::Values(
-        gatherTF_test_params{ { 1, 5, 2, 2 }, in1,{ 1, 3, 2, 2 }, dict, 1,{ 2, 2, 2, 2 }, ref_in1_a0_d322 }));
+        // Params: dct_dim, dct, in_dim, in, axis, ref_dim, ref
+        gatherTF_test_params{ { 1, 3, 2, 2 }, dict,{ 1, 5, 2, 2 },{ 0, 1, 2, 1 }, 1,{ 2, 2, 2, 2 }, ref_in1_a0_d322 }));
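The re-ordered parameters above (dictionary shape and data first, then the index tensor) follow what the reference implementation actually needs: the dictionary is split at `axis` into an outer slice count, the valid index range, and an inner contiguous run length. A minimal standalone sketch of that gather, assuming a non-negative axis (the TF-style cases above additionally accept negative axes counted from the end):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Gather slices of `dict` (shape `dict_dims`) along `axis` using `idx`.
    // Out-of-range indices are skipped, leaving zeros in the output, which is
    // the index-clipping behaviour the reference above relies on.
    std::vector<float> gather_ref(const std::vector<float>& dict,
                                  const std::vector<size_t>& dict_dims,
                                  const std::vector<int32_t>& idx, size_t axis) {
        size_t before = 1, after = 1;
        for (size_t d = 0; d < axis; ++d) before *= dict_dims[d];                    // number of dictionaries
        for (size_t d = axis + 1; d < dict_dims.size(); ++d) after *= dict_dims[d];  // contiguous run length
        const size_t range = dict_dims[axis];                                        // valid index range

        std::vector<float> out(before * idx.size() * after, 0.f);
        for (size_t i = 0; i < idx.size(); ++i) {
            const size_t v = static_cast<size_t>(idx[i]);
            if (v >= range) continue;
            for (size_t b = 0; b < before; ++b)
                std::copy_n(dict.begin() + (b * range + v) * after, after,
                            out.begin() + (b * idx.size() + i) * after);
        }
        return out;
    }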
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tree_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tree_tests.cpp
deleted file mode 100644
index bd29222..0000000
+++ /dev/null
@@ -1,286 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_plugin/mkldnn_graph.h"
-
-#include "test_graph.hpp"
-
-#include "single_layer_common.hpp"
-#include <mkldnn_plugin/mkldnn_extension_utils.h>
-#include <extension/ext_list.hpp>
-#include "tests_common.hpp"
-#include <stdio.h>
-
-#include <cpp/ie_cnn_net_reader.h>
-#include <ie_core.hpp>
-#include <ie_plugin_config.hpp>
-
-using namespace InferenceEngine;
-using namespace ::testing;
-using namespace std;
-using namespace mkldnn;
-
-struct gather_tree_test_params {
-    SizeVector           in_out_shape;
-    std::vector<int32_t> step_idx;
-    std::vector<int32_t> parent_idx;
-    std::vector<int32_t> max_seq_len;
-    std::vector<int32_t> end_token;
-    std::vector<int32_t> reference;
-
-    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
-};
-
-
-template <typename data_t>
-void ref_gather_tree(
-    InferenceEngine::TBlob<data_t> &step_idx,
-    InferenceEngine::TBlob<int32_t> &parent_idx,
-    InferenceEngine::TBlob<int32_t> &max_seq_len,
-    InferenceEngine::TBlob<data_t> &end_token,
-    InferenceEngine::TBlob<data_t> &dst
-) {
-    const data_t *step_idxPtr = step_idx.data();
-    const int32_t *parent_idxPtr = parent_idx.data();
-    const int32_t *max_seq_lenPtr = max_seq_len.data();
-    const data_t *end_tokenPtr = end_token.data();
-    data_t *final_idxPtr = dst.data();
-
-    SizeVector step_idx_dims = step_idx.getTensorDesc().getDims();
-    SizeVector parent_idx_dims = parent_idx.getTensorDesc().getDims();
-    SizeVector max_seq_len_dims = max_seq_len.getTensorDesc().getDims();
-    SizeVector final_idx_dims = dst.getTensorDesc().getDims();
-    int32_t max_time = step_idx_dims[0];
-    int32_t batch_size = step_idx_dims[1];
-    int32_t beam_width = step_idx_dims[2];
-
-    if (max_time != parent_idx_dims[0] || max_time != final_idx_dims[0] ||
-        batch_size != parent_idx_dims[1] || batch_size != final_idx_dims[1] || batch_size != max_seq_len_dims[0] ||
-        beam_width != parent_idx_dims[2] || beam_width != final_idx_dims[2]) {
-        FAIL() << " Input/Output tensors dimensions mismatch";
-        return;
-    }
-
-    for (int32_t time, batch = 0; batch < batch_size; batch++) {
-        for (int32_t beam = 0; beam < beam_width; beam++) {
-            int32_t max_sequence_in_beam = (std::min)(max_time, max_seq_lenPtr[batch]);
-            if (max_sequence_in_beam <= 0)
-                continue;
-
-            for (time = (max_time - 1); time >= max_sequence_in_beam; time--)
-                final_idxPtr[(time * batch_size + batch) * beam_width + beam] = (*end_tokenPtr);
-
-            for (int32_t parent = beam; time >= 0; time--) {
-                if (parent < 0 || parent >= beam_width) {
-                    FAIL() << " Wrong parent index";
-                    return;
-                }
-
-                int32_t idx = (time * batch_size + batch) * beam_width;
-                final_idxPtr[idx + beam] = step_idxPtr[idx + parent];
-                parent = parent_idxPtr[idx + parent];
-            }
-
-            bool finished = false;
-            data_t *final = &final_idxPtr[batch * beam_width + beam];
-            for (time = 0; time < max_sequence_in_beam; time++, final += (batch_size * beam_width)) {
-                if (finished)
-                    (*final) = (*end_tokenPtr);
-                else if ((*final) == (*end_tokenPtr))
-                    finished = true;
-            }
-        }
-    }
-}
-
-class MKLDNNCPUExtGatherTreeTests : public TestsCommon, public WithParamInterface<gather_tree_test_params> {
-    std::string model_t = R"V0G0N(
-<net Name="GatherTree_net" version="2" precision="FP32" batch="1">
-    <layers>
-        <layer name="step_idx" type="Input" precision="I32" id="1">
-            <output>
-                <port id="1">
-                    _IN_OUT_
-                </port>
-            </output>
-        </layer>
-        <layer name="parent_idx" type="Input" precision="I32" id="2">
-            <output>
-                <port id="2">
-                    _IN_OUT_
-                </port>
-            </output>
-        </layer>
-        <layer name="max_seq_len" type="Input" precision="I32" id="3">
-            <output>
-                <port id="3">
-                    <dim>_IN2_</dim>
-                </port>
-            </output>
-        </layer>
-        <layer name="end_token" type="Input" precision="I32" id="4">
-            <output>
-                <port id="4">
-                    <dim>1</dim>
-                </port>
-            </output>
-        </layer>
-        <layer name="output" id="2" type="GatherTree" precision="I32">
-            <data/>
-            <input>
-                <port id="1">
-                    _IN_OUT_
-                </port>
-                <port id="2">
-                    _IN_OUT_
-                </port>
-                <port id="3">
-                    <dim>_IN2_</dim>
-                </port>
-                <port id="4">
-                    <dim>1</dim>
-                </port>
-            </input>
-            <output>
-                <port id="5">
-                    _IN_OUT_
-                </port>
-            </output>
-        </layer>
-    </layers>
-    <edges>
-        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
-        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
-        <edge from-layer="3" from-port="3" to-layer="2" to-port="3"/>
-        <edge from-layer="4" from-port="4" to-layer="2" to-port="4"/>
-    </edges>
-</net>
-)V0G0N";
-
-    std::string getModel(gather_tree_test_params p) {
-        std::string model = model_t;
-        std::string in_out_shape;
-
-        for (auto& dct : p.in_out_shape) {
-            in_out_shape += "<dim>";
-            in_out_shape += std::to_string(dct) + "</dim>\n";
-        }
-
-        REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape);
-        REPLACE_WITH_NUM(model, "_IN2_", p.in_out_shape[1]);
-
-        return model;
-    }
-
-protected:
-    virtual void TearDown() {
-    }
-
-    virtual void SetUp() {
-        try {
-            TestsCommon::SetUp();
-            gather_tree_test_params p = ::testing::WithParamInterface<gather_tree_test_params>::GetParam();
-            std::string model = getModel(p);
-            //std::cout << model;
-            InferenceEngine::CNNNetReader net_reader;
-            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
-
-            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
-            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
-            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*) {}));
-
-            MKLDNNGraphTestClass graph;
-            graph.CreateGraph(net_reader.getNetwork(), extMgr);
-
-            // Output Data
-            InferenceEngine::OutputsDataMap out;
-            out = net_reader.getNetwork().getOutputsInfo();
-            InferenceEngine::BlobMap outputBlobs;
-
-            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
-
-            InferenceEngine::TBlob<int32_t>::Ptr output;
-            output = InferenceEngine::make_shared_blob<int32_t>(item.second->getTensorDesc());
-            output->allocate();
-            outputBlobs[item.first] = output;
-
-            // Output Reference
-            InferenceEngine::TBlob<int32_t> dst_ref(item.second->getTensorDesc());
-            dst_ref.allocate();
-
-            // Input Data
-            // step_idx
-            InferenceEngine::Blob::Ptr step_idx;
-            step_idx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.in_out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out_shape) });
-            step_idx->allocate();
-            memcpy(step_idx->buffer(), &p.step_idx[0], sizeof(int32_t)*p.step_idx.size());
-            auto * step_idxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(step_idx.get());
-            if (step_idxPtr == nullptr)
-                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
-
-            // parent_idx
-            InferenceEngine::Blob::Ptr parent_idx;
-            parent_idx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.in_out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out_shape) });
-            parent_idx->allocate();
-            memcpy(parent_idx->buffer(), &p.parent_idx[0], sizeof(int32_t)*p.parent_idx.size());
-            auto * parent_idxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(parent_idx.get());
-            if (parent_idxPtr == nullptr)
-                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
-
-            // max_seq_len
-            InferenceEngine::Blob::Ptr max_seq_len;
-            InferenceEngine::SizeVector max_seq_len_dim(1, p.in_out_shape[1]);
-            max_seq_len = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, max_seq_len_dim, InferenceEngine::TensorDesc::getLayoutByDims(max_seq_len_dim) });
-            max_seq_len->allocate();
-            memcpy(max_seq_len->buffer(), &p.max_seq_len[0], sizeof(int32_t)*p.max_seq_len.size());
-            auto * max_seq_lenPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(max_seq_len.get());
-            if (max_seq_lenPtr == nullptr)
-                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
-
-            // end_token
-            InferenceEngine::Blob::Ptr end_token;
-            InferenceEngine::SizeVector end_token_dim(1, 1);
-            end_token = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, end_token_dim, InferenceEngine::TensorDesc::getLayoutByDims(end_token_dim) });
-            end_token->allocate();
-            memcpy(static_cast<int32_t*>(end_token->buffer()), &p.end_token[0], sizeof(int32_t));
-            auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(end_token.get());
-            if (seq_lengthsIdxPtr == nullptr)
-                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
-
-            InferenceEngine::BlobMap srcs;
-            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("step_idx", step_idx));
-            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("parent_idx", parent_idx));
-            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("max_seq_len", max_seq_len));
-            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("end_token", end_token));
-
-            // Reference
-            ref_gather_tree(*step_idxPtr, *parent_idxPtr, *max_seq_lenPtr, *seq_lengthsIdxPtr, dst_ref);
-            if (p.reference.size())
-                if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(int32_t)) != 0)
-                    FAIL() << "Wrong result with compare reference vector!";
-
-            // Infer
-            graph.Infer(srcs, outputBlobs);
-            compare(*output, dst_ref);
-        } catch (const InferenceEngine::details::InferenceEngineException &e) {
-            FAIL() << e.what();
-        }
-    }
-};
-
-TEST_P(MKLDNNCPUExtGatherTreeTests, TestsGatherTree) {}
-
-
-INSTANTIATE_TEST_CASE_P(
-    TestsGatherTree, MKLDNNCPUExtGatherTreeTests,
-            ::testing::Values(
-// Params: in_out_shape, step_idx, parent_idx, max_seq_len, end_token, reference
-                gather_tree_test_params{ { 3,2,3 },{ 1,2,3,2,3,4,4,5,6,5,6,7,7,8,9,8,9,10 },{ 0,0,0,0,0,0,0,1,1,1,2,0,2,1,2,2,1,1 },{ 3,3 },{ 11 },{ 2,2,2,2,4,4,6,5,6,7,6,6,7,8,9,8,9,10 } },
-                gather_tree_test_params{ { 4,1,3 },{ 1,2,3,4,5,6,7,8,9,-1,-1,-1 },{ 0,0,0,0,1,1,2,1,2,-1,-1,-1 },{ 3 },{ 10 },{ 2,2,2,6,5,6,7,8,9,10,10,10 } },
-                gather_tree_test_params{ { 4,1,3 },{ 1,2,3,4,5,6,7,8,9,10,10,10 },{ 0,0,0,0,1,1,2,1,2,1,1,1 },{ 4 },{ 10 },{ 2,2,2,5,5,5,8,8,8,10,10,10 } },
-                gather_tree_test_params{ { 5,1,3 },{ 1,2,3,4,5,6,7,8,9,1,10,3,2,10,10 },{ 0,0,0,0,1,1,2,1,2,1,1,1,2,0,1 },{ 5 },{ 10 },{ 2,2,2,5,5,5,8,8,8,3,1,10,2,10,10 } },
-                gather_tree_test_params{ { 4,2,3 },{ 1,2,3,2,3,4,4,5,6,5,6,7,7,8,9,8,9,10,0,0,0,11,12,0 },{ 0,0,0,0,0,0,0,1,1,1,1,0,2,1,2,2,0,1,-1,-1,-1,0,1,0 },{ 3,4 },{ 11 },{ 2,2,2,2,3,2,6,5,6,7,5,7,7,8,9,8,9,8,11,11,11,11,12,0 } }
-            ));
index 04be8a3..45aab38 100644
@@ -151,7 +151,7 @@ public:
 
         size_t data_size = outputs[0]->size();
         for (size_t i = 0; i < data_size; i++) {
-            dst_data[i] = (dst_data[i] + 1)*2;
+            dst_data[i] = 2;
         }
         return InferenceEngine::OK;
     }
index 52ce963..564221f 100644
@@ -180,7 +180,7 @@ class MKLDNNCPUExtMathTests: public TestsCommon, public WithParamInterface<math_
 
     std::string getModel(math_test_params p) {
         std::string model = model_t;
-        std::string in_out;
+        std::string in_out = "";
         std::string alpha;
         std::string beta;
         std::string gamma;
@@ -194,17 +194,17 @@ class MKLDNNCPUExtMathTests: public TestsCommon, public WithParamInterface<math_
         REPLACE_WITH_STR(model, "_MATH_FUNCTION_", p.math_function);
 
         if (p.alpha.size()) {
-            alpha = "alpha=\"" + std::to_string(p.alpha[0]) + "\"";
+            alpha = "alpha=\"" + to_string_c_locale(p.alpha[0]) + "\"";
         }
         REPLACE_WITH_STR(model, "_ALPHA_", alpha);
 
         if (p.beta.size()) {
-            beta = "beta=\"" + std::to_string(p.beta[0]) + "\"";
+            beta = "beta=\"" + to_string_c_locale(p.beta[0]) + "\"";
         }
         REPLACE_WITH_STR(model, "_BETA_", beta);
 
         if (p.gamma.size()) {
-            gamma = "gamma=\"" + std::to_string(p.gamma[0]) + "\"";
+            gamma = "gamma=\"" + to_string_c_locale(p.gamma[0]) + "\"";
         }
         REPLACE_WITH_STR(model, "_GAMMA_", gamma);
         return model;
@@ -294,6 +294,7 @@ INSTANTIATE_TEST_CASE_P(
         TestsMath, MKLDNNCPUExtMathTests,
             ::testing::Values(
                 // Params: math_function, in_out, input_tensor, alpha, beta, gamma, reference
+                math_test_params{ "Erf", {},{},{},{},{},{} },
                 math_test_params{ "Erf", { 1, 1, 12, 256 }, {},{},{},{}, {} },
                 math_test_params{ "Erf", { 12, 256, 3 },{},{},{},{},{} },
                 math_test_params{ "Erf", { 3, 4 },{},{},{},{},{} },
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp
new file mode 100644
index 0000000..8fa5b00
--- /dev/null
@@ -0,0 +1,586 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct nmsTF_test_params {
+    int center_point_box;
+    InferenceEngine::SizeVector scoresDim;
+    std::vector<float> boxes;
+    std::vector<float> scores;
+    std::vector<int> max_output_boxes_per_class;
+    std::vector<float> iou_threshold;
+    std::vector<float> score_threshold;
+
+    int num_selected_indices;
+    std::vector<int> ref;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+static float intersectionOverUnion(float* boxesI, float* boxesJ, bool center_point_box) {
+    float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ;
+    if (center_point_box) {
+        //  box format: x_center, y_center, width, height
+        yminI = boxesI[1] - boxesI[3] / 2.f;
+        xminI = boxesI[0] - boxesI[2] / 2.f;
+        ymaxI = boxesI[1] + boxesI[3] / 2.f;
+        xmaxI = boxesI[0] + boxesI[2] / 2.f;
+        yminJ = boxesJ[1] - boxesJ[3] / 2.f;
+        xminJ = boxesJ[0] - boxesJ[2] / 2.f;
+        ymaxJ = boxesJ[1] + boxesJ[3] / 2.f;
+        xmaxJ = boxesJ[0] + boxesJ[2] / 2.f;
+    } else {
+        //  box format: y1, x1, y2, x2
+        yminI = (std::min)(boxesI[0], boxesI[2]);
+        xminI = (std::min)(boxesI[1], boxesI[3]);
+        ymaxI = (std::max)(boxesI[0], boxesI[2]);
+        xmaxI = (std::max)(boxesI[1], boxesI[3]);
+        yminJ = (std::min)(boxesJ[0], boxesJ[2]);
+        xminJ = (std::min)(boxesJ[1], boxesJ[3]);
+        ymaxJ = (std::max)(boxesJ[0], boxesJ[2]);
+        xmaxJ = (std::max)(boxesJ[1], boxesJ[3]);
+    }
+
+    float areaI = (ymaxI - yminI) * (xmaxI - xminI);
+    float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ);
+    if (areaI <= 0.f || areaJ <= 0.f)
+        return 0.f;
+
+    float intersection_area =
+            (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) *
+            (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f);
+    return intersection_area / (areaI + areaJ - intersection_area);
+}
+
+typedef struct {
+    float score;
+    int batch_index;
+    int class_index;
+    int box_index;
+} filteredBoxes;
+
+static void ref_nms(
+        InferenceEngine::TBlob<float> &srcBoxes,
+        InferenceEngine::TBlob<float> &srcScores,
+        InferenceEngine::TBlob<int> &selected_idxs,
+        nmsTF_test_params p
+) {
+    float *boxes = srcBoxes.data();
+    float *scores = srcScores.data();
+
+    InferenceEngine::SizeVector scores_dims = srcScores.getTensorDesc().getDims();
+    int num_boxes = static_cast<int>(scores_dims[2]);
+    int max_output_boxes_per_class = num_boxes;
+    if (p.max_output_boxes_per_class.size())
+        max_output_boxes_per_class = (std::min)(max_output_boxes_per_class, p.max_output_boxes_per_class[0]);
+
+    float iou_threshold = 1.f;  //  Value range [0, 1]
+    if (p.iou_threshold.size())
+        iou_threshold = (std::min)(iou_threshold, p.iou_threshold[0]);
+
+    float score_threshold = 0.f;
+    if (p.score_threshold.size())
+        score_threshold = p.score_threshold[0];
+
+    int* selected_indices = selected_idxs.data();
+    InferenceEngine::SizeVector selected_indices_dims = selected_idxs.getTensorDesc().getDims();
+
+    InferenceEngine::SizeVector boxesStrides = srcBoxes.getTensorDesc().getBlockingDesc().getStrides();
+    InferenceEngine::SizeVector scoresStrides = srcScores.getTensorDesc().getBlockingDesc().getStrides();
+
+    // boxes shape: {num_batches, num_boxes, 4}
+    // scores shape: {num_batches, num_classes, num_boxes}
+    int num_batches = static_cast<int>(scores_dims[0]);
+    int num_classes = static_cast<int>(scores_dims[1]);
+    std::vector<filteredBoxes> fb;
+
+    for (int batch = 0; batch < num_batches; batch++) {
+        float *boxesPtr = boxes + batch * boxesStrides[0];
+        for (int class_idx = 0; class_idx < num_classes; class_idx++) {
+            float *scoresPtr = scores + batch * scoresStrides[0] + class_idx * scoresStrides[1];
+            std::vector<std::pair<float, int> > scores_vector;
+            for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
+                if (scoresPtr[box_idx] > score_threshold)
+                    scores_vector.push_back(std::make_pair(scoresPtr[box_idx], box_idx));
+            }
+
+            if (scores_vector.size()) {
+                std::sort(scores_vector.begin(), scores_vector.end(),
+                          [](const std::pair<float, int>& l, const std::pair<float, int>& r) { return l.first > r.first; });
+
+                int io_selection_size = 1;
+                fb.push_back({ scores_vector[0].first, batch, class_idx, scores_vector[0].second });
+                for (int box_idx = 1; (box_idx < static_cast<int>(scores_vector.size()) && io_selection_size < max_output_boxes_per_class); box_idx++) {
+                    bool box_is_selected = true;
+                    for (int idx = io_selection_size - 1; idx >= 0; idx--) {
+                        float iou = intersectionOverUnion(&boxesPtr[scores_vector[box_idx].second * 4],
+                                                          &boxesPtr[scores_vector[idx].second * 4], (p.center_point_box == 1));
+                        if (iou > iou_threshold) {
+                            box_is_selected = false;
+                            break;
+                        }
+                    }
+
+                    if (box_is_selected) {
+                        scores_vector[io_selection_size] = scores_vector[box_idx];
+                        io_selection_size++;
+                        fb.push_back({ scores_vector[box_idx].first, batch, class_idx, scores_vector[box_idx].second });
+                    }
+                }
+            }
+        }
+    }
+
+    std::sort(fb.begin(), fb.end(), [](const filteredBoxes& l, const filteredBoxes& r) { return l.score > r.score; });
+    int selected_indicesStride = selected_idxs.getTensorDesc().getBlockingDesc().getStrides()[0];
+    int* selected_indicesPtr = selected_indices;
+    size_t idx;
+    for (idx = 0; idx < (std::min)(selected_indices_dims[0], fb.size()); idx++) {
+        selected_indicesPtr[0] = fb[idx].batch_index;
+        selected_indicesPtr[1] = fb[idx].class_index;
+        selected_indicesPtr[2] = fb[idx].box_index;
+        selected_indicesPtr += selected_indicesStride;
+    }
+    for (; idx < selected_indices_dims[0]; idx++) {
+        selected_indicesPtr[0] = -1;
+        selected_indicesPtr[1] = -1;
+        selected_indicesPtr[2] = -1;
+        selected_indicesPtr += selected_indicesStride;
+    }
+}
+
+class MKLDNNCPUExtNonMaxSuppressionTFTests : public TestsCommon, public WithParamInterface<nmsTF_test_params> {
+    std::string model_t2 = R"V0G0N(
+<net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputBoxes" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IBOXES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputScores" type="Input" precision="FP32" id="2">
+            <output>
+                <port id="2">
+                    _ISCORES_
+                </port>
+            </output>
+        </layer>
+        <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
+            <data center_point_box="_CPB_"/>
+            <input>
+                <port id="1">
+                    _IBOXES_
+                </port>
+                <port id="2">
+                    _ISCORES_
+                </port>
+            </input>
+            <output>
+                <port id="6" precision="I32">
+                    _IOUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string model_t3 = R"V0G0N(
+<net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputBoxes" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IBOXES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputScores" type="Input" precision="FP32" id="2">
+            <output>
+                <port id="2">
+                    _ISCORES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputBoxesPerClass" type="Input" precision="I32" id="3">
+            <output>
+                <port id="3">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
+            <data center_point_box="_CPB_"/>
+            <input>
+                <port id="1">
+                    _IBOXES_
+                </port>
+                <port id="2">
+                    _ISCORES_
+                </port>
+                <port id="3" precision="I32">
+                    <dim>1</dim>
+                </port>
+            </input>
+            <output>
+                <port id="6" precision="I32">
+                    _IOUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
+        <edge from-layer="3" from-port="3" to-layer="6" to-port="3"/>
+    </edges>
+</net>
+)V0G0N";
+    std::string model_t4 = R"V0G0N(
+<net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputBoxes" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IBOXES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputScores" type="Input" precision="FP32" id="2">
+            <output>
+                <port id="2">
+                    _ISCORES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputBoxesPerClass" type="Input" precision="I32" id="3">
+            <output>
+                <port id="3">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="InputIouThr" type="Input" precision="FP32" id="4">
+            <output>
+                <port id="4">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
+            <data center_point_box="_CPB_"/>
+            <input>
+                <port id="1">
+                    _IBOXES_
+                </port>
+                <port id="2">
+                    _ISCORES_
+                </port>
+                <port id="3" precision="I32">
+                    <dim>1</dim>
+                </port>
+                <port id="4">
+                    <dim>1</dim>
+                </port>
+            </input>
+            <output>
+                <port id="6" precision="I32">
+                    _IOUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
+        <edge from-layer="3" from-port="3" to-layer="6" to-port="3"/>
+        <edge from-layer="4" from-port="4" to-layer="6" to-port="4"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string model_t5 = R"V0G0N(
+<net Name="NonMaxSuppression_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputBoxes" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IBOXES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputScores" type="Input" precision="FP32" id="2">
+            <output>
+                <port id="2">
+                    _ISCORES_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputBoxesPerClass" type="Input" precision="I32" id="3">
+            <output>
+                <port id="3">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="InputIouThr" type="Input" precision="FP32" id="4">
+            <output>
+                <port id="4">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="InputScoreThr" type="Input" precision="FP32" id="5">
+            <output>
+                <port id="5">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="non_max_suppression" type="NonMaxSuppression" precision="FP32" id="6">
+            <data center_point_box="_CPB_"/>
+            <input>
+                <port id="1">
+                    _IBOXES_
+                </port>
+                <port id="2">
+                    _ISCORES_
+                </port>
+                <port id="3" precision="I32">
+                    <dim>1</dim>
+                </port>
+                <port id="4">
+                    <dim>1</dim>
+                </port>
+                <port id="5">
+                    <dim>1</dim>
+                </port>
+            </input>
+            <output>
+                <port id="6" precision="I32">
+                    _IOUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="6" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="6" to-port="2"/>
+        <edge from-layer="3" from-port="3" to-layer="6" to-port="3"/>
+        <edge from-layer="4" from-port="4" to-layer="6" to-port="4"/>
+        <edge from-layer="5" from-port="5" to-layer="6" to-port="5"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(nmsTF_test_params p) {
+        std::string model;
+        if (!p.max_output_boxes_per_class.size())
+            model = model_t2;
+        else if (!p.iou_threshold.size())
+            model = model_t3;
+        else if (!p.score_threshold.size())
+            model = model_t4;
+        else
+            model = model_t5;
+
+        std::string inBoxes;
+        std::string inScores;
+        std::string out;
+
+        inBoxes += "<dim>" + std::to_string(p.scoresDim[0]) + "</dim>\n";
+        inBoxes += "<dim>" + std::to_string(p.scoresDim[2]) + "</dim>\n";
+        inBoxes += "<dim>4</dim>";
+
+
+        for (auto& scr : p.scoresDim) {
+            inScores += "<dim>";
+            inScores += std::to_string(scr) + "</dim>\n";
+        }
+
+        out += "<dim>" + std::to_string(p.num_selected_indices) + "</dim>\n";
+        out += "<dim>3</dim>";
+
+        REPLACE_WITH_STR(model, "_IBOXES_", inBoxes);
+        REPLACE_WITH_STR(model, "_ISCORES_", inScores);
+        REPLACE_WITH_STR(model, "_IOUT_", out);
+        REPLACE_WITH_NUM(model, "_CPB_", p.center_point_box);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            nmsTF_test_params p = ::testing::WithParamInterface<nmsTF_test_params>::GetParam();
+            std::string model = getModel(p);
+            //std::cout << model << std::endl;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            //  Input
+            InferenceEngine::BlobMap srcs;
+
+            //  Input Boxes
+            InferenceEngine::SizeVector boxesDim = {p.scoresDim[0], p.scoresDim[2], 4};
+            InferenceEngine::Blob::Ptr srcBoxes = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, boxesDim, InferenceEngine::TensorDesc::getLayoutByDims(boxesDim) });
+            srcBoxes->allocate();
+            for (size_t i = 0; i < p.boxes.size(); i++) {
+                static_cast<float*>(srcBoxes->buffer())[i] = static_cast<float>(p.boxes[i]);
+            }
+            //memcpy(srcBoxes->buffer(), &p.boxes[0], sizeof(float)*boxes.size());
+            auto * srcBoxesPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcBoxes.get());
+            if (srcBoxesPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputBoxes", srcBoxes));
+
+            // Input Scores
+            InferenceEngine::Blob::Ptr srcScores = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.scoresDim, InferenceEngine::TensorDesc::getLayoutByDims(p.scoresDim) });
+            srcScores->allocate();
+            for (size_t i = 0; i < p.scores.size(); i++) {
+                static_cast<float*>(srcScores->buffer())[i] = static_cast<float>(p.scores[i]);
+            }
+            auto * srcScoresPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcScores.get());
+            if (srcScoresPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputScores", srcScores));
+
+            // Input BoxesPerClass
+            InferenceEngine::Blob::Ptr srcBoxesPerClass;
+            InferenceEngine::Blob::Ptr srcIouThr;
+            InferenceEngine::Blob::Ptr srcScoreThr;
+            if (p.max_output_boxes_per_class.size()) {
+                srcBoxesPerClass = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, InferenceEngine::SizeVector(1,1), InferenceEngine::TensorDesc::getLayoutByDims(InferenceEngine::SizeVector(1,1)) });
+                srcBoxesPerClass->allocate();
+                memcpy(static_cast<int32_t*>(srcBoxesPerClass->buffer()), &p.max_output_boxes_per_class[0], sizeof(int32_t));
+                auto * srcBoxesPerClassPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(srcBoxesPerClass.get());
+                if (srcBoxesPerClassPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputBoxesPerClass", srcBoxesPerClass));
+            }
+
+            // Input IouThr
+            if (p.iou_threshold.size()) {
+                srcIouThr = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, InferenceEngine::SizeVector(1,1), InferenceEngine::TensorDesc::getLayoutByDims(InferenceEngine::SizeVector(1,1)) });
+                srcIouThr->allocate();
+                memcpy(static_cast<float*>(srcIouThr->buffer()), &p.iou_threshold[0], sizeof(float));
+                auto * srcIouThrPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcIouThr.get());
+                if (srcIouThrPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputIouThr", srcIouThr));
+            }
+
+            // Input ScoreThr
+            if (p.score_threshold.size()) {
+                srcScoreThr = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, InferenceEngine::SizeVector(1,1), InferenceEngine::TensorDesc::getLayoutByDims(InferenceEngine::SizeVector(1,1)) });
+                srcScoreThr->allocate();
+                memcpy(static_cast<float*>(srcScoreThr->buffer()), &p.score_threshold[0], sizeof(float));
+                auto * srcScoreThrPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcScoreThr.get());
+                if (srcScoreThrPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputScoreThr", srcScoreThr));
+            }
+
+            //  Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+            InferenceEngine::TBlob<int32_t>::Ptr output;
+            output = InferenceEngine::make_shared_blob<int32_t>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            //  Infer
+            graph.Infer(srcs, outputBlobs);
+
+            // Output Reference
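+            // When no hard-coded reference is provided, the result is validated against the
+            // local ref_nms implementation; otherwise raw buffers are compared with the
+            // TF-derived reference below.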
+            if (!p.ref.size()) {
+                InferenceEngine::TBlob <int32_t> selected_indices_ref(item.second->getTensorDesc());
+                selected_indices_ref.allocate();
+                ref_nms(*srcBoxesPtr, *srcScoresPtr, selected_indices_ref, p);
+                compare(*output, selected_indices_ref);
+            } else {
+                //  Check results
+                if (memcmp((*output).data(), &p.ref[0], p.ref.size() * sizeof(p.ref[0])) != 0)
+                    FAIL() << "Result does not match the TF reference!";
+            }
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtNonMaxSuppressionTFTests, TestsNonMaxSuppression) {}
+
+static std::vector<float> boxes = { 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0 };
+static std::vector<float> scores = { 0.9f, 0.75f, 0.6f, 0.95f, 0.5f, 0.3f };
+static std::vector<int> reference = { 0,0,3,0,0,0,0,0,5 };
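+// Each reference entry is a (batch_index, class_index, box_index) triplet, matching the
+// {num_selected_indices, 3} output shape produced by NonMaxSuppression.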
+
+INSTANTIATE_TEST_CASE_P(
+        TestsNonMaxSuppression, MKLDNNCPUExtNonMaxSuppressionTFTests,
+        ::testing::Values(
+// Params: center_point_box, scoresDim, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, num_selected_indices, ref
+
+        nmsTF_test_params{ 1, {1,1,6}, { 0.5f, 0.5f, 1.0f, 1.0f,0.5f, 0.6f, 1.0f, 1.0f,0.5f, 0.4f, 1.0f, 1.0f,0.5f, 10.5f, 1.0f, 1.0f, 0.5f, 10.6f, 1.0f, 1.0f, 0.5f, 100.5f, 1.0f, 1.0f },
+        scores,{ 3 },{ 0.5f },{ 0.f }, 3, reference }, /*nonmaxsuppression_center_point_box_format*/
+
+        nmsTF_test_params{ 0, {1,1,6}, { 1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1, 0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0 },
+        scores,{ 3 },{ 0.5 },{ 0.0 }, 3, reference }, /*nonmaxsuppression_flipped_coordinates*/
+
+        nmsTF_test_params{ 0, { 1,1,10 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
+                                           0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0 },
+        { 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9 },{ 3 },{ 0.5 },{ 0.0 }, 1,{ 0,0,0 } }, /*nonmaxsuppression_identical_boxes*/
+
+        nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores,{ 2 },{ 0.5 },{ 0.0 }, 2,{ 0,0,3,0,0,0 } }, /*nonmaxsuppression_limit_output_size*/
+
+        nmsTF_test_params{ 0,{ 1,1,1 },{ 0.0, 0.0, 1.0, 1.0 }, { 0.9 },{ 3 },{ 0.5 },{ 0.0 }, 1, { 0,0,0 } }, /*nonmaxsuppression_single_box*/
+
+        nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, { 0.0 }, 3, reference }, /*nonmaxsuppression_suppress_by_IOU*/
+
+        nmsTF_test_params{ 0, { 2,1,6 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
+                                         0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0 },
+        { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,0,0,1,0,3,1,0,0 } }, /*nonmaxsuppression_two_batches*/
+
+        nmsTF_test_params{ 0, { 1,2,6 }, boxes,
+        { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,0,0,0,1,3,0,1,0 } }, /*nonmaxsuppression_two_classes*/
+
+        nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, {}, 3, reference }, /*nonmaxsuppression_no_score_threshold*/
+
+        nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, { 3 }, {}, {}, 3, reference }, /*nonmaxsuppression_no_iou_threshold_and_score_threshold*/
+
+        nmsTF_test_params{ 0, { 1,1,6 }, boxes, scores, {}, {}, {}, 3, {} } /*nonmaxsuppression_no_max_output_boxes_per_class_and_iou_threshold_and_score_threshold*/
+));
index 08653b5..f4b02fc 100644 (file)
@@ -829,11 +829,6 @@ protected:
 #define case_5d_3 one_hot_base_params({ {1, 3, 2, 3}, {4, 1, 3, 2, 3}, 2, 4, 1.0f, 0.0f })
 #define case_5d_4 one_hot_base_params({ {1, 3, 2, 3}, {2, 1, 3, 4, 3}, 3, 4, 1.0f, 0.0f })
 
-std::string  getTestCaseName(testing::TestParamInfo<one_hot_test_params> obj) {
-    return  obj.param.libraryName +
-            "_" + getDeviceName(obj.param.targetDevice);
-}
-
 one_hot_test_params one_hot_only_1d_test_cases[] = {
     one_hot_test_params("MKLDNNPlugin", case_1d_0),
     one_hot_test_params("MKLDNNPlugin", case_1d_1)
index d845a78..dc8ff4c 100644 (file)
@@ -81,6 +81,12 @@ void ref_reduce(
     InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
     InferenceEngine::SizeVector skip_dims;
 
+    if (!dst_dims.size())
+        dst_dims = InferenceEngine::SizeVector(1, 1);
+
+    if (!dstStrides.size())
+        dstStrides = InferenceEngine::SizeVector(1, 1);
+
     if (axes_for_reduction.size() == 0)
         FAIL() << " Index vector should be 1 dimension";
 
@@ -283,7 +289,7 @@ class MKLDNNCPUExtReduceTests : public TestsCommon, public WithParamInterface<re
     std::string getModel(reduce_test_params p) {
         std::string model = model_t;
         std::string in_shape;
-        std::string out_shape;
+        std::string out_shape = "";
 
         for (size_t i = 0; i < p.in_shape.size(); i++) {
             in_shape += "<dim>";
@@ -293,13 +299,9 @@ class MKLDNNCPUExtReduceTests : public TestsCommon, public WithParamInterface<re
         REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.axes_for_reduction.size());
         REPLACE_WITH_STR(model, "_REDUCE_TYPE_", p.reduce_type);
         REPLACE_WITH_NUM(model, "_KEEP_DIMS_", p.keep_dims);
-        if (p.out_shape.size()) {
-            for (size_t i = 0; i < p.out_shape.size(); i++) {
-                out_shape += "<dim>";
-                out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
-            }
-        } else {
-            out_shape = "<dim>1</dim>\n";
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
         }
         REPLACE_WITH_STR(model, "_OUT_", out_shape);
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp
new file mode 100644 (file)
index 0000000..0d498a7
--- /dev/null
@@ -0,0 +1,205 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct scatterTF_test_params {
+    std::string inIdxPrecision;
+    InferenceEngine::SizeVector inDataDim;
+    std::vector<float> inData;
+    InferenceEngine::SizeVector inIdxDim;
+    std::vector<int32_t> inIdx;
+    std::vector<float> inUpd;
+    int axis;
+
+    std::vector<float> reference;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+class MKLDNNCPUExtScatterTFTests : public TestsCommon, public WithParamInterface<scatterTF_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="Scatter_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputData" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IDATA_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputIndexes" type="Input" precision="_IIDXP_" id="2">
+            <output>
+                <port id="2">
+                    _IIDX_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputUpdates" type="Input" precision="FP32" id="3">
+            <output>
+                <port id="3">
+                    _IIDX_
+                </port>
+            </output>
+        </layer>
+        <layer name="scatter" type="ScatterUpdate" precision="FP32" id="4">
+            <data axis="_AX_"/>
+            <input>
+                <port id="1">
+                    _IDATA_
+                </port>
+                <port id="2" precision="_IIDXP_">
+                    _IIDX_
+                </port>
+                <port id="3">
+                    _IIDX_
+                </port>
+            </input>
+            <output>
+                <port id="4">
+                    _IDATA_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="4" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="4" to-port="2"/>
+        <edge from-layer="3" from-port="3" to-layer="4" to-port="3"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(scatterTF_test_params p) {
+        std::string model = model_t;
+        std::string inIdx;
+        std::string inData;
+
+        for (auto& idx : p.inIdxDim) {
+            inIdx += "<dim>";
+            inIdx += std::to_string(idx) + "</dim>\n";
+        }
+
+        for (auto& dct : p.inDataDim) {
+            inData += "<dim>";
+            inData += std::to_string(dct) + "</dim>\n";
+        }
+
+        REPLACE_WITH_STR(model, "_IIDX_", inIdx);
+        REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision);
+        REPLACE_WITH_STR(model, "_IDATA_", inData);
+        REPLACE_WITH_NUM(model, "_AX_", p.axis);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            scatterTF_test_params p = ::testing::WithParamInterface<scatterTF_test_params>::GetParam();
+            std::string model = getModel(p);
+            //std::cout << model << std::endl;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            //  Input Data
+            InferenceEngine::Blob::Ptr srcData = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.inDataDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inDataDim) });
+            srcData->allocate();
+            memcpy(srcData->buffer(), &p.inData[0], sizeof(float)*p.inData.size());
+            auto * srcDataPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcData.get());
+            if (srcDataPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            // Input Indexes
+            InferenceEngine::Blob::Ptr srcIdx;
+            if (p.inIdxPrecision == "I32") {
+                srcIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.inIdxDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdxDim) });
+                srcIdx->allocate();
+                memcpy(static_cast<int32_t*>(srcIdx->buffer()), &p.inIdx[0], sizeof(int32_t)*p.inIdx.size());
+                auto * srcIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(srcIdx.get());
+                if (srcIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+            } else {
+                srcIdx = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.inIdxDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdxDim) });
+                srcIdx->allocate();
+                for (size_t i = 0; i < p.inIdx.size(); i++) {
+                    static_cast<float*>(srcIdx->buffer())[i] = static_cast<float>(p.inIdx[i]);
+                }
+                auto * srcIdxPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcIdx.get());
+                if (srcIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+            }
+
+            // Input Updates
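+            // The updates blob reuses the indices shape (_IIDX_ in the model), so inUpd must
+            // contain exactly as many elements as inIdx.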
+            InferenceEngine::Blob::Ptr srcUpd;
+            srcUpd = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.inIdxDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdxDim) });
+            srcUpd->allocate();
+            memcpy(static_cast<float*>(srcUpd->buffer()), &p.inUpd[0], sizeof(float)*p.inUpd.size());
+            auto * srcUpdPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(srcUpd.get());
+            if (srcUpdPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            //  Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            //  Infer
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputData", srcData));
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputIndexes", srcIdx));
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputUpdates", srcUpd));
+            graph.Infer(srcs, outputBlobs);
+
+            //  Check results
+            if (memcmp((*output).data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+                FAIL() << "Result does not match the TF reference!";
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtScatterTFTests, TestsScatter) {}
+
+INSTANTIATE_TEST_CASE_P(
+        TestsScatter, MKLDNNCPUExtScatterTFTests,
+        ::testing::Values(
+// Params: inDataDim, inData, inIdxDim, inIdx, inUpd, axis, reference
+        scatterTF_test_params{ "I32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 0,{ 2,1.1,0,1,0,2.2,0,2.1,1.2 }},
+        scatterTF_test_params{ "I32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 1,{ 1.1,1,1.2,2,2.2,2.1,0,0,0 }},
+        scatterTF_test_params{ "I32", { 1,5 },{ 1,2,3,4,5 },{ 1,2 },{ 1,3 },{ 1.1,2.1 }, 1,{ 1,1.1,3,2.1,5 }},
+        scatterTF_test_params{"FP32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 0,{ 2,1.1,0,1,0,2.2,0,2.1,1.2 }},
+        scatterTF_test_params{"FP32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 1,{ 1.1,1,1.2,2,2.2,2.1,0,0,0 }},
+        scatterTF_test_params{"FP32", { 1,5 },{ 1,2,3,4,5 },{ 1,2 },{ 1,3 },{ 1.1,2.1 }, 1,{ 1,1.1,3,2.1,5 }}));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp
new file mode 100644 (file)
index 0000000..b0a1411
--- /dev/null
@@ -0,0 +1,553 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+#include <algorithm>
+#include <vector>
+#include <array>
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+
+struct sparse_fill_empty_rows_test_params {
+    std::string precision;
+    InferenceEngine::SizeVector input_indices_shape;
+    std::vector<float> input_indices_value;
+
+    InferenceEngine::SizeVector input_values_shape;
+
+    InferenceEngine::SizeVector input_dense_shape_shape;
+    std::vector<float> input_dense_shape_value;
+
+    InferenceEngine::SizeVector input_default_value_shape;
+    std::vector<float> input_default_value_value;
+
+    InferenceEngine::SizeVector output_indices_shape;
+    InferenceEngine::SizeVector output_values_shape;
+    InferenceEngine::SizeVector output_empty_rows_indicator_shape;
+
+    size_t num_prim_desc;
+    int selectedType;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+void ref_sparse_fill_empty_rows(InferenceEngine::TBlob<float> &input_indices,
+    InferenceEngine::TBlob<float> &input_values,
+    InferenceEngine::TBlob<float> &dense_shape,
+    InferenceEngine::TBlob<float> &default_value,
+    InferenceEngine::TBlob<float> &output_indices,
+    InferenceEngine::TBlob<float> &output_values,
+    InferenceEngine::TBlob<float> &output_empty_rows_indicator) {
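+    // Reference implementation: sort the sparse entries by row, fill rows that have no
+    // entries with the default value, and terminate the output with a (-1, -1) index pair
+    // when fewer than outMaxNumValues slots are used.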
+    const float *input_indices_ptr = input_indices.data();
+    const float *input_values_ptr = input_values.data();
+    const float *dense_shape_ptr = dense_shape.data();
+    const float *default_value_ptr = default_value.data();
+    float dflt_value = default_value_ptr[0];
+
+    float num_rows = dense_shape_ptr[0];
+    float num_cols = dense_shape_ptr[1];
+
+    std::vector<size_t> dims = input_values.getTensorDesc().getDims();
+    size_t inMaxNumValues = dims[0];
+    std::vector<size_t> out_dims = output_values.getTensorDesc().getDims();
+    size_t outMaxNumValues = out_dims[0];
+
+    // compute the actual number of values by searching for an out-of-range index that serves as an end marker
+    size_t in_actual_num_values = 0;
+    for (in_actual_num_values = 0; in_actual_num_values < inMaxNumValues; in_actual_num_values++) {
+        float indice_x = input_indices_ptr[2 * in_actual_num_values];
+        float indice_y = input_indices_ptr[2 * in_actual_num_values + 1];
+        if (indice_x < 0 || indice_y < 0 || indice_x >= num_rows || indice_y >= num_cols) break;
+    }
+
+    // create auxiliary container for sorting
+    std::vector<std::array<float, 3>> indices_values(in_actual_num_values); // <row, column, value>
+    for (size_t i = 0; i < in_actual_num_values; i++) {
+        float row = input_indices_ptr[2 * i];
+        float col = input_indices_ptr[2 * i + 1];
+        float value = input_values_ptr[i];
+        std::array<float, 3> elem = { row, col, value };
+        indices_values[i] = elem;
+    }
+
+    // sort values by row
+    std::sort(indices_values.begin(), indices_values.end(),
+        [](const std::array<float, 3>& first, const std::array<float, 3>& second) {
+        return first[0] < second[0];
+    });
+
+    // unsplit indices and values
+    std::vector<float> indices_with_sorted_rows;
+    std::vector<float> values_for_sorted_rows;
+    for (auto const & elem : indices_values) {
+        indices_with_sorted_rows.push_back(elem[0]);
+        indices_with_sorted_rows.push_back(elem[1]);
+        values_for_sorted_rows.push_back(elem[2]);
+    }
+
+    // compute the start index for each row and the number of values in each row
+    std::vector<int> values_at_row(num_rows);
+    std::fill(values_at_row.begin(), values_at_row.end(), 0);
+    float prev_row_with_value = -1.0;
+    unsigned int total_num_values = 0;
+    std::vector<std::array<float, 3>>::iterator curr_it, prev_it;
+    for (float row_ind = 0.0; row_ind < num_rows; row_ind = row_ind + 1.0) {
+        curr_it = std::find_if(indices_values.begin(), indices_values.end(),
+            [row_ind](std::array<float, 3> elem) { return elem[0] == row_ind; });
+        if (curr_it != indices_values.end()) {
+            if (prev_row_with_value != -1.0) {
+                unsigned int num_values_at_prev_row = std::distance(prev_it, curr_it);
+                values_at_row[(int)prev_row_with_value] = num_values_at_prev_row;
+                total_num_values += num_values_at_prev_row;
+            }
+            prev_row_with_value = row_ind;
+            prev_it = curr_it;
+        } else {
+            total_num_values++;
+        }
+    }
+    if (prev_row_with_value != -1.0) {
+        unsigned int num_values_at_prev_row = std::distance(prev_it, indices_values.end());
+        values_at_row[(int)prev_row_with_value] = num_values_at_prev_row;
+        total_num_values += num_values_at_prev_row;
+    }
+
+    // create output indices
+    float *output_indices_ptr = output_indices.data();
+    float *output_values_ptr = output_values.data();
+    float *output_empty_rows_indicator_ptr = output_empty_rows_indicator.data();
+
+    // zero output buffers
+    std::memset(output_indices_ptr, 0, outMaxNumValues * 2 * sizeof(float));
+    std::memset(output_values_ptr, 0, outMaxNumValues * sizeof(float));
+    std::memset(output_empty_rows_indicator_ptr, 0, num_rows * sizeof(float));
+
+    unsigned int curr_pos_from_copy = 0;
+    unsigned int curr_pos_to_copy = 0;
+    for (int row_ind = 0; row_ind < (int)num_rows; row_ind++) {
+        unsigned int num_values_at_row = values_at_row[row_ind];
+        if (num_values_at_row == 0) {
+            output_empty_rows_indicator_ptr[row_ind] = 1.0;
+            output_values_ptr[curr_pos_to_copy] = dflt_value;
+            output_indices_ptr[curr_pos_to_copy * 2] = (float)row_ind;
+            output_indices_ptr[curr_pos_to_copy * 2 + 1] = 0.0;
+            curr_pos_to_copy++;
+        } else {
+            output_empty_rows_indicator_ptr[row_ind] = 0.0;
+            std::copy(values_for_sorted_rows.begin() + curr_pos_from_copy,
+                values_for_sorted_rows.begin() + curr_pos_from_copy + num_values_at_row,
+                output_values_ptr + curr_pos_to_copy);
+            std::copy(indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy,
+                indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy + 2 * num_values_at_row, output_indices_ptr + 2 * curr_pos_to_copy);
+            curr_pos_to_copy += num_values_at_row;
+            curr_pos_from_copy += num_values_at_row;
+        }
+    }
+
+    // mark the end of the output using a (-1, -1) index pair
+    if (total_num_values < outMaxNumValues) {
+        output_indices_ptr[total_num_values * 2] = -1.0;
+        output_indices_ptr[total_num_values * 2 + 1] = -1.0;
+    }
+}
+
+class MKLDNNCPUExtSparseFillEmptyRowsTests : public TestsCommon, public WithParamInterface<sparse_fill_empty_rows_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="SparseFillEmptyRows_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputIndices" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    _IIN_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputValues" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="0">
+                    _IVL_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputDenseShape" type="Input" precision="FP32" id="2">
+            <output>
+                <port id="0">
+                    _IDS_
+                </port>
+            </output>
+        </layer>
+        <layer name="InputDefaultValue" type="Input" precision="FP32" id="3">
+            <output>
+                <port id="0">
+                    _IDV_
+                </port>
+            </output>
+        </layer>
+        <layer name="SparseFillEmptyRows" id="4" type="SparseFillEmptyRows" precision="FP32">
+            <input>
+                <port id="0">
+                    _IIN_
+                </port>
+                <port id="1">
+                    _IVL_
+                </port>
+                <port id="2">
+                    _IDS_
+                </port>
+                <port id="3">
+                    _IDV_
+                </port>
+            </input>
+            <output>
+                <port id="0">
+                    _OIN_
+                </port>
+                <port id="1">
+                    _OVL_
+                </port>
+                <port id="2">
+                    _ERI_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
+        <edge from-layer="1" from-port="0" to-layer="4" to-port="1"/>
+        <edge from-layer="2" from-port="0" to-layer="4" to-port="2"/>
+        <edge from-layer="3" from-port="0" to-layer="4" to-port="3"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(sparse_fill_empty_rows_test_params p) {
+        std::string model = model_t;
+        std::string input_indices;
+        std::string input_values;
+        std::string dense_shape;
+        std::string default_value;
+        std::string output_indices;
+        std::string output_values;
+        std::string output_empty_rows_indicator;
+
+        InferenceEngine::SizeVector input_dense_shape_shape = { 2 };
+
+        for (auto& shape : p.input_indices_shape) {
+            input_indices += "<dim>";
+            input_indices += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.input_values_shape) {
+            input_values += "<dim>";
+            input_values += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : input_dense_shape_shape) {
+            dense_shape += "<dim>";
+            dense_shape += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.input_default_value_shape) {
+            default_value += "<dim>";
+            default_value += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.output_indices_shape) {
+            output_indices += "<dim>";
+            output_indices += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.output_values_shape) {
+            output_values += "<dim>";
+            output_values += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.output_empty_rows_indicator_shape) {
+            output_empty_rows_indicator += "<dim>";
+            output_empty_rows_indicator += std::to_string(shape) + "</dim>\n";
+        }
+
+        REPLACE_WITH_STR(model, "_IIN_", input_indices);
+        REPLACE_WITH_STR(model, "_IVL_", input_values);
+        REPLACE_WITH_STR(model, "_IDS_", dense_shape);
+        REPLACE_WITH_STR(model, "_IDV_", default_value);
+        REPLACE_WITH_STR(model, "_OIN_", output_indices);
+        REPLACE_WITH_STR(model, "_OVL_", output_values);
+        REPLACE_WITH_STR(model, "_ERI_", output_empty_rows_indicator);
+
+        return model;
+    }
+
+    template <typename data_t>
+    static void fill_data_dbgval(data_t *data, size_t size) {
+        for (size_t i = 0; i < size; i++) {
+            data[i] = static_cast<data_t>(i & (sizeof(data_t) * 8 - 1));
+        }
+    }
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            sparse_fill_empty_rows_test_params p = ::testing::WithParamInterface<sparse_fill_empty_rows_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*) {}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            auto& nodes = graph.getNodes();
+
+            for (auto &node : nodes) {
+                if (node->getName() == "SparseFillEmptyRows") {
+                    ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size());
+                    for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
+                        p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j));
+                    }
+                    ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor());
+                    ASSERT_EQ(p.selectedType,
+                        node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType);
+                }
+            }
+            // 4 inputs + 1 op + 3 outputs
+            ASSERT_EQ(8, nodes.size());
+
+            // Input Data
+            InferenceEngine::Blob::Ptr input_indices = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                p.input_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_indices_shape) });
+            input_indices->allocate();
+            auto *input_indices_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(input_indices.get());
+            std::copy(p.input_indices_value.begin(), p.input_indices_value.end(), (float *) input_indices_ptr->data());
+
+            InferenceEngine::Blob::Ptr input_values = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                p.input_values_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_values_shape) });
+            input_values->allocate();
+            fill_data(input_values->buffer(), input_values->size());
+
+            auto *input_values_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(input_values.get());
+            InferenceEngine::Blob::Ptr input_dense_shape = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                p.input_dense_shape_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_dense_shape_shape) });
+            input_dense_shape->allocate();
+            auto *input_dense_shape_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(input_dense_shape.get());
+            std::copy(p.input_dense_shape_value.begin(), p.input_dense_shape_value.end(), (float *) input_dense_shape_ptr->data());
+
+            InferenceEngine::Blob::Ptr input_default_value = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                p.input_default_value_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_default_value_shape) });
+            input_default_value->allocate();
+            auto *input_default_value_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(input_default_value.get());
+            std::copy(p.input_default_value_value.begin(), p.input_default_value_value.end(), (float *) input_default_value_ptr->data());
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap output_blobs;
+            auto iter = out.begin();
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *(iter++);
+            InferenceEngine::Blob::Ptr output_indices = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output_indices->allocate();
+            output_blobs[item.first] = output_indices;
+            InferenceEngine::TBlob<float> output_indices_ref(item.second->getTensorDesc());
+            output_indices_ref.allocate();
+
+            item = *(iter++);
+            InferenceEngine::Blob::Ptr output_values = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output_values->allocate();
+            output_blobs[item.first] = output_values;
+            InferenceEngine::TBlob<float> output_values_ref(item.second->getTensorDesc());
+            output_values_ref.allocate();
+
+            item = *(iter++);
+            InferenceEngine::Blob::Ptr output_empty_rows_indicator = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output_empty_rows_indicator->allocate();
+            output_blobs[item.first] = output_empty_rows_indicator;
+            InferenceEngine::TBlob<float> output_empty_rows_indicator_ref(item.second->getTensorDesc());
+            output_empty_rows_indicator_ref.allocate();
+
+            // Compute reference result
+            ref_sparse_fill_empty_rows(*input_indices_ptr, *input_values_ptr, *input_dense_shape_ptr, *input_default_value_ptr,
+                output_indices_ref, output_values_ref, output_empty_rows_indicator_ref);
+
+            // Compute IE result
+            InferenceEngine::BlobMap inputs;
+            inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputIndices", input_indices));
+            inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputValues", input_values));
+            inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputDenseShape", input_dense_shape));
+            inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputDefaultValue", input_default_value));
+
+            // Check the result
+            graph.Infer(inputs, output_blobs);
+            compare(*output_indices, output_indices_ref, 0.0f);
+            compare(*output_values, output_values_ref, 0.0f);
+            compare(*output_empty_rows_indicator, output_empty_rows_indicator_ref, 0.0f);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtSparseFillEmptyRowsTests, TestsSparseFillEmptyRows) {}
+
+
+// case 1 - empty sparse tensor with marker
+InferenceEngine::SizeVector input_indices_shape_case1 = {2, 2};
+std::vector<float>          input_indices_value_case1 = {-1.f, -1.f};
+InferenceEngine::SizeVector input_values_shape_case1 = {2};
+InferenceEngine::SizeVector input_dense_shape_shape_case1 = {2};
+std::vector<float>          input_dense_shape_value_case1 = {3.f, 4.f};
+InferenceEngine::SizeVector input_default_value_shape_case1 = {1};
+std::vector<float>          input_default_value_case1 = {0.f};
+InferenceEngine::SizeVector output_indices_shape_case1 = {12, 2};
+InferenceEngine::SizeVector output_values_shape_case1 = {12};
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case1 = {3};
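+// Output buffers are sized for the fully dense case: dense shape {3, 4} gives room for
+// 3 * 4 = 12 output values/indices, plus one empty-row indicator per row.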
+
+// case 2 - in one row all values absent without marker
+InferenceEngine::SizeVector input_indices_shape_case2 = {6, 2};
+std::vector<float>          input_indices_value_case2 = {1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 4.f, 0.f, 1.f};
+InferenceEngine::SizeVector input_values_shape_case2 = {6};
+InferenceEngine::SizeVector input_dense_shape_shape_case2 = {2};
+std::vector<float>          input_dense_shape_value_case2 = {4.f, 5.f};
+InferenceEngine::SizeVector input_default_value_shape_case2 = {1};
+std::vector<float>          input_default_value_case2 = {0.f};
+InferenceEngine::SizeVector output_indices_shape_case2 = {20, 2};
+InferenceEngine::SizeVector output_values_shape_case2 = {20};
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case2 = {4};
+
+// case 3 - in one row all values absent with marker
+InferenceEngine::SizeVector input_indices_shape_case3 = { 6, 2 };
+std::vector<float>          input_indices_value_case3 = { 1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 4.f, -1.f, -1.f };
+InferenceEngine::SizeVector input_values_shape_case3 = { 6 };
+InferenceEngine::SizeVector input_dense_shape_shape_case3 = { 2 };
+std::vector<float>          input_dense_shape_value_case3 = { 4.f, 5.f };
+InferenceEngine::SizeVector input_default_value_shape_case3 = { 1 };
+std::vector<float>          input_default_value_case3 = { 0.f };
+InferenceEngine::SizeVector output_indices_shape_case3 = { 20, 2 };
+InferenceEngine::SizeVector output_values_shape_case3 = { 20 };
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case3 = { 4 };
+
+// case 4 - in all rows at least one value presents without marker
+InferenceEngine::SizeVector input_indices_shape_case4 = { 7, 2 };
+std::vector<float>          input_indices_value_case4 = { 1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 3.f, 2.f, 1.f, 4.f, 3.f };
+InferenceEngine::SizeVector input_values_shape_case4 = { 7 };
+InferenceEngine::SizeVector input_dense_shape_shape_case4 = { 2 };
+std::vector<float>          input_dense_shape_value_case4 = { 5.f, 4.f };
+InferenceEngine::SizeVector input_default_value_shape_case4 = { 1 };
+std::vector<float>          input_default_value_case4 = { 0.f };
+InferenceEngine::SizeVector output_indices_shape_case4 = { 20, 2 };
+InferenceEngine::SizeVector output_values_shape_case4 = { 20 };
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case4 = { 5 };
+
+// case 5 - in all rows at least one value presents with marker
+InferenceEngine::SizeVector input_indices_shape_case5 = { 8, 2 };
+std::vector<float>          input_indices_value_case5 = { 1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 3.f, 2.f, 1.f, 4.f, 3.f, -1.f, -1.f };
+InferenceEngine::SizeVector input_values_shape_case5 = { 8 };
+InferenceEngine::SizeVector input_dense_shape_shape_case5 = { 2 };
+std::vector<float>          input_dense_shape_value_case5 = { 5.f, 4.f };
+InferenceEngine::SizeVector input_default_value_shape_case5 = { 1 };
+std::vector<float>          input_default_value_case5 = { 0.f };
+InferenceEngine::SizeVector output_indices_shape_case5 = { 20, 2 };
+InferenceEngine::SizeVector output_values_shape_case5 = { 20 };
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case5 = { 5 };
+
+// case 6 - big sparse tensor with many missed rows without marker
+InferenceEngine::SizeVector input_indices_shape_case6 = { 7, 2 };
+std::vector<float>          input_indices_value_case6 = { 1.f, 0.f, 0.f, 0.f, 99.f, 19.f, 12.f, 2.f, 37.f, 13.f, 2.f, 1.f, 45.f, 3.f };
+InferenceEngine::SizeVector input_values_shape_case6 = { 7 };
+InferenceEngine::SizeVector input_dense_shape_shape_case6 = { 2 };
+std::vector<float>          input_dense_shape_value_case6 = { 100.f, 20.f };
+InferenceEngine::SizeVector input_default_value_shape_case6 = { 1 };
+std::vector<float>          input_default_value_case6 = { 0.f };
+InferenceEngine::SizeVector output_indices_shape_case6 = { 2000, 2 };
+InferenceEngine::SizeVector output_values_shape_case6 = { 2000 };
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case6 = { 100 };
+
+// case 7 - big sparse tensor with many missed rows with marker
+InferenceEngine::SizeVector input_indices_shape_case7 = { 8, 2 };
+std::vector<float>          input_indices_value_case7 = { 1.f, 0.f, 0.f, 0.f, 99.f, 19.f, 12.f, 2.f, 37.f, 13.f, 2.f, 1.f, 45.f, 3.f, -1.f, -1.f };
+InferenceEngine::SizeVector input_values_shape_case7 = { 8 };
+InferenceEngine::SizeVector input_dense_shape_shape_case7 = { 2 };
+std::vector<float>          input_dense_shape_value_case7 = { 100.f, 20.f };
+InferenceEngine::SizeVector input_default_value_shape_case7 = { 1 };
+std::vector<float>          input_default_value_case7 = { 0.f };
+InferenceEngine::SizeVector output_indices_shape_case7 = { 2000, 2 };
+InferenceEngine::SizeVector output_values_shape_case7 = { 2000 };
+InferenceEngine::SizeVector output_empty_rows_indicator_shape_case7 = { 100 };
+
+INSTANTIATE_TEST_CASE_P(
+    TestsSparseFillEmptyRows, MKLDNNCPUExtSparseFillEmptyRowsTests,
+            ::testing::Values(
+                // case 1 - empty sparse tensor with marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case1, input_indices_value_case1, input_values_shape_case1,
+                input_dense_shape_shape_case1, input_dense_shape_value_case1, input_default_value_shape_case1, input_default_value_case1,
+                output_indices_shape_case1, output_values_shape_case1, output_empty_rows_indicator_shape_case1,
+                1, MKLDNNPlugin::impl_desc_type::unknown },
+                
+                // case 2 - in one row all values absent without marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case2, input_indices_value_case2, input_values_shape_case2,
+                input_dense_shape_shape_case2, input_dense_shape_value_case2, input_default_value_shape_case2, input_default_value_case2,
+                output_indices_shape_case2, output_values_shape_case2, output_empty_rows_indicator_shape_case2,
+                1, MKLDNNPlugin::impl_desc_type::unknown },
+                
+                // case 3 - in one row all values absent with marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case3, input_indices_value_case3, input_values_shape_case3,
+                input_dense_shape_shape_case3, input_dense_shape_value_case3, input_default_value_shape_case3, input_default_value_case3,
+                output_indices_shape_case3, output_values_shape_case3, output_empty_rows_indicator_shape_case3,
+                1, MKLDNNPlugin::impl_desc_type::unknown },
+                
+                // case 4 - in all rows at least one value presents without marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case4, input_indices_value_case4, input_values_shape_case4,
+                input_dense_shape_shape_case4, input_dense_shape_value_case4, input_default_value_shape_case4, input_default_value_case4,
+                output_indices_shape_case4, output_values_shape_case4, output_empty_rows_indicator_shape_case4,
+                1, MKLDNNPlugin::impl_desc_type::unknown },
+
+                // case 5 - in all rows at least one value presents with marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case5, input_indices_value_case5, input_values_shape_case5,
+                input_dense_shape_shape_case5, input_dense_shape_value_case5, input_default_value_shape_case5, input_default_value_case5,
+                output_indices_shape_case5, output_values_shape_case5, output_empty_rows_indicator_shape_case5,
+                1, MKLDNNPlugin::impl_desc_type::unknown },
+
+                // case 6 - big sparse tensor with many missed rows without marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case6, input_indices_value_case6, input_values_shape_case6,
+                input_dense_shape_shape_case6, input_dense_shape_value_case6, input_default_value_shape_case6, input_default_value_case6,
+                output_indices_shape_case6, output_values_shape_case6, output_empty_rows_indicator_shape_case6,
+                1, MKLDNNPlugin::impl_desc_type::unknown },
+
+                // case 7 - big sparse tensor with many missed rows with marker
+                sparse_fill_empty_rows_test_params{ "FP32",
+                input_indices_shape_case7, input_indices_value_case7, input_values_shape_case7,
+                input_dense_shape_shape_case7, input_dense_shape_value_case7, input_default_value_shape_case7, input_default_value_case7,
+                output_indices_shape_case7, output_values_shape_case7, output_empty_rows_indicator_shape_case7,
+                1, MKLDNNPlugin::impl_desc_type::unknown }
+                ));
index 9f28a81..d1ec622 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2019 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -131,7 +131,7 @@ static void ref_topk(InferenceEngine::TBlob<float> &src, InferenceEngine::TBlob<
         }
 
         if (!sort_value)
-            std::sort(src_vector.begin(), src_vector.begin() + src_k, [&src_vector](const pair<int, int> &a, const pair<int, int> &b)
+            std::sort(src_vector.begin(), src_vector.begin() + src_k, [](const pair<int, int> &a, const pair<int, int> &b)
             { return (a.second < b.second); });
 
         for (int j = 0; j < src_k; ++j) {
@@ -367,9 +367,7 @@ class MKLDNNCPUExtTopK1OutTests : public TestsCommon, public WithParamInterface<
         </layer>
         <layer name="src_k" type="Input" precision="I32" id="2">
             <output>
-                <port id="2">
-                    <dim>1</dim>
-                </port>
+                <port id="2"/>
             </output>
         </layer>
         <layer name="output" id="2" type="TopK" precision="_PRECISION_">
@@ -379,7 +377,6 @@ class MKLDNNCPUExtTopK1OutTests : public TestsCommon, public WithParamInterface<
                     _IN_
                 </port>
                 <port id="2">
-                    <dim>1</dim>
                 </port>
             </input>
             <output>
@@ -445,8 +442,8 @@ protected:
             graph.CreateGraph(net_reader.getNetwork(), extMgr);
 
             // Input Data
-            InferenceEngine::Blob::Ptr src;
-            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape,
+                                                                                        InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
             src->allocate();
             if (p.input_tensor.size())
                 memcpy(src->buffer(), &p.input_tensor[0], sizeof(float)*p.input_tensor.size());
@@ -458,10 +455,8 @@ protected:
 
             InferenceEngine::BlobMap srcs;
             srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("value", src));
-
-            InferenceEngine::Blob::Ptr seq_lengthsIdx;
-            InferenceEngine::SizeVector seq_lengths_dim(1, 1);
-            seq_lengthsIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+            InferenceEngine::Blob::Ptr seq_lengthsIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, {},
+                                                                                                     InferenceEngine::TensorDesc::getLayoutByDims({})});
             seq_lengthsIdx->allocate();
             memcpy(static_cast<int32_t*>(seq_lengthsIdx->buffer()), &p.src_k[0], sizeof(int32_t));
             auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(seq_lengthsIdx.get());
@@ -492,7 +487,8 @@ protected:
                 }
             } else {
                 InferenceEngine::TBlob<int32_t>::Ptr output;
-                output = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.out_shape) });
+                output = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.out_shape,
+                                                                      InferenceEngine::TensorDesc::getLayoutByDims(p.out_shape) });
                 output->allocate();
                 outputBlobs[item.first] = output;
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp
new file mode 100644 (file)
index 0000000..b17369c
--- /dev/null
@@ -0,0 +1,378 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+#include <algorithm>
+#include <vector>
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+
+struct unique_test_params {
+    std::string model;
+
+    std::string precision;
+
+    std::string sorted;
+    std::string return_inverse;
+    std::string return_counts;
+
+    InferenceEngine::SizeVector input_shape;
+    std::vector<float> input_value;
+
+    InferenceEngine::SizeVector output_uniques_shape;
+    InferenceEngine::SizeVector output_indices_shape;
+    InferenceEngine::SizeVector output_counts_shape;
+
+    std::vector<float> output_uniques_value_ref;
+    std::vector<float> output_indices_value_ref;
+    std::vector<float> output_counts_value_ref;
+
+    size_t num_prim_desc;
+    int selectedType;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+class MKLDNNCPUExtUniqueTests : public TestsCommon, public WithParamInterface<unique_test_params> {
+    std::string getModel(unique_test_params p) {
+        std::string model = p.model;
+
+        std::string input_shape;
+        std::string output_uniques_shape;
+        std::string output_indices_shape;
+        std::string output_counts_shape;
+
+        for (auto& shape : p.input_shape) {
+            input_shape += "<dim>";
+            input_shape += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.output_uniques_shape) {
+            output_uniques_shape += "<dim>";
+            output_uniques_shape += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.output_indices_shape) {
+            output_indices_shape += "<dim>";
+            output_indices_shape += std::to_string(shape) + "</dim>\n";
+        }
+
+        for (auto& shape : p.output_counts_shape) {
+            output_counts_shape += "<dim>";
+            output_counts_shape += std::to_string(shape) + "</dim>\n";
+        }
+
+        REPLACE_WITH_STR(model, "_SORTED_", p.sorted);
+        REPLACE_WITH_STR(model, "_INPUT_SHAPE_", input_shape);
+        REPLACE_WITH_STR(model, "_OUTPUT_UNIQUES_SHAPE_", output_uniques_shape);
+        REPLACE_WITH_STR(model, "_OUTPUT_INDICES_SHAPE_", output_indices_shape);
+        REPLACE_WITH_STR(model, "_OUTPUT_COUNTS_SHAPE_", output_counts_shape);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            unique_test_params p = ::testing::WithParamInterface<unique_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
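+            // cpuExt lives on the stack, so the shared IExtensionPtr gets a no-op deleter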
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*) {}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            auto& nodes = graph.getNodes();
+
+            // locate the Unique node and validate its primitive descriptors
+            for (auto &node : nodes) {
+                if (node->getName() == "Unique") {
+                    ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size());
+                    for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
+                        p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j));
+                    }
+                    ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor());
+                    ASSERT_EQ(p.selectedType,
+                        node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType);
+                }
+            }
+
+            // prepare input blob and input blob map
+            InferenceEngine::Blob::Ptr input = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                p.input_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_shape) });
+            input->allocate();
+            auto *input_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(input.get());
+            std::copy(p.input_value.begin(), p.input_value.end(), (float *)input_ptr->data());
+            InferenceEngine::BlobMap input_blob_map;
+            input_blob_map["InputValues"] = input;
+
+            // prepare output blob map
+            InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap output_blob_map;
+            for (auto iter = out.begin(); iter != out.end(); iter++) {
+                std::pair<std::string, InferenceEngine::DataPtr> item = *iter;
+                InferenceEngine::Blob::Ptr output_blob_ptr = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+                output_blob_ptr->allocate();
+                output_blob_map[item.first] = output_blob_ptr;
+            }
+
+            // prepare blobs with reference data
+            InferenceEngine::Blob::Ptr output_uniques_blob_ref = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                 p.output_uniques_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_uniques_shape) });
+            output_uniques_blob_ref->allocate();
+            auto *output_uniques_blob_ref_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(output_uniques_blob_ref.get());
+            std::copy(p.output_uniques_value_ref.begin(), p.output_uniques_value_ref.end(), (float *)output_uniques_blob_ref_ptr->data());
+
+            InferenceEngine::Blob::Ptr output_indices_blob_ref = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                 p.output_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_indices_shape) });
+            output_indices_blob_ref->allocate();
+            auto *output_indices_blob_ref_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(output_indices_blob_ref.get());
+            std::copy(p.output_indices_value_ref.begin(), p.output_indices_value_ref.end(), (float *)output_indices_blob_ref_ptr->data());
+
+            InferenceEngine::Blob::Ptr output_counts_blob_ref = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+                 p.output_counts_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_counts_shape) });
+            output_counts_blob_ref->allocate();
+            auto *output_counts_blob_ref_ptr = dynamic_cast<InferenceEngine::TBlob<float>*>(output_counts_blob_ref.get());
+            std::copy(p.output_counts_value_ref.begin(), p.output_counts_value_ref.end(), (float *)output_counts_blob_ref_ptr->data());
+
+            // infer
+            graph.Infer(input_blob_map, output_blob_map);
+
+            // check the result
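+            // the outputs map is walked in order: unique elements first, then
+            // indices (when return_inverse) and counts (when return_counts)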
+            auto iter = out.begin();
+            compare(*output_blob_map[iter->first], *output_uniques_blob_ref, 0.0f);
+            if (p.return_inverse == "true") {
+                iter++;
+                compare(*output_blob_map[iter->first], *output_indices_blob_ref, 0.0f);
+            }
+            if (p.return_counts == "true") {
+                iter++;
+                compare(*output_blob_map[iter->first], *output_counts_blob_ref, 0.0f);
+            }
+        }
+        catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtUniqueTests, TestsUnique) {}
+
+// model 1 contains one Unique layer with two outputs: unique elements, indices
+std::string model1 = R"V0G0N(
+<net Name="Unique_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputValues" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    _INPUT_SHAPE_
+                </port>
+            </output>
+        </layer>
+        <layer name="UniqueLayer" id="1" type="Unique" precision="FP32">
+            <data return_counts="false" return_inverse="true" sorted="_SORTED_"/>
+            <input>
+                <port id="0">
+                    _INPUT_SHAPE_
+                </port>
+            </input>
+            <output>
+                <port id="0">
+                    _OUTPUT_UNIQUES_SHAPE_
+                </port>
+                <port id="1">
+                    _OUTPUT_INDICES_SHAPE_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+    </edges>
+</net>
+)V0G0N";
+
+// model 2 contains one Unique layer with three outputs: unique elements, indices, counts
+std::string model2 = R"V0G0N(
+<net Name="Unique_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="InputValues" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    _INPUT_SHAPE_
+                </port>
+            </output>
+        </layer>
+        <layer name="UniqueLayer" id="1" type="Unique" precision="FP32">
+            <data return_counts="true" return_inverse="true" sorted="_SORTED_"/>
+            <input>
+                <port id="0">
+                    _INPUT_SHAPE_
+                </port>
+            </input>
+            <output>
+                <port id="0">
+                    _OUTPUT_UNIQUES_SHAPE_
+                </port>
+                <port id="1">
+                    _OUTPUT_INDICES_SHAPE_
+                </port>
+                <port id="2">
+                    _OUTPUT_COUNTS_SHAPE_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+    </edges>
+</net>
+)V0G0N";
+
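+// Note: every output keeps the static, input-sized shape ({10} here); for the unique-elements
+// and counts outputs, entries beyond the number of distinct values are padding, and the
+// reference vectors below spell that padding out explicitly.
+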
+// case 1 - input with 10 elements where some of them repeat, non-sorted
+InferenceEngine::SizeVector input_shape_case1 = { 10 };
+std::vector<float>          input_value_case1 = { 8.f, 1.f, 2.f, 1.f, 8.f, 5.f, 1.f, 5.f, 0.f, 0.f };
+InferenceEngine::SizeVector output_uniques_shape_case1 = { 10 };
+InferenceEngine::SizeVector output_indices_shape_case1 = { 10 };
+InferenceEngine::SizeVector output_counts_shape_case1 = { 10 };
+std::vector<float>          output_uniques_value_ref_case1 = { 8.f, 1.f, 2.f, 5.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
+std::vector<float>          output_indices_value_ref_case1 = { 0.f, 1.f, 2.f, 1.f, 0.f, 3.f, 1.f, 3.f, 4.f, 4.f };
+std::vector<float>          output_counts_value_ref_case1 = { 2.f, 3.f, 1.f, 2.f, 2.f, 0.f, 0.f, 0.f, 0.f, 0.f };
+
+// case 2 - input with 10 elements where all of them are unique, non-sorted
+InferenceEngine::SizeVector input_shape_case2 = { 10 };
+std::vector<float>          input_value_case2 = { 8.f, 1.f, 2.f, 3.f, 10.f, 5.f, 12.f, 15.f, 0.f, 100.f };
+InferenceEngine::SizeVector output_uniques_shape_case2 = { 10 };
+InferenceEngine::SizeVector output_indices_shape_case2 = { 10 };
+InferenceEngine::SizeVector output_counts_shape_case2 = { 10 };
+std::vector<float>          output_uniques_value_ref_case2 = { 8.f, 1.f, 2.f, 3.f, 10.f, 5.f, 12.f, 15.f, 0.f, 100.f };
+std::vector<float>          output_indices_value_ref_case2 = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f };
+std::vector<float>          output_counts_value_ref_case2 = { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f };
+
+// case 3 - input with 10 elements where all of them are the same, non-sorted
+InferenceEngine::SizeVector input_shape_case3 = { 10 };
+std::vector<float>          input_value_case3 = { 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f };
+InferenceEngine::SizeVector output_uniques_shape_case3 = { 10 };
+InferenceEngine::SizeVector output_indices_shape_case3 = { 10 };
+InferenceEngine::SizeVector output_counts_shape_case3 = { 10 };
+std::vector<float>          output_uniques_value_ref_case3 = { 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f };
+std::vector<float>          output_indices_value_ref_case3 = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
+std::vector<float>          output_counts_value_ref_case3 = { 10.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
+
+// case 4 - input with 10 elements where some of them repeat, sorted
+InferenceEngine::SizeVector input_shape_case4 = { 10 };
+std::vector<float>          input_value_case4 = { 8.f, 1.f, 2.f, 1.f, 8.f, 5.f, 1.f, 5.f, 0.f, 0.f };
+InferenceEngine::SizeVector output_uniques_shape_case4 = { 10 };
+InferenceEngine::SizeVector output_indices_shape_case4 = { 10 };
+InferenceEngine::SizeVector output_counts_shape_case4 = { 10 };
+std::vector<float>          output_uniques_value_ref_case4 = { 0.f, 1.f, 2.f, 5.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f };
+std::vector<float>          output_indices_value_ref_case4 = { 4.f, 1.f, 2.f, 1.f, 4.f, 3.f, 1.f, 3.f, 0.f, 0.f };
+std::vector<float>          output_counts_value_ref_case4 = { 2.f, 3.f, 1.f, 2.f, 2.f, 0.f, 0.f, 0.f, 0.f, 0.f };
+
+// case 5 - input with 10 elements where all of them are unique, sorted
+InferenceEngine::SizeVector input_shape_case5 = { 10 };
+std::vector<float>          input_value_case5 = { 8.f, 1.f, 2.f, 3.f, 10.f, 5.f, 12.f, 15.f, 0.f, 100.f };
+InferenceEngine::SizeVector output_uniques_shape_case5 = { 10 };
+InferenceEngine::SizeVector output_indices_shape_case5 = { 10 };
+InferenceEngine::SizeVector output_counts_shape_case5 = { 10 };
+std::vector<float>          output_uniques_value_ref_case5 = { 0.f, 1.f, 2.f, 3.f, 5.f, 8.f, 10.f, 12.f, 15.f, 100.f };
+std::vector<float>          output_indices_value_ref_case5 = { 5.f, 1.f, 2.f, 3.f, 6.f, 4.f, 7.f, 8.f, 0.f, 9.f };
+std::vector<float>          output_counts_value_ref_case5 = { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f };
+
+INSTANTIATE_TEST_CASE_P(
+    TestsUnique, MKLDNNCPUExtUniqueTests,
+    ::testing::Values(
+        // case 0 - model1, sorted="false", input with 10 elements where some of them repeat
+        unique_test_params {
+            model1, "FP32", "false", "true", "false", input_shape_case1, input_value_case1,
+            output_uniques_shape_case1, output_indices_shape_case1, output_counts_shape_case1,
+            output_uniques_value_ref_case1, output_indices_value_ref_case1, output_counts_value_ref_case1,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 1 - model1, sorted="false", input with 10 elements where all of them are unique
+        unique_test_params{
+            model1, "FP32", "false", "true", "false", input_shape_case2, input_value_case2,
+            output_uniques_shape_case2, output_indices_shape_case2, output_counts_shape_case2,
+            output_uniques_value_ref_case2, output_indices_value_ref_case2, output_counts_value_ref_case2,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 2 - model1, sorted="false", input with 10 elements where all of them are the same
+        unique_test_params{
+            model1, "FP32", "false", "true", "false", input_shape_case3, input_value_case3,
+            output_uniques_shape_case3, output_indices_shape_case3, output_counts_shape_case3,
+            output_uniques_value_ref_case3, output_indices_value_ref_case3, output_counts_value_ref_case3,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 3 - model1, sorted="true", input with 10 elements where some of them repeat
+        unique_test_params{
+            model1, "FP32", "true", "true", "false", input_shape_case4, input_value_case4,
+            output_uniques_shape_case4, output_indices_shape_case4, output_counts_shape_case4,
+            output_uniques_value_ref_case4, output_indices_value_ref_case4, output_counts_value_ref_case4,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 4 - model1, sorted="true", input with 10 elements where all of them are unique
+        unique_test_params{
+            model1, "FP32", "true", "true", "false", input_shape_case5, input_value_case5,
+            output_uniques_shape_case5, output_indices_shape_case5, output_counts_shape_case5,
+            output_uniques_value_ref_case5, output_indices_value_ref_case5, output_counts_value_ref_case5,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 5 - model2, sorted="false", input with 10 elements where some of them repeat
+        unique_test_params{
+            model2, "FP32", "false", "true", "true", input_shape_case1, input_value_case1,
+            output_uniques_shape_case1, output_indices_shape_case1, output_counts_shape_case1,
+            output_uniques_value_ref_case1, output_indices_value_ref_case1, output_counts_value_ref_case1,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 6 - model2, sorted="false", input with 10 elements where all of them are unique
+        unique_test_params{
+            model2, "FP32", "false", "true", "true", input_shape_case2, input_value_case2,
+            output_uniques_shape_case2, output_indices_shape_case2, output_counts_shape_case2,
+            output_uniques_value_ref_case2, output_indices_value_ref_case2, output_counts_value_ref_case2,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 7 - model2, sorted="false", input with 10 elements where all of them are the same
+        unique_test_params{
+            model2, "FP32", "false", "true", "true", input_shape_case3, input_value_case3,
+            output_uniques_shape_case3, output_indices_shape_case3, output_counts_shape_case3,
+            output_uniques_value_ref_case3, output_indices_value_ref_case3, output_counts_value_ref_case3,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 8 - model2, sorted="true", input with 10 elements where some of them repeat
+        unique_test_params{
+            model2, "FP32", "true", "true", "true", input_shape_case4, input_value_case4,
+            output_uniques_shape_case4, output_indices_shape_case4, output_counts_shape_case4,
+            output_uniques_value_ref_case4, output_indices_value_ref_case4, output_counts_value_ref_case4,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        },
+        // case 9 - model2, sorted="true", input with 10 elements where all of them are unique
+        unique_test_params{
+            model2, "FP32", "true", "true", "true", input_shape_case5, input_value_case5,
+            output_uniques_shape_case5, output_indices_shape_case5, output_counts_shape_case5,
+            output_uniques_value_ref_case5, output_indices_value_ref_case5, output_counts_value_ref_case5,
+            1, MKLDNNPlugin::impl_desc_type::unknown
+        }
+));
index 8ba55b3..e5f5afe 100644 (file)
@@ -165,16 +165,16 @@ protected:
 
         string P1, P2;
         if (p.alg == eltwise_relu) {
-            P1 = string("negative_slope=\"") + to_string(p.alpha) + string("\"");
-            P2 = string("beta=\"") + to_string(p.beta) + string("\"");
+            P1 = string("negative_slope=\"") + to_string_c_locale(p.alpha) + string("\"");
+            P2 = string("beta=\"") + to_string_c_locale(p.beta) + string("\"");
         } else if (p.alg == eltwise_bounded_relu) {
-            P1 = string("n=\"") + to_string(p.alpha) + string("\"");
-            P2 = string("beta=\"") + to_string(p.beta) + string("\"");
+            P1 = string("n=\"") + to_string_c_locale(p.alpha) + string("\"");
+            P2 = string("beta=\"") + to_string_c_locale(p.beta) + string("\"");
         } else if (p.alg == eltwise_tanh) {
             P1 = string("type=\"tanh\"");
         } else {
-            P1 = string("alpha=\"") + to_string(p.alpha) + string("\"");
-            P2 = string("beta=\"") + to_string(p.beta) + string("\"");
+            P1 = string("alpha=\"") + to_string_c_locale(p.alpha) + string("\"");
+            P2 = string("beta=\"") + to_string_c_locale(p.beta) + string("\"");
         }
         REPLACE_WITH_STR(model, "_P1_", P1);
         REPLACE_WITH_STR(model, "_P2_", P2);
index ba4596d..8018deb 100644 (file)
@@ -59,8 +59,8 @@ void ref_conv(const TBlob<data_t> &src, const data_t *weights, const size_t weig
     size_t IH = src_dims[dims_size - 2];
     size_t IW = src_dims[dims_size - 1];
 
-    size_t OW = (IW + 2u * prm.pads_begin[X_AXIS] - prm.kernel[X_AXIS]) / prm.strides[X_AXIS] + 1u;
-    size_t OH = (IH + 2u * prm.pads_begin[Y_AXIS] - prm.kernel[Y_AXIS]) / prm.strides[Y_AXIS] + 1u;
+    size_t OW = (IW + prm.pads_end[X_AXIS] + prm.pads_begin[X_AXIS] - prm.kernel[X_AXIS]) / prm.strides[X_AXIS] + 1u;
+    size_t OH = (IH + prm.pads_end[Y_AXIS] + prm.pads_begin[Y_AXIS] - prm.kernel[Y_AXIS]) / prm.strides[Y_AXIS] + 1u;
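+    // output width/height account for asymmetric padding (pads_begin + pads_end);
+    // output depth (OD) below still assumes symmetric padding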
     size_t OD = dims_size == 5 ? (ID + 2u * prm.pads_begin[Z_AXIS] - prm.kernel[Z_AXIS]) / prm.strides[Z_AXIS] + 1u : 1u;
     size_t OC = prm.out_c;
 
@@ -80,12 +80,12 @@ void ref_conv(const TBlob<data_t> &src, const data_t *weights, const size_t weig
     size_t SC2 = SC1 * OD;
     size_t SC3 = OC / GC;
     size_t SC4 = SC2 * SC3;
-    
+
     size_t IC1 = IH * IW;
     size_t IC2 = IC1 * ID;
     size_t IC3 = IC / GC;
     size_t IC4 = IC2 * IC3;
-    
+
     size_t KC1 = KH * KW;
     size_t KC2 = KC1 * KD;
     size_t KC3 = IC3 * KC2;
@@ -144,7 +144,7 @@ void ref_conv(const TBlob<data_t> &src, const data_t *weights, const size_t weig
 class MKLDNNGraphConvolutionTests: public TestsCommon,
                                    public WithParamInterface<conv_test_params> {
     std::string model_t_5D = R"V0G0N(
-<net name="Convolution_Only" version="3" precision="FP32" batch="1">
+<net name="Convolution_Only" version="4" precision="FP32" batch="1">
     <layers>
         <layer name="in1" type="Input" precision="FP32" id="0">
             <output>
@@ -193,7 +193,7 @@ protected:
         int k_len = p.kernel.size();
         for (size_t i = 2; i < p.dims.size(); i++) {
             size_t inx = k_len - i + 1;
-            size_t dim = (p.dims[i] + 2lu * p.pads_begin[inx] - p.kernel[inx]) / p.strides[inx] + 1lu;
+            size_t dim = (p.dims[i] + p.pads_end[inx] + p.pads_begin[inx] - p.kernel[inx]) / p.strides[inx] + 1lu;
             s_dims += "\n                    <dim>";
             s_dims += std::to_string(dim) + "</dim>";
         }
@@ -347,9 +347,9 @@ INSTANTIATE_TEST_CASE_P(
         TestConvolution, MKLDNNGraphConvolutionTests,
         ::testing::Values(
         /*0*/   conv_test_params{{1, 9, 16, 32},
-                                 {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 6, MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1 },
+                                 {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "same_upper", 6, MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1 },
                 conv_test_params{{1, 9, 32, 16},
-                                 {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit },
+                                 {2, 4}, {1, 1}, {1, 1}, {0, 2}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit },
                 conv_test_params{{1, 9, 32, 16},
                                  {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit },
                 conv_test_params{{1, 3, 40, 40},
@@ -392,13 +392,13 @@ INSTANTIATE_TEST_CASE_P(
                                  {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
                 conv_test_params{{1, 5, 15, 20, 20},
                                  {3, 3, 3}, {3, 2, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
-                conv_test_params{{1, 5, 15, 20, 20},
-                                 {3, 3, 3}, {1, 1, 1}, {2, 2, 2}, {1, 1, 1}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
+                // conv_test_params{{1, 5, 15, 20, 20},
+                //                  {3, 3, 3}, {1, 1, 1}, {2, 2, 2}, {1, 1, 1}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
                 conv_test_params{{1, 16, 30, 30, 10},
                                  {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas,
                                  {MKLDNNPlugin::impl_desc_type::gemm_blas} },
                 conv_test_params{{1, 4, 16, 16, 16},
-                                 {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, 8, 1, "same_upper", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
+                                 {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, 8, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
 #endif
         /*20*/  conv_test_params{{1, 16, 30, 30, 10},
                                  {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit },
@@ -492,7 +492,7 @@ INSTANTIATE_TEST_CASE_P(
         TestDynBatchConvolution, MKLDNNGraphDynBatchConvolutionTests,
         ::testing::Values(
                 conv_test_params{{1, 8, 16, 32},
-                                 {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 7, MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1,
+                                 {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "same_upper", 7, MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1,
                                  {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd}},
                 conv_test_params{{1, 9, 32, 16},
                                  {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit,
index ed09cec..d9d98d4 100644 (file)
@@ -47,7 +47,7 @@ void ref_eltwise(const std::vector<InferenceEngine::TBlob<data_t>> &src, Inferen
         std::istringstream stream(prm.scales);
         std::string str;
         while (getline(stream, str, ',')) {
-            float val = std::stof(str);
+            float val = InferenceEngine::CNNLayer::ie_parse_float(str);
             scales.push_back(val);
         }
     } else {
@@ -344,7 +344,7 @@ protected:
 
         std::string scale;
         if (!p.scales.empty()) {
-            scale = std::string("coeff=\"") + p.scales + std::string("\"");
+            scale = std::string("coeff=\"") + to_string_c_locale(p.scales) + std::string("\"");
         }
         REPLACE_WITH_STR(model, "_OP_", op);
         REPLACE_WITH_STR(model, "_COEFF_", scale);
@@ -588,14 +588,14 @@ protected:
         std::string model = model_t;
         std::string op = select_op(p.op);
 
-        std::string src_dims1;
+        std::string src_dims1 = "";
         for (auto &dim : p.dims1) {
             src_dims1 += "\n                    <dim>";
             src_dims1 += std::to_string(dim) + "</dim>";
         }
         REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);
 
-        std::string src_dims2;
+        std::string src_dims2 = "";
         for (auto &dim : p.dims2) {
             src_dims2 += "\n                    <dim>";
             src_dims2 += std::to_string(dim) + "</dim>";
@@ -617,7 +617,7 @@ protected:
 
         std::string scale;
         if (!p.scales.empty()) {
-            scale = std::string("coeff=\"") + p.scales + std::string("\"");
+            scale = std::string("coeff=\"") + to_string_c_locale(p.scales) + std::string("\"");
         }
         REPLACE_WITH_STR(model, "_OP_", op);
         REPLACE_WITH_STR(model, "_COEFF_", scale);
@@ -652,27 +652,7 @@ protected:
                 }
             }
             InferenceEngine::SizeVector dims_src1 = p.dims1;
-            InferenceEngine::Layout layout1 = InferenceEngine::ANY;
-            switch (p.dims1.size()) {
-                case 4:
-                    layout1 = InferenceEngine::NCHW;
-                    break;
-                case 5:
-                    layout1 = InferenceEngine::NCDHW;
-                    break;
-            }
-            InferenceEngine::SizeVector dims_src2 = p.dims2;
-            InferenceEngine::Layout layout2 = InferenceEngine::ANY;
-            switch (p.dims2.size()) {
-                case 4:
-                    layout2 = InferenceEngine::NCHW;
-                    break;
-                case 5:
-                    layout2 = InferenceEngine::NCDHW;
-                    break;
-            }
-
-            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, layout1});
+            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::TensorDesc::getLayoutByDims(p.dims1) });
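+            // the blob layout is derived from the tensor rank via TensorDesc::getLayoutByDims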
             src1->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
@@ -681,7 +661,9 @@ protected:
                 FAIL() << "Cannot cast blob to TBlob<float>.";
 
             fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);
-            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, layout2});
+
+            InferenceEngine::SizeVector dims_src2 = p.dims2;
+            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::TensorDesc::getLayoutByDims(p.dims2) });
             src2->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
@@ -762,22 +744,22 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
         TestsDiffDims, MKLDNNGraphEltwise2InputsTests,
         ::testing::Values(
-                eltwise_test_params{{1},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3},{1},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3},{3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3},{1},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3},{3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3, 3},{1},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3, 3, 3},{1},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                 eltwise_test_params{{1, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
index d8ffa20..e357668 100644 (file)
@@ -5,6 +5,7 @@
 #include <gtest/gtest.h>
 #include <gmock/gmock-spec-builders.h>
 #include "mkldnn_plugin/mkldnn_graph.h"
+#include "mkldnn_plugin/mkldnn_exec_network.h"
 
 #include "test_graph.hpp"
 
index 3b1b7d2..d91b3ff 100644 (file)
@@ -202,7 +202,9 @@ INSTANTIATE_TEST_CASE_P(
                 permute_test_params{{2, 3, 4, 5, 7}, {0, 2, 4, 3, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown},
                 permute_test_params{{2, 3, 4, 5, 7}, {0, 4, 2, 3, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown},
                 permute_test_params{{2, 3, 4, 5}, {0, 3, 1, 2}, 1, MKLDNNPlugin::impl_desc_type::unknown},
-                permute_test_params{{3, 4, 7}, {1, 0, 2}, 1, MKLDNNPlugin::impl_desc_type::unknown}
+                permute_test_params{{3, 4, 7}, {1, 0, 2}, 1, MKLDNNPlugin::impl_desc_type::unknown},
+                permute_test_params{{3, 4, 7, 8, 4}, {0, 2, 3, 4, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown},
+                permute_test_params{{3, 4, 7, 8, 4}, {0, 4, 1, 2, 3}, 1, MKLDNNPlugin::impl_desc_type::unknown}
         ));
 
 class MKLDNNGraphDynBatchPermuteTests: public MKLDNNGraphPermuteTests {
@@ -288,5 +290,7 @@ INSTANTIATE_TEST_CASE_P(
                 permute_test_params{{2, 3, 4, 5, 7}, {0, 2, 1, 3, 4}, 1, MKLDNNPlugin::impl_desc_type::unknown},
                 permute_test_params{{2, 3, 4, 5, 7}, {0, 2, 4, 3, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown},
                 permute_test_params{{2, 3, 4, 5, 7}, {0, 4, 2, 3, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown},
-                permute_test_params{{2, 3, 4, 5}, {0, 3, 1, 2}, 1, MKLDNNPlugin::impl_desc_type::unknown}
+                permute_test_params{{2, 3, 4, 5}, {0, 3, 1, 2}, 1, MKLDNNPlugin::impl_desc_type::unknown},
+                permute_test_params{{3, 4, 7, 8, 4}, {0, 2, 3, 4, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown},
+                permute_test_params{{3, 4, 7, 8, 4}, {0, 4, 1, 2, 3}, 1, MKLDNNPlugin::impl_desc_type::unknown}
         ));
index e8e20c0..85f73ad 100644 (file)
@@ -3,17 +3,14 @@
 //
 
 #include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_plugin/mkldnn_graph.h"
+#include "mkldnn_plugin/mkldnn_exec_network.h"
 
-#include "single_layer_common.hpp"
 #include <mkldnn_plugin/mkldnn_extension_utils.h>
 #include "tests_common.hpp"
 #include "../test_graph.hpp"
 #include <ext_list.hpp>
 #include <ie_builders.hpp>
 #include <ie_ir_reader.hpp>
-#include <ngraph/frontend/onnx_import/onnx.hpp>
 
 using namespace ::testing;
 using namespace std;
@@ -3817,7 +3814,7 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForXceptionTopology) {
 
 TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForGrayscaleInput) {
     std::string model = R"V0G0N(
-<net batch="1" name="xception" version="2">
+<net batch="1" name="xception" version="4">
        <layers>
                <layer id="1" name="data" precision="FP32" type="Input">
                        <output>
@@ -3830,7 +3827,7 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForGrayscaleInput) {
                        </output>
                </layer>
                <layer id="2" name="conv1" precision="FP32" type="Convolution">
-                       <data dilation-x="1" dilation-y="1" group="1" kernel-x="3" kernel-y="3" output="32" pad-x="0" pad-y="0" stride-x="1" stride-y="1"/>
+                       <data auto_pad="same_upper" dilations="1,1" group="1" kernel="3,3" output="32" pads_begin="0,0" pads_end="2,2" strides="1,1"/>
                        <input>
                                <port id="2">
                                        <dim>1</dim>
@@ -4505,7 +4502,7 @@ TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0003) {
 
 TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
     std::string model = R"V0G0N(
-<net name="net" version="2" batch="1">
+<net name="net" version="4" batch="1">
     <layers>
         <layer name="data0" type="Input" precision="FP32" id="0">
             <output>
@@ -4528,7 +4525,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
             </output>
         </layer>
         <layer name="conv0" type="Convolution" precision="FP32" id="2">
-            <convolution_data stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="48" group="1"/>
+                       <data auto_pad="same_upper" dilations="1,1" group="1" kernel="1,1" output="48" pads_end="0, 0" pads_begin="150,300" strides="1,1"/>
             <input>
                 <port id="0">
                     <dim>1</dim>
@@ -4549,7 +4546,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
             <biases offset="6144" size="192"/>
         </layer>
         <layer name="conv1" type="Convolution" precision="FP32" id="3">
-            <convolution_data stride-x="2" stride-y="2" pad-x="1" pad-y="1" kernel-x="3" kernel-y="3" output="48" group="48"/>
+                       <data auto_pad="same_upper" dilations="1,1" group="48" kernel="3,3" output="48" pads_end="1,1" pads_begin="1,1" strides="2,2"/>
             <input>
                 <port id="0">
                     <dim>1</dim>
@@ -4570,7 +4567,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
             <biases offset="7872" size="192"/>
         </layer>
         <layer name="eltwise" type="Eltwise" precision="FP32" id="4">
-            <elementwise_data operation="sum"/>
+            <data operation="sum"/>
             <input>
                 <port id="0">
                     <dim>1</dim>
@@ -4613,7 +4610,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
             </output>
         </layer>
         <layer name="power" type="Power" precision="FP32" id="6">
-            <power_data power="1" scale="-1" shift="0"/>
+            <data power="1" scale="-1" shift="0"/>
             <input>
                 <port id="0">
                     <dim>1</dim>
index 67188fb..da43e2a 100644 (file)
@@ -64,7 +64,11 @@ public:
 
         auto input = inputNodes.find(name);
         if (input != inputNodes.end()) {
-            MKLDNNPlugin::MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims();
+            MKLDNNPlugin::MKLDNNDims outDims;
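+            // a 0-D (scalar) input is promoted to a one-element 1-D shape so the batch handling below works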
+            if (input->second->getChildEdgeAt(0)->getDims().ndims() == 0)
+                outDims = MKLDNNPlugin::MKLDNNDims(InferenceEngine::SizeVector(1, 1));
+            else
+                outDims = input->second->getChildEdgeAt(0)->getDims();
             if (batch < 1)
                 batch = outDims[0];
 
@@ -4,12 +4,12 @@
 
 #include <gtest/gtest.h>
 
-#include "memory_solver.hpp"
+#include "mkldnn_memory_solver.hpp"
 #include "details/ie_exception.hpp"
 
 using namespace testing;
-using namespace InferenceEngine;
-using Box = InferenceEngine::MemorySolver::Box;
+using namespace MKLDNNPlugin;
+using Box = MKLDNNPlugin::MemorySolver::Box;
 
 TEST(MemSolverTest, LinearAndEven) {
     int n = 0;
@@ -198,7 +198,7 @@ TEST(MemSolverTest, GetOffsetThows) {
     MemorySolver ms(boxes);
     ms.solve();
 
-    EXPECT_THROW(ms.getOffset(100), details::InferenceEngineException);
+    EXPECT_THROW(ms.getOffset(100), InferenceEngine::details::InferenceEngineException);
 }
 
 TEST(MemSolverTest, NoOverlapping) {
index fce08b2..0353000 100644 (file)
@@ -38,18 +38,17 @@ protected:
         CONNECT(3, 5);
         CONNECT(5, 2);
 
-        EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap &maps) {
+        EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap &maps) {
             prepareInputs(maps, 12);
         })));
 
-        EXPECT_CALL(mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap &maps) {
+        EXPECT_CALL(*mockNet, getOutputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](OutputsDataMap &maps) {
             prepareOutputs(maps);
         })));
 
-        EXPECT_CALL(mockNet, getTargetDevice()).WillRepeatedly(Return(TargetDevice::eCPU));
-        EXPECT_CALL(mockNet, getPrecision()).WillRepeatedly(Return(Precision::FP16));
-        EXPECT_CALL(mockNet, getBatchSize()).WillRepeatedly(Return(12));
-        EXPECT_CALL(mockNet, getName(_, _)).WillRepeatedly(Invoke([](char *pName, size_t len) {
+        EXPECT_CALL(*mockNet, getPrecision()).WillRepeatedly(Return(Precision::FP16));
+        EXPECT_CALL(*mockNet, getBatchSize()).WillRepeatedly(Return(12));
+        EXPECT_CALL(*mockNet, getName(_, _)).WillRepeatedly(Invoke([](char *pName, size_t len) {
             memcpy(pName, "nm", 3);
         }));
 
@@ -60,12 +59,10 @@ protected:
 };
 
 TEST_F(GraphCopyTests, copyNetworkPreserveBasicParams) {
-
-    auto clone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto clone = CNNNetCopy<MockCopier>(*mockNet, mc);
 
     //network was copied not just assigned
-    ASSERT_NE(clone.get(), &mockNet);
-    ASSERT_EQ(clone->getTargetDevice(), TargetDevice::eCPU);
+    ASSERT_NE(clone.get(), mockNet.get());
     ASSERT_EQ(clone->getPrecision(), Precision::FP16);
 
     char name[20];
@@ -74,41 +71,38 @@ TEST_F(GraphCopyTests, copyNetworkPreserveBasicParams) {
 }
 
 TEST_F(GraphCopyTests, canPreserveBatchWhenCopyNetwork) {
-    auto clone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto clone = CNNNetCopy<MockCopier>(*mockNet, mc);
     ASSERT_EQ(clone->getBatchSize(), 12);
 }
 
 
 TEST_F(GraphCopyTests, canPreserveInputs) {
-    auto clone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto clone = CNNNetCopy<MockCopier>(*mockNet, mc);
 
     InputsDataMap inputs, inputsTarget;
     InputsDataMap heads, headsTarget;
 
     clone->getInputsInfo(inputs);
-    mockNet.getInputsInfo(inputsTarget);
+    mockNet->getInputsInfo(inputsTarget);
     ASSERT_INPUTS_INFO_EQ(inputs, inputsTarget);
 }
 
 TEST_F(GraphCopyTests, canPreserveOutputs) {
 
-    auto clone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto clone = CNNNetCopy<MockCopier>(*mockNet, mc);
 
     OutputsDataMap outTarget, outSource;
     clone->getOutputsInfo(outTarget);
-    mockNet.getOutputsInfo(outSource);
+    mockNet->getOutputsInfo(outSource);
 
     ASSERT_OUTPUTS_INFO_EQ(outSource, outTarget);
 }
 
 TEST_F(GraphCopyTests, canPreserveAttributes) {
-    auto clone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto clone = CNNNetCopy<MockCopier>(*mockNet, mc);
     ADD_ATTR(1, "id", "r-1-2-3");
     ADD_ATTR(2, "id", "r-1-2-3");
-
-    IE_SUPPRESS_DEPRECATED_START
-    CNNNetwork cloned (clone.get());
-    IE_SUPPRESS_DEPRECATED_END
+    CNNNetwork cloned (clone);
     auto idMemOutput = cloned.getLayerByName("1")->GetParamAsString("id");
     auto idMemInput  = cloned.getLayerByName("2")->GetParamAsString("id");
 
@@ -117,7 +111,7 @@ TEST_F(GraphCopyTests, canPreserveAttributes) {
 }
 
 TEST_F(GraphCopyTests, canPreserveGetData) {
-    auto clone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto clone = CNNNetCopy<MockCopier>(*mockNet, mc);
 
     ASSERT_NE(clone->getData("1"), nullptr);
     ASSERT_NE(clone->getData("2"), nullptr);
@@ -127,7 +121,7 @@ TEST_F(GraphCopyTests, canPreserveGetData) {
 }
 
 TEST_F(GraphCopyTests, canPreserveTopology) {
-    auto iclone = CNNNetCopy<MockCopier>(mockNet, mc);
+    auto iclone = CNNNetCopy<MockCopier>(*mockNet, mc);
     auto clone = CNNNetwork(iclone);
 
     ASSERT_EQ(clone.layerCount(), 5);
@@ -159,7 +153,7 @@ using FP32_2_FP32 = GNAPluginNS::details::QuantPair<_FP32_2_FP32 , _FP32_2_FP32
 
 TEST_F(GraphCopyTests, canQuantizeTopology) {
 
-    auto iclone = ModelQuantizer<FP32_2_FP32>().quantize(mockNet, std::vector<float >({1.0f, 1.0f}));
+    auto iclone = ModelQuantizer<FP32_2_FP32>().quantize(*mockNet, std::vector<float >({1.0f, 1.0f}));
     auto clone = CNNNetwork(iclone);
 
     CNNNetBFS(clone.getLayerByName("1"), [&](CNNLayerPtr layer) {
@@ -224,9 +218,7 @@ TEST(CNNSpecificGraphCopyTests, copyNetworkWithClampLayer) {
     struct EmptyStruct {};
     auto visitor = [&](CNNLayerPtr lp) { return injectData<EmptyStruct>(lp); };
     auto copied_net_ptr = CNNNetCopy(network, visitor);
-    IE_SUPPRESS_DEPRECATED_START
-    auto copied_net = CNNNetwork(copied_net_ptr.get());
-    IE_SUPPRESS_DEPRECATED_END
+    auto copied_net = CNNNetwork(copied_net_ptr);
 
     //check that Clamp layer was properly copied
     auto layer = std::dynamic_pointer_cast<ClampLayer>(copied_net.getLayerByName("ClampLayer"));
@@ -294,9 +286,7 @@ TEST(CNNSpecificGraphCopyTests, copyPreprocess) {
     struct EmptyStruct {};
     auto visitor = [&](CNNLayerPtr lp) { return injectData<EmptyStruct>(lp); };
     auto copied_net_ptr = CNNNetCopy(network, visitor);
-    IE_SUPPRESS_DEPRECATED_START
-    auto copied_net = CNNNetwork(copied_net_ptr.get());
-    IE_SUPPRESS_DEPRECATED_END
+    auto copied_net = CNNNetwork(copied_net_ptr);
 
     //check that pre process Info existed in copied network
     auto &pp = copied_net.getInputsInfo().begin()->second->getPreProcess();
@@ -359,9 +349,7 @@ TEST(CNNSpecificGraphCopyTests, copyNetworkWithDeconvolution) {
     struct EmptyStruct {};
     auto visitor = [&](CNNLayerPtr lp) { return injectData<EmptyStruct>(lp); };
     auto copied_net_ptr = CNNNetCopy(network, visitor);
-    IE_SUPPRESS_DEPRECATED_START
-    auto copied_net = CNNNetwork(copied_net_ptr.get());
-    IE_SUPPRESS_DEPRECATED_END
+    auto copied_net = CNNNetwork(copied_net_ptr);
 
     // check that Clamp layer was properly copied
     auto layer = std::dynamic_pointer_cast<DeconvolutionLayer>(copied_net.getLayerByName("upsample_merged"));
index 5e6a683..a4dbb24 100644 (file)
@@ -30,7 +30,7 @@ class GraphTestsBase : public ::testing::Test {
     std::vector<CNNLayerPtr> layers;
     std::vector<std::vector<DataPtr>> datas;
 
-    MockICNNNetwork mockNet;
+    std::shared_ptr<MockICNNNetwork> mockNet;
     InferenceEngine::CNNNetwork wrap;
 
     /**
@@ -63,7 +63,7 @@ class GraphTestsBase : public ::testing::Test {
     }
 
     CNNLayerPtr layerByName(std::string name) {
-        auto sorted = InferenceEngine::details::CNNNetSortTopologically(mockNet);
+        auto sorted = InferenceEngine::details::CNNNetSortTopologically(*mockNet);
 
         auto i = std::find_if(sorted.begin(), sorted.end(), [&](CNNLayerPtr l){
             return l->name == name;
@@ -232,9 +232,8 @@ class GraphTestsBase : public ::testing::Test {
      */
     int _batchSize = 1;
     void SetUp() override {
-        IE_SUPPRESS_DEPRECATED_START
-        wrap = InferenceEngine::CNNNetwork(&mockNet);
-        IE_SUPPRESS_DEPRECATED_END
+        mockNet = std::make_shared<MockICNNNetwork>();
+        wrap = InferenceEngine::CNNNetwork(std::dynamic_pointer_cast<ICNNNetwork>(mockNet));
 
         datas.resize(10);
         for (int i = 0; i < 10; i++) {
diff --git a/inference-engine/tests/unit/graph_tools/graph_tools_functional_tests.cpp b/inference-engine/tests/unit/graph_tools/graph_tools_functional_tests.cpp
new file mode 100644 (file)
index 0000000..a969d71
--- /dev/null
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <details/ie_cnn_network_tools.h>
+#include <cpp/ie_cnn_net_reader.h>
+#include <cpp/ie_cnn_network.h>
+#include <memory>
+#include <test_model_path.hpp>
+
+using namespace testing;
+using namespace InferenceEngine::details;
+using namespace InferenceEngine;
+using namespace std;
+
+class GraphToolsFncTest : public ::testing::Test {
+public:
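+    // Checks that "sorted" is a valid topological order: every producer of
+    // sorted[i] must already appear somewhere in sorted[0..i-1].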
+    template <typename T>
+    static void checkSort(const T &sorted) {
+        for (int i = 0; i < sorted.size(); i++) {
+            //check that all input already visited:
+            for (auto &inputs : sorted[i]->insData) {
+                auto inputName = inputs.lock()->getCreatorLayer().lock()->name;
+
+                bool bFound = false;
+                for (int j = 0; j < i; j++) {
+                    if (sorted[j]->name == inputName) {
+                        bFound = true;
+                        break;
+                    }
+                }
+                ASSERT_TRUE(bFound) << "order is not correct, layer " << sorted[i]->name << " has missed input: "
+                                    << inputName;
+            }
+        }
+    }
+};
+
index ad6dc17..70c4b82 100644 (file)
@@ -104,10 +104,10 @@ TEST_F(GraphToolsTest, canSortTopologically) {
     CONNECT(2, 1);
     CONNECT(1, 4);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
-    auto sorted = CNNNetSortTopologically(mockNet);
+    auto sorted = CNNNetSortTopologically(*mockNet);
 
     EXPECT_EQ(sorted.size(), 4);
 
@@ -139,10 +139,10 @@ TEST_F(GraphToolsTest, canDetectLoopsWhileSortTing) {
     CONNECT(4, 8);
     CONNECT(8, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
-    ASSERT_ANY_THROW(CNNNetSortTopologically(mockNet));
+    ASSERT_ANY_THROW(CNNNetSortTopologically(*mockNet));
 }
 
 
@@ -154,11 +154,11 @@ TEST_F(GraphToolsTest, canSortIfInputsPointsToLayerWithMultiInputs) {
     CONNECT(3, 5);
     CONNECT(5, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    auto sorted = CNNNetSortTopologically(mockNet);
+    auto sorted = CNNNetSortTopologically(*mockNet);
 
     vector<vector<string>> expected = {
         {"1", "3", "4", "5", "2"},
@@ -203,10 +203,10 @@ TEST_F(GraphToolsTest, canGetAllMemoryInputsLayersFromStandardInputs) {
     CONNECT(5, 7);
 
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareSomeInputs(maps, {1});
     })));
-    auto allInputLayers = CNNNetGetAllInputLayers(mockNet);
+    auto allInputLayers = CNNNetGetAllInputLayers(*mockNet);
     ASSERT_EQ(3, allInputLayers.size());
     auto element = allInputLayers.begin();
     ASSERT_STREQ("1", element->get()->name.c_str());
@@ -220,10 +220,10 @@ TEST_F(GraphToolsTest, canGetSingleInputLayer) {
     // 1->2
     CONNECT(1, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareSomeInputs(maps, {1});
     })));
-    auto allInputLayers = CNNNetGetAllInputLayers(mockNet);
+    auto allInputLayers = CNNNetGetAllInputLayers(*mockNet);
     ASSERT_EQ(1, allInputLayers.size());
 }
 
@@ -239,7 +239,7 @@ TEST_F(GraphToolsTest, canIterateOverCNNNetwork) {
     CONNECT(6, 7);
     CONNECT(7, 8);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
@@ -265,7 +265,7 @@ TEST_F(GraphToolsTest, canIterateOverCNNNetworkWithCycle) {
     CONNECT(3, 4);
     CONNECT(4, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
@@ -285,7 +285,7 @@ TEST_F(GraphToolsTest, canCompareCNNNetworkIterators) {
     CONNECT(1, 2);
     CONNECT(1, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
@@ -302,7 +302,7 @@ TEST_F(GraphToolsTest, canIterateOverEmptyNetwork) {
     CONNECT(1, 2);
     CONNECT(2, 1);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillOnce(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
@@ -318,11 +318,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSwapWithItself) {
     CONNECT(1, 2);
     CONNECT(2, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -338,11 +338,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSwapWithItself) {
 TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_1) {
     CONNECT(1, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -359,11 +359,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_2) {
     CONNECT(1, 2);
     CONNECT(2, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -381,11 +381,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_3) {
     CONNECT(1, 2);
     CONNECT(2, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -407,11 +407,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersDoesSwapDims) {
     SET_DIMS(2, {20, 1});
     SET_DIMS(3, {30, 1});
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -434,11 +434,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSimpleCase_4) {
     CONNECT(3, 4);
     CONNECT(4, 5);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -458,11 +458,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit) {
     CONNECT(1, 2);
     CONNECT(1, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -479,11 +479,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit_2) {
     CONNECT(1, 2);
     CONNECT(1, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -504,11 +504,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit_3) {
     CONNECT(2, 4);
     CONNECT(2, 5);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -532,11 +532,11 @@ TEST_F(GraphToolsTest, CNNNetSwapLayersSplit_4) {
     CONNECT(4, 2);
     CONNECT(4, 1);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_, _, _)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -562,11 +562,11 @@ TEST_F(GraphToolsTest, CanNotInsertLayerIntoNonAdjiacendLayers) {
     CONNECT(1, 2);
     CONNECT(2, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -580,11 +580,11 @@ TEST_F(GraphToolsTest, CanNotInsertLayerIntoNonAdjiacendLayers) {
 TEST_F(GraphToolsTest, CNNNetworkInsertLayerSimpleCase) {
     CONNECT(1, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -602,11 +602,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSimpleCaseWithMultipleOutputs) {
     CONNECT(1, 2);
     CONNECT(1, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -626,11 +626,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSimpleCaseWithMultipleInputs) {
     CONNECT(1, 2);
     CONNECT(3, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -650,11 +650,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSplitAndConcat) {
     CONNECT_FROM_PORT(1, 1, 2);
     CONNECT_FROM_PORT(1, 2, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0,1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -677,11 +677,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertLayerSplitAndConcat) {
 TEST_F(GraphToolsTest, CNNNetworkInsertAfterLastLayer) {
     CONNECT(1, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -698,11 +698,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAfterAll) {
     CONNECT(1, 2);
     CONNECT(1, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -719,11 +719,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAllAfterSplit) {
     CONNECT_FROM_PORT(1, 0, 2);
     CONNECT_FROM_PORT(1, 1, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -741,11 +741,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsert1AfterSplit) {
     CONNECT_FROM_PORT(1, 1, 3);
     CONNECT_FROM_PORT(1, 2, 4);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -764,11 +764,11 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAfter2ConnectionsToEltwise) {
     CONNECT(1, 2);
     CONNECT(1, 2);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -786,11 +786,11 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveNullPointerLayer) {
     CONNECT_FROM_PORT(1, 1, 3);
     CONNECT_FROM_PORT(1, 2, 4);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -804,11 +804,11 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveInputOrOutputLayer) {
     CONNECT_FROM_PORT(2, 0, 3);
     CONNECT_FROM_PORT(1, 0, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -825,11 +825,11 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveLayerThaHas2Outputs) {
     CONNECT_FROM_PORT(1, 0, 3);
     CONNECT_FROM_PORT(5, 0, 4);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -853,11 +853,11 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveLayerSplit) {
     CONNECT_FROM_PORT(1, 1, 3);
     CONNECT_FROM_PORT(2, 0, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -883,11 +883,11 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveLayerSplit2) {
     CONNECT_FROM_PORT(2, 0, 4);
     CONNECT_FROM_PORT(2, 0, 5);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
@@ -911,11 +911,11 @@ TEST_F(GraphToolsTest, CNNNetworkRemoveSimpleLayer) {
     CONNECT_FROM_PORT(1, 0, 2);
     CONNECT_FROM_PORT(2, 0, 3);
 
-    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+    EXPECT_CALL(*mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
         prepareInputs(maps);
     })));
 
-    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+    EXPECT_CALL(*mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
         l = layerByName(name);
         return l== nullptr ? GENERAL_ERROR : OK;
     })));
index 2612f53..16f7564 100644 (file)
@@ -51,7 +51,13 @@ TEST_F(BlobProxyTests, shouldNotDeAllocate)
     SizeVector v = {1, 2, 3};
     auto allocator = createMockAllocator();
 
-    TBlobProxy<float> proxy(Precision::FP32, C, TBlob<float>({ Precision::FP32, v, CHW}, dynamic_pointer_cast<IAllocator>(allocator)), 2, {2});
+    TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
+
+    Blob::Ptr spBlob(&blob, [](Blob*) {
+        //don't delete
+    });
+
+    TBlobProxy<float> proxy(Precision::FP32, C, spBlob, 2, {2});
 
     EXPECT_EQ(((Blob&)proxy).deallocate(), false);
 }
@@ -72,7 +78,11 @@ TEST_F(BlobProxyTests, canAccessProxyBlobUsingBaseMethod)
     TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
     blob.allocate();
 
-    TBlobProxy<float> proxy(Precision::FP32, C, move(blob), 2, {2});
+    Blob::Ptr spBlob(&blob, [](Blob*) {
+        //don't delete
+    });
+
+    TBlobProxy<float> proxy(Precision::FP32, C, spBlob, 2, {2});
 
     auto proxyBuffer = proxy.buffer();
     float *ptr = (float*)(void*)proxyBuffer;
@@ -95,7 +105,11 @@ TEST_F(BlobProxyTests, canAccessProxyBlobUsingHelpers)
     TBlob<float> blob({Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
     blob.allocate();
 
-    TBlobProxy<float> proxy(Precision::FP32, C, std::move(blob), 2, {2});
+    Blob::Ptr spBlob(&blob, [](Blob*) {
+        //don't delete
+    });
+
+    TBlobProxy<float> proxy(Precision::FP32, C, spBlob, 2, {2});
 
     auto proxyData = proxy.data();
     float *ptr = (float * )&proxyData[0];
index 19547a5..f4bbeec 100644 (file)
@@ -513,9 +513,10 @@ TEST_F(BlobTests, cannotIncreaseSizeOfPreallocated) {
 
     float input[] = {0.1f, 0.2f, 0.3f};
     auto  b = make_shared_blob({ Precision::FP32, {1, 2}, HW }, input);
+    ASSERT_NE(nullptr, b->buffer().as<float*>());
 
     b->Resize({1,3});
-    //since allocator isno't releasing, user have to be carefull that this still use old array
+    // since the allocator isn't releasing memory, the user has to be careful that this still uses the old array
     ASSERT_EQ(nullptr, b->buffer().as<float*>());
 
     b->Resize({1,1});
@@ -530,6 +531,8 @@ TEST_F(BlobTests, canAcceptpreallocatedSize) {
 
     float input[] = {0.1f, 0.2f, 0.3f};
     auto  b = make_shared_blob({ Precision::FP32, {1, 2}, HW }, input, 100);
+    ASSERT_NE(nullptr, b->buffer().as<float*>());
+
     b->Resize({1,101});
     // since the allocator isn't releasing memory, the user has to be careful that this still uses the old array
     ASSERT_EQ(nullptr, b->buffer().as<float*>());
index d69972d..90cab63 100644 (file)
@@ -21,7 +21,6 @@ public:
 };
 
 TEST_F(CNNNetworkTests, throwsOnInitWithNull) {
-    IE_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(CNNNetwork network(nullptr), InferenceEngine::details::InferenceEngineException);
-    IE_SUPPRESS_DEPRECATED_END
+    std::shared_ptr<ICNNNetwork> nlptr = nullptr;
+    ASSERT_THROW(CNNNetwork network(nlptr), InferenceEngine::details::InferenceEngineException);
 }
index 022cc67..9fcb67a 100644 (file)
@@ -25,23 +25,18 @@ TEST_F(ExecutorManagerTests, canCreateSingleExecutorManager) {
 }
 
 TEST_F(ExecutorManagerTests, createDifferentExecutorsForDifferentDevices) {
-    auto device1 = TargetDeviceInfo::name(TargetDevice::eCPU);
-    auto device2 = TargetDeviceInfo::name(TargetDevice::eGPU);
-
-    auto executor1 = _manager.getExecutor(device1);
-    auto executor2 = _manager.getExecutor(device2);
+    auto executor1 = _manager.getExecutor("CPU");
+    auto executor2 = _manager.getExecutor("GPU");
 
     ASSERT_NE(executor1, executor2);
     ASSERT_EQ(2, _manager.getExecutorsNumber());
 }
 
 TEST_F(ExecutorManagerTests, returnTheSameExecutorForTheSameDevice) {
-    auto device1 = TargetDeviceInfo::name(TargetDevice::eCPU);
-    auto device2 = TargetDeviceInfo::name(TargetDevice::eGPU);
-    auto executor1 = _manager.getExecutor(device1);
-    auto executor2 = _manager.getExecutor(device2);
+    auto executor1 = _manager.getExecutor("CPU");
+    auto executor2 = _manager.getExecutor("GPU");
 
-    auto executor = _manager.getExecutor(device2);
+    auto executor = _manager.getExecutor("GPU");
 
     ASSERT_EQ(executor, executor2);
     ASSERT_EQ(2, _manager.getExecutorsNumber());
index 88816b7..7e258da 100644 (file)
@@ -43,7 +43,7 @@ protected:
 
     virtual void SetUp() {
         mock_plugin_impl.reset(new MockInferencePluginInternal());
-        plugin = details::shared_from_irelease(make_ie_compatible_plugin({2, 0, "test", "version"}, mock_plugin_impl));
+        plugin = details::shared_from_irelease(make_ie_compatible_plugin({{2, 1}, "test", "version"}, mock_plugin_impl));
         mockExeNetworkInternal = make_shared<MockExecutableNetworkInternal>();
     }
 
@@ -183,7 +183,7 @@ protected:
 
     virtual void SetUp() {
         mockPluginImpl = make_shared<MockInferencePluginInternal2>();
-        plugin = details::shared_from_irelease(make_ie_compatible_plugin({2, 0, "test", "version"}, mockPluginImpl));
+        plugin = details::shared_from_irelease(make_ie_compatible_plugin({{2, 1}, "test", "version"}, mockPluginImpl));
         mockExeNetwork = make_shared<MockIExecutableNetwork>();
     }
 
index 9bd254c..08d6c10 100644 (file)
@@ -22,7 +22,7 @@ class PluginBaseTests: public ::testing::Test {
     }
     virtual void SetUp() {
         mock_impl.reset(new MockPluginImpl());
-        plugin = details::shared_from_irelease(make_ie_compatible_plugin({2, 0, "test", "version"}, mock_impl));
+        plugin = details::shared_from_irelease(make_ie_compatible_plugin({{2, 1}, "test", "version"}, mock_impl));
     }
 };
 
@@ -33,7 +33,7 @@ TEST_F(PluginBaseTests, canReportVersion) {
     EXPECT_STREQ(V->buildNumber, "test");
     EXPECT_STREQ(V->description, "version");
     EXPECT_EQ(V->apiVersion.major, 2);
-    EXPECT_EQ(V->apiVersion.minor, 0);
+    EXPECT_EQ(V->apiVersion.minor, 1);
 
 }
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/device_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/device_tests.cpp
deleted file mode 100644 (file)
index 280cc18..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include "ie_device.hpp"
-#include "details/ie_exception.hpp"
-
-using namespace InferenceEngine;
-
-class DeviceTests : public ::testing::Test {
-protected:
-    virtual void TearDown() {
-    }
-
-    virtual void SetUp() {
-    }
-
-public:
-
-};
-
-TEST_F(DeviceTests, internalFindThrowsOnBadDevice) {
-    FindPluginRequest request = { TargetDevice::eBalanced };
-    ASSERT_THROW(findPlugin(request), InferenceEngine::details::InferenceEngineException);
-}
-
-TEST_F(DeviceTests, externalFindReturnsErrorStatus) {
-    FindPluginRequest request = { TargetDevice::eBalanced };
-    FindPluginResponse result;
-    ResponseDesc desc;
-    StatusCode status = findPlugin(request, result, &desc);
-    ASSERT_EQ(status, GENERAL_ERROR);
-}
-
-#if defined(ENABLE_MKL_DNN)
-TEST_F(DeviceTests, externalFindPopulatesResult) {
-    FindPluginRequest request = { TargetDevice::eCPU };
-    FindPluginResponse result;
-    ResponseDesc desc;
-    StatusCode status = findPlugin(request, result, &desc);
-    ASSERT_EQ(status, OK);
-    ASSERT_NE(result.names.size(), 0);
-}
-#endif
-
-TEST_F(DeviceTests, returnsProperDeviceName) {
-    ASSERT_STREQ(getDeviceName(TargetDevice::eDefault), "Default");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eBalanced), "Balanced");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eCPU), "CPU");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eGPU), "GPU");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eFPGA), "FPGA");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eMYRIAD), "MYRIAD");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eGNA), "GNA");
-    ASSERT_STREQ(getDeviceName(TargetDevice::eHETERO), "HETERO");
-    ASSERT_STREQ(getDeviceName(static_cast<TargetDevice>(-1)), "Unknown device");
-    //off by one test - might not be enough
-    ASSERT_STREQ(getDeviceName(static_cast<TargetDevice>((uint8_t)TargetDevice::eHETERO + 1)), "Unknown device");
-}
index 3c4cbf2..6920500 100644 (file)
@@ -6,6 +6,7 @@
 #include <single_layer_common.hpp>
 
 #include <cpp/ie_cnn_net_reader.h>
+#include <net_pass.h>
 
 using namespace ::testing;
 using namespace std;
@@ -96,6 +97,103 @@ class LocaleTests : public ::testing::Test {
 </net>
 )V0G0N";
 
+
+    std::string _model_LSTM = R"V0G0N(
+ <net batch="1" name="model" version="2">
+    <layers>
+        <layer id="0" name="Input" precision="FP32" type="Input">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>30</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="1" name="Split" precision="FP32" type="Split">
+            <data axis="1" />
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>30</dim>
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="2">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="3">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="2" name="LSTMCell" precision="FP32" type="LSTMCell">
+            <data hidden_size="10" clip="0.2"/>
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="2">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="4">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+            <blobs>
+                <weights offset="0" size="3200"/>
+                <biases offset="3200" size="160"/>
+            </blobs>
+        </layer>
+        <layer name="Eltwise" type="Eltwise" id="3" precision="FP32">
+            <data operation="sum" />
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </input>
+            <output>
+                <port id="2">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        </layers>
+        <edges>
+            <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+            <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
+            <edge from-layer="1" from-port="2" to-layer="2" to-port="1"/>
+            <edge from-layer="1" from-port="3" to-layer="2" to-port="2"/>
+            <edge from-layer="2" from-port="3" to-layer="3" to-port="0"/>
+            <edge from-layer="2" from-port="4" to-layer="3" to-port="1"/>
+        </edges>
+    </net>
+)V0G0N";
+
 protected:
     std::string getModel() const {
         std::string model = _model;
@@ -108,28 +206,35 @@ protected:
         return model;
     }
 
-    void testBody() const {
+    void testBody(bool isLSTM = false) const {
         CNNNetReader reader;
 
         // This model contains layers with float attributes.
         // Conversion from string may be affected by locale.
-        auto model = getModel();
+        std::string model = isLSTM ? _model_LSTM : getModel();
         reader.ReadNetwork(model.data(), model.length());
         auto net = reader.getNetwork();
 
-        auto power_layer = dynamic_pointer_cast<PowerLayer>(net.getLayerByName("power"));
-        ASSERT_EQ(power_layer->scale, 0.75f);
-        ASSERT_EQ(power_layer->offset, 0.35f);
-        ASSERT_EQ(power_layer->power, 0.5f);
+        if (!isLSTM) {
+            auto power_layer = dynamic_pointer_cast<PowerLayer>(net.getLayerByName("power"));
+            ASSERT_EQ(power_layer->scale, 0.75f);
+            ASSERT_EQ(power_layer->offset, 0.35f);
+            ASSERT_EQ(power_layer->power, 0.5f);
 
-        auto sum_layer = dynamic_pointer_cast<EltwiseLayer>(net.getLayerByName("sum"));
-        std::vector<float> ref_coeff {0.77f, 0.33f};
-        ASSERT_EQ(sum_layer->coeff, ref_coeff);
+            auto sum_layer = dynamic_pointer_cast<EltwiseLayer>(net.getLayerByName("sum"));
+            std::vector<float> ref_coeff{0.77f, 0.33f};
+            ASSERT_EQ(sum_layer->coeff, ref_coeff);
 
-        auto info = net.getInputsInfo();
-        auto preproc = info.begin()->second->getPreProcess();
-        ASSERT_EQ(preproc[0]->stdScale, 0.1f);
-        ASSERT_EQ(preproc[0]->meanValue, 104.006f);
+            auto info = net.getInputsInfo();
+            auto preproc = info.begin()->second->getPreProcess();
+            ASSERT_EQ(preproc[0]->stdScale, 0.1f);
+            ASSERT_EQ(preproc[0]->meanValue, 104.006f);
+        } else {
+            InferenceEngine::NetPass::UnrollRNN_if(net, [] (const RNNCellBase& rnn) -> bool { return true; });
+            auto lstmcell_layer = dynamic_pointer_cast<LSTMCell>(net.getLayerByName("LSTMCell"));
+            float ref_coeff(0.2f);
+            ASSERT_EQ(lstmcell_layer->clip, ref_coeff);
+        }
     }
 };
 
@@ -145,6 +250,18 @@ TEST_F(LocaleTests, WithUSLocale) {
     setlocale(LC_ALL, "");
 }
 
+TEST_F(LocaleTests, WithRULocaleOnLSTM) {
+    setlocale(LC_ALL, "ru_RU.UTF-8");
+    testBody(true);
+    setlocale(LC_ALL, "");
+}
+
+TEST_F(LocaleTests, WithUSLocaleOnLSTM) {
+    setlocale(LC_ALL, "en_US.UTF-8");
+    testBody(true);
+    setlocale(LC_ALL, "");
+}
+
 TEST_F(LocaleTests, DISABLED_WithRULocaleCPP) {
     auto prev = std::locale();
     std::locale::global(std::locale("ru_RU.UTF-8"));
index 7609a80..4b707f7 100644 (file)
@@ -25,7 +25,7 @@ protected:
     void TearDown() override {}
     void SetUp() override {}
 
-    void compareICNNNetworks(const ICNNNetwork& newNetwork, const ICNNNetwork& oldNetwork) {
+    void compareICNNNetworks(ICNNNetwork::Ptr newNetwork, const CNNNetwork& oldNetwork) {
         auto compareParamVal = [](const std::string& val1, const std::string& val2) -> bool {
             std::vector<std::string> vals1, vals2;
             std::stringstream ss1(val1);
@@ -62,10 +62,10 @@ protected:
             return true;
         };
         std::vector<std::string> err_log;
-        CNNNetwork network((ICNNNetwork*)&newNetwork);
-        CNNNetwork refNetwork((ICNNNetwork*)&oldNetwork);
-        if (newNetwork.layerCount() != oldNetwork.layerCount())
-            THROW_IE_EXCEPTION << "ICNNNetworks have different numbers of layers! " + std::to_string(newNetwork.layerCount()) + " and " + std::to_string(oldNetwork.layerCount());
+        CNNNetwork network(newNetwork);
+        CNNNetwork refNetwork(oldNetwork);
+        if (newNetwork->layerCount() != oldNetwork.layerCount())
+            THROW_IE_EXCEPTION << "ICNNNetworks have different numbers of layers! " + std::to_string(newNetwork->layerCount()) + " and " + std::to_string(oldNetwork.layerCount());
         auto newIterator = network.begin();
         auto oldIterator = refNetwork.begin();
         for (; newIterator != network.end() && oldIterator != refNetwork.end(); newIterator++, oldIterator++) {
@@ -120,12 +120,10 @@ protected:
 
         InputsDataMap newInput;
         OutputsDataMap newOutput;
-        newNetwork.getInputsInfo(newInput);
-        newNetwork.getOutputsInfo(newOutput);
-        InputsDataMap oldInput;
-        OutputsDataMap oldOutput;
-        oldNetwork.getInputsInfo(oldInput);
-        oldNetwork.getOutputsInfo(oldOutput);
+        newNetwork->getInputsInfo(newInput);
+        newNetwork->getOutputsInfo(newOutput);
+        InputsDataMap oldInput = oldNetwork.getInputsInfo();
+        OutputsDataMap oldOutput = oldNetwork.getOutputsInfo();
 
         bool success = newInput.size() == oldInput.size();
         for (const auto& it : newInput) {
@@ -181,7 +179,7 @@ TEST_F(NGraphReaderTests, ReadScalarNetwork) {
     Blob::CPtr blob;
     auto nGraph = reader.read(model, blob);
     ICNNNetwork::Ptr network = convertFunctionToICNNNetwork(nGraph);
-    CNNNetwork cnetwork(network.get());
+    CNNNetwork cnetwork(network);
     cnetwork.begin();
 }
 
@@ -499,7 +497,7 @@ std::string modelV5 = R"V0G0N(
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_ReadProposalNetwork) {
@@ -661,7 +659,7 @@ std::string modelV5 = R"V0G0N(
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadPriorBoxNetwork) {
@@ -906,7 +904,7 @@ std::string modelV5 = R"V0G0N(
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadSplitNetwork) {
@@ -1030,7 +1028,7 @@ TEST_F(NGraphReaderTests, ReadSplitNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_ReadDetectionOutputNetwork) {
@@ -1187,7 +1185,7 @@ TEST_F(NGraphReaderTests, DISABLED_ReadDetectionOutputNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadConcatNetwork) {
@@ -1322,7 +1320,7 @@ TEST_F(NGraphReaderTests, ReadConcatNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_ReadTopKNetwork) {
@@ -1431,7 +1429,7 @@ TEST_F(NGraphReaderTests, DISABLED_ReadTopKNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadMVNNetwork) {
@@ -1532,7 +1530,7 @@ TEST_F(NGraphReaderTests, ReadMVNNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadLrnNetwork) {
@@ -1633,7 +1631,7 @@ TEST_F(NGraphReaderTests, ReadLrnNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_ReadLrnNetwork2) {
@@ -1774,7 +1772,7 @@ TEST_F(NGraphReaderTests, DISABLED_ReadLrnNetwork2) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 
@@ -1876,7 +1874,7 @@ TEST_F(NGraphReaderTests, ReadClampNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadSigmoidNetwork) {
@@ -1975,7 +1973,7 @@ TEST_F(NGraphReaderTests, ReadSigmoidNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadPReLUNetwork) {
@@ -2098,7 +2096,7 @@ TEST_F(NGraphReaderTests, ReadPReLUNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadELUNetwork) {
@@ -2199,7 +2197,7 @@ TEST_F(NGraphReaderTests, ReadELUNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadShapeOfNetwork) {
@@ -2289,7 +2287,7 @@ TEST_F(NGraphReaderTests, ReadShapeOfNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadLeakyReLUNetwork) {
@@ -2390,7 +2388,7 @@ TEST_F(NGraphReaderTests, ReadLeakyReLUNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadTanhNetwork) {
@@ -2489,7 +2487,7 @@ TEST_F(NGraphReaderTests, ReadTanhNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadExpNetwork) {
@@ -2588,7 +2586,7 @@ TEST_F(NGraphReaderTests, ReadExpNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadReLUNetwork) {
@@ -2687,7 +2685,7 @@ TEST_F(NGraphReaderTests, ReadReLUNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadBroadcastNetwork) {
@@ -2855,7 +2853,7 @@ TEST_F(NGraphReaderTests, ReadSoftMaxNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadMaxPoolNetwork) {
@@ -2956,7 +2954,7 @@ TEST_F(NGraphReaderTests, ReadMaxPoolNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 TEST_F(NGraphReaderTests, ReadAvgPoolNetwork) {
     std::string model = R"V0G0N(
@@ -3056,7 +3054,7 @@ TEST_F(NGraphReaderTests, ReadAvgPoolNetwork) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 };
 
 TEST_F(NGraphReaderTests, ReadReLUNetworkWithoutTopologicalOrder) {
@@ -3155,7 +3153,7 @@ TEST_F(NGraphReaderTests, ReadReLUNetworkWithoutTopologicalOrder) {
     InferenceEngine::CNNNetReader net_reader;
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadTileNetwork) {
@@ -3276,7 +3274,7 @@ TEST_F(NGraphReaderTests, ReadTileNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadTileNetwork2) {
@@ -3437,7 +3435,7 @@ TEST_F(NGraphReaderTests, ReadTileNetwork2) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadTransposeNetwork) {
@@ -3558,7 +3556,7 @@ TEST_F(NGraphReaderTests, ReadTransposeNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadReshapeNetwork) {
@@ -3684,7 +3682,7 @@ TEST_F(NGraphReaderTests, ReadReshapeNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadSqueeze) {
@@ -3812,7 +3810,7 @@ TEST_F(NGraphReaderTests, ReadSqueeze) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadInterpolateNetwork) {
@@ -3932,7 +3930,7 @@ TEST_F(NGraphReaderTests, ReadInterpolateNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadMatMulNetwork) {
@@ -4038,7 +4036,7 @@ TEST_F(NGraphReaderTests, ReadMatMulNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadDeconvolution3DNetwork) {
@@ -4172,7 +4170,7 @@ TEST_F(NGraphReaderTests, ReadDeconvolution3DNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadDeconvolution2DNetwork) {
@@ -4297,7 +4295,7 @@ TEST_F(NGraphReaderTests, ReadDeconvolution2DNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadConvolutionNetwork) {
@@ -4422,7 +4420,7 @@ TEST_F(NGraphReaderTests, ReadConvolutionNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadMaximumNetwork) {
@@ -4565,7 +4563,7 @@ TEST_F(NGraphReaderTests, ReadMaximumNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadDivideNetwork) {
@@ -4708,7 +4706,7 @@ TEST_F(NGraphReaderTests, ReadDivideNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadPowNetwork) {
@@ -4851,7 +4849,7 @@ TEST_F(NGraphReaderTests, ReadPowNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadMultiplyNetwork) {
@@ -4994,7 +4992,7 @@ TEST_F(NGraphReaderTests, ReadMultiplyNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ReadAddNoBroadcastNetwork) {
@@ -5137,7 +5135,7 @@ TEST_F(NGraphReaderTests, ReadAddNoBroadcastNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_ReadAddNetwork) {
@@ -5276,7 +5274,7 @@ TEST_F(NGraphReaderTests, DISABLED_ReadAddNetwork) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvBiasFusion) {
@@ -5478,7 +5476,7 @@ TEST_F(NGraphReaderTests, ConvBiasFusion) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvBiasFusionFP16) {
@@ -5680,7 +5678,7 @@ TEST_F(NGraphReaderTests, ConvBiasFusionFP16) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_MatMulBiasFusion) {
@@ -5852,7 +5850,7 @@ TEST_F(NGraphReaderTests, DISABLED_MatMulBiasFusion) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, MatMulBiasFusionNoBroadcast) {
@@ -5987,7 +5985,7 @@ TEST_F(NGraphReaderTests, MatMulBiasFusionNoBroadcast) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertMulAddToScaleShift) {
@@ -6244,7 +6242,7 @@ TEST_F(NGraphReaderTests, ConvertMulAddToScaleShift) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertMulAddToPower) {
@@ -6508,7 +6506,7 @@ TEST_F(NGraphReaderTests, ConvertMulAddToPower) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertMulToPower) {
@@ -6687,7 +6685,7 @@ TEST_F(NGraphReaderTests, ConvertMulToPower) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertAddToPower) {
@@ -6866,7 +6864,7 @@ TEST_F(NGraphReaderTests, ConvertAddToPower) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertMulToScaleShift) {
@@ -7039,7 +7037,7 @@ TEST_F(NGraphReaderTests, ConvertMulToScaleShift) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertAddToScaleShift) {
@@ -7212,7 +7210,7 @@ TEST_F(NGraphReaderTests, ConvertAddToScaleShift) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertMulToEltwise) {
@@ -7398,7 +7396,7 @@ TEST_F(NGraphReaderTests, ConvertMulToEltwise) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertAddToEltwise) {
@@ -7584,7 +7582,7 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-    compareICNNNetworks(*network, net_reader.getNetwork());
+    compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertBroadcastToTiles1) {
@@ -7769,7 +7767,7 @@ TEST_F(NGraphReaderTests, ConvertBroadcastToTiles1) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-   compareICNNNetworks(*network, net_reader.getNetwork());
+   compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertBroadcastToTiles2) {
@@ -7967,7 +7965,7 @@ TEST_F(NGraphReaderTests, ConvertBroadcastToTiles2) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-   compareICNNNetworks(*network, net_reader.getNetwork());
+   compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, ConvertBroadcastToTiles3) {
@@ -8107,7 +8105,7 @@ TEST_F(NGraphReaderTests, ConvertBroadcastToTiles3) {
     net_reader.ReadNetwork(modelV5.data(), modelV5.length());
     net_reader.SetWeights(tWeights);
 
-   compareICNNNetworks(*network, net_reader.getNetwork());
+   compareICNNNetworks(network, net_reader.getNetwork());
 }
 
 TEST_F(NGraphReaderTests, DISABLED_ConvertMulAddToScaleShiftTest) {
@@ -8288,5 +8286,5 @@ TEST_F(NGraphReaderTests, DISABLED_ConvertMulAddToScaleShiftTest) {
    net_reader.ReadNetwork(modelV5.data(), modelV5.length());
    net_reader.SetWeights(tWeights);
 
-   compareICNNNetworks(*network, net_reader.getNetwork());
+   compareICNNNetworks(network, net_reader.getNetwork());
 }
index e3855bf..c280ed8 100644 (file)
@@ -95,27 +95,11 @@ TEST_F(PluginDispatcherTests, throwsOnUnknownPlugin) {
     ASSERT_THROW(dispatcher.getPluginByName(nameExt("unknown_plugin")), InferenceEngine::details::InferenceEngineException);
 }
 
-TEST_F(PluginDispatcherTests, throwsOnDeviceWithoutPlugins) {
-    PluginDispatcher dispatcher({ "./", "./lib" });
-    ASSERT_THROW(dispatcher.getSuitablePlugin(TargetDevice::eBalanced),
-                                                    InferenceEngine::details::InferenceEngineException);
-}
-
 ACTION(ThrowException)
 {
     THROW_IE_EXCEPTION << "Exception!";
 }
 
-TEST_F(PluginDispatcherTests, triesToLoadEveryPluginSuitableForDevice) {
-    MockDispatcher disp({ "./", "./lib" });
-
-    ON_CALL(disp, getPluginByName(_)).WillByDefault(ThrowException());
-#ifdef ENABLE_MKL_DNN
-    EXPECT_CALL(disp, getPluginByName(nameExt("MKLDNNPlugin"))).Times(1);
-#endif
-    ASSERT_THROW(disp.getSuitablePlugin(TargetDevice::eCPU), InferenceEngine::details::InferenceEngineException);
-}
-
 #if defined(ENABLE_MKL_DNN)
 TEST_F(PluginDispatcherTests, returnsIfLoadSuccessfull) {
     MockDispatcher disp({ "./", "./lib" });
@@ -123,7 +107,7 @@ TEST_F(PluginDispatcherTests, returnsIfLoadSuccessfull) {
     auto ptr = dispatcher.getPluginByName(nameExt("mock_engine"));
 
     EXPECT_CALL(disp, getPluginByName(_)).WillOnce(Return(ptr));
-    ASSERT_NO_THROW(disp.getSuitablePlugin(TargetDevice::eCPU));
+    ASSERT_NO_THROW(disp.getPluginByName(nameExt("MKLDNNPlugin")));
 }
 
 #if defined ENABLE_MKL_DNN && !defined _WIN32 && !defined __CYGWIN__ && !defined __APPLE__
diff --git a/inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp
deleted file mode 100644 (file)
index 367840a..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <locale>
-#include "range_iterator.hpp"
-#include <cctype>
-
-using namespace std;
-using namespace InferenceEngine;
-
-class RangeIteratorTests: public ::testing::Test {
- protected:
-    virtual void TearDown() {
-    }
-
-    virtual void SetUp() {
-    }
-
- public:
-
-};
-
-TEST_F(RangeIteratorTests, canCompareSameStringsInsensitive) {
-    ASSERT_FALSE(std::lexicographical_compare(null_terminated_string("UPPer"),
-                                             null_terminated_string_end(),
-                                             null_terminated_string("upper"),
-                                             null_terminated_string_end(), [](char a, char b) {
-            std::locale loc;
-            return std::tolower(a, loc) > std::tolower(b, loc);
-        }));
-}
-
-TEST_F(RangeIteratorTests, canCompareNotSameStringsInsensitive) {
-    ASSERT_TRUE(std::lexicographical_compare(null_terminated_string("UPPer"),
-                                         null_terminated_string_end(),
-                                         null_terminated_string("uppel"),
-                                         null_terminated_string_end(), [](char a, char b) {
-        std::locale loc;
-        return std::tolower(a, loc) > std::tolower(b, loc);
-    }));
-    
-}
-
-TEST_F(RangeIteratorTests, cannotDereferenceEndIterator) {
-    ASSERT_ANY_THROW(*null_terminated_string_end());
-    ASSERT_ANY_THROW(++null_terminated_string_end());
-    ASSERT_ANY_THROW(null_terminated_string_end()++);
-}
index 81fad63..5107576 100644 (file)
@@ -19,6 +19,7 @@
 #include "util_test.hpp"
 #include "util_const_infer_test.hpp"
 #include <details/ie_cnn_network_tools.h>
+#include <precision_utils.h>
 
 namespace IE = InferenceEngine;
 
@@ -123,7 +124,8 @@ IE::BlobMap RemoveLayerTests::fillConstData(const std::vector<std::string>& cons
             IE::Blob::Ptr blob = make_blob_with_precision(desc);
             blob->allocate();
             auto* buffer = blob->buffer().as<float*>();
-            for (int i = 0; i < blob->size(); i++) {
+            size_t buffer_length = blob->byteSize() / sizeof(float);
+            for (int i = 0; i < buffer_length; i++) {
                 buffer[i] = i + 1;
             }
             constData[outData->getName()] = blob;
@@ -145,6 +147,75 @@ IE::BlobMap RemoveLayerTests::initConstLayers(const std::vector<std::string>& co
     return customBlobs;
 }
 
+IE::BlobMap RemoveLayerTests::fillConstDataDiffPrec (const std::vector<std::string>& constLayers) {
+    IE::BlobMap constData;
+    for (const auto& name:constLayers) {
+        auto layer = getLayer(name);
+        for (const auto& outData:layer->outData) {
+            IE::TensorDesc desc = outData->getTensorDesc();
+            IE::Blob::Ptr blob = make_blob_with_precision(desc);
+            blob->allocate();
+            switch(layer->precision) {
+                case IE::Precision::U8: {
+                    auto *buffer = blob->buffer().as<uint8_t *>();
+                    for (int i = 0; i < blob->size(); i++) {
+                        buffer[i] = i + 2;
+                    }
+                    break;
+                }
+                case IE::Precision::I32: {
+                    auto *buffer = blob->buffer().as<int *>();
+                    for (int i = 0; i < blob->size(); i++) {
+                        buffer[i] = i + 2;
+                    }
+                    break;
+                }
+                case IE::Precision::I64: {
+                    auto *buffer = blob->buffer().as<long long int *>();
+                    for (int i = 0; i < blob->size(); i++) {
+                        buffer[i] = i + 2;
+                    }
+                    break;
+                }
+                case IE::Precision::FP16: {
+                    auto *buffer = blob->buffer().as<IE::ie_fp16 *>();
+                    float j = 0;
+                    for (int i = 0; i < blob->size(); i++) {
+                        buffer[i] = j + (float)2;
+                        buffer[i] = IE::PrecisionUtils::f32tof16(buffer[i]);
+                        j++;
+                    }
+                    break;
+                }
+                case IE::Precision::FP32: {
+                    auto *buffer = blob->buffer().as<float *>();
+                    for (int i = 0; i < blob->size(); i++) {
+                        buffer[i] = i + 2;
+                    }
+                    break;
+                }
+                default:
+                    THROW_IE_EXCEPTION << "Not supported data type";
+            }
+            constData[outData->getName()] = blob;
+        }
+    }
+    return constData;
+}
+
+IE::BlobMap RemoveLayerTests::initConstLayersDiffPrec(const std::vector<std::string> &constLayers) {
+    for (const auto& name : constLayers) {
+        getLayer(name)->type = "Const";
+    }
+    IE::BlobMap customBlobs = fillConstDataDiffPrec(constLayers);
+    for (const auto& layerName: constLayers) {
+        auto layer = getLayer(layerName);
+        layer->type = "Const";
+        layer->blobs["custom"] = customBlobs[layer->outData[0]->getName()];
+    }
+    return customBlobs;
+}
+
 TEST_F(RemoveLayerTests, canTrimL2) {
     auto layer1 = getLayer("layer1");
     auto layer4 = getLayer("layer4");
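The new fillConstDataDiffPrec helper above repeats the same "fill with i + 2" loop once per
precision. A minimal sketch of how those branches could be collapsed with a small template
(hypothetical helper name fill_buffer, not part of this patch; FP16 would still need its
f32tof16 conversion):

    // Hypothetical helper: fills a typed blob buffer with i + 2.
    template <typename T>
    static void fill_buffer(const IE::Blob::Ptr& blob) {
        auto* buffer = blob->buffer().as<T*>();
        for (size_t i = 0; i < blob->size(); i++) {
            buffer[i] = static_cast<T>(i + 2);
        }
    }
    // Usage inside the switch, e.g.: case IE::Precision::I32: fill_buffer<int>(blob); break;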
@@ -828,3 +899,712 @@ TEST_F(AdvancedShapeInferTests, canReshapeWithScalar) {
     ASSERT_EQ(getData("data1")->getTensorDesc().getDims(), newInShape);
     ASSERT_EQ(getData("data3")->getTensorDesc().getDims(), newOutShape);
 }
+
+TEST_F(AdvancedShapeInferTests, canFoldConstWithOneHot) {
+    //   Const-d1-OneHot-d2
+    //                     \
+    //              I1-d3-Eltw(Sum)-d4
+    auto testFunc = [&](IE::Precision precision) {
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2}, precision, IE::Layout::C)
+                .data("data2", IE::SizeVector{2, 10}, precision, IE::Layout::NC)
+                .data("data3", IE::SizeVector{2, 10}, precision, IE::Layout::NC)
+                .data("data4", IE::SizeVector{2, 10}, precision, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"const", "dummy", precision})
+                .layer<IE::CNNLayer>(IE::LayerParams{"oneHot", "OneHot", precision})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input", "input", precision})
+                .layer<IE::CNNLayer>(IE::LayerParams{"eltwise", "Eltwise", precision})
+                .linkToData("const", "data1")
+                .linkDataTo("data1", "oneHot")
+                .linkToData("oneHot", "data2")
+                .linkDataTo("data2", "eltwise")
+                .linkToData("input", "data3")
+                .linkDataTo("data3", "eltwise")
+                .linkToData("eltwise", "data4")
+                .addInput("data3")
+                .finalize();
+        getLayer("oneHot")->params = {
+                {"axis",      "-1"},
+                {"depth",     "10"},
+                {"off_value", "1.0"},
+                {"on_value",  "1.0"}
+        };
+        getLayer("eltwise")->params = {
+                {"operation", "sum"}
+        };
+        originalLayersNum = net->allLayers().size();
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayers({"const"});
+        IE::ConstTransformer transformator(net.get());
+        transformator.fullTrim();
+
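+        // fullTrim folds the constant subgraph: "const" and "oneHot" are replaced by a
+        // single Const layer, so exactly one layer disappears from the network.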
+        ASSERT_EQ(net->allLayers().size(), originalLayersNum - 1);
+    };
+
+    testFunc(IE::Precision::FP32);
+    testFunc(IE::Precision::FP16);
+    testFunc(IE::Precision::Q78);
+    testFunc(IE::Precision::I16);
+    testFunc(IE::Precision::U8);
+    testFunc(IE::Precision::I8);
+    testFunc(IE::Precision::U16);
+    testFunc(IE::Precision::I32);
+    testFunc(IE::Precision::I64);
+}
+
+TEST_F(AdvancedShapeInferTests, MulWithTensorConstInferTest) {
+
+    auto testFunc = [&](IE::Precision precisionInData1, IE::Precision precisionInData2, IE::Precision precisionOutData) {
+
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2, 2}, precisionInData1, IE::Layout::NC)
+                .data("data2", IE::SizeVector{2, 2}, precisionInData2, IE::Layout::NC)
+                .data("data3", IE::SizeVector{2, 2}, precisionOutData, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"mulLayer", "Eltwise"})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input1", "Const", precisionInData1})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Const", precisionInData2})
+                .linkToData("input1", "data1")
+                .linkToData("input2", "data2")
+                .linkDataTo("data1", "mulLayer")
+                .linkDataTo("data2", "mulLayer")
+                .linkToData("mulLayer", "data3")
+                .addInput("data1")
+                .addInput("data2")
+                .finalize();
+
+        getLayer("mulLayer")->params = {
+                {"operation", "mul"}
+        };
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayersDiffPrec({"input1", "input2"});
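+        // Both const inputs are filled with i + 2 by initConstLayersDiffPrec, so the
+        // element-wise product is (i + 2) * (i + 2): {4, 9, 16, 25}.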
+        float ref[] = {4, 9, 16, 25};
+        if (precisionOutData == IE::Precision::FP16) {
+            for (int i = 0; i < 4; i++)
+                ref[i] = IE::PrecisionUtils::f32tof16(ref[i]);
+        }
+        IE::ConstTransformer transformator(net.get());
+        transformator.foldConstSubgraphs();
+        switch(precisionOutData) {
+            case IE::Precision::U8: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<uint8_t *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I32: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I64: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<long long int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP16: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<IE::ie_fp16 *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP32: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<float *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
+        }
+    };
+
+    testFunc(IE::Precision::U8, IE::Precision::U8, IE::Precision::U8);
+    testFunc(IE::Precision::U8, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::U8, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::U8, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::U8, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::U8, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::U8, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I32, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::U8, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::U8, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP16);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP16);
+}
+
+TEST_F(AdvancedShapeInferTests, MulWithScalarConstInferTest) {
+
+    auto testFunc = [&](IE::Precision precisionInData1, IE::Precision precisionInData2, IE::Precision precisionOutData) {
+
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2, 2}, precisionInData1, IE::Layout::NC)
+                .data("data2", IE::SizeVector{}, precisionInData2, IE::Layout::SCALAR)
+                .data("data3", IE::SizeVector{2, 2}, precisionOutData, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"mulLayer", "Eltwise"})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input1", "Const", precisionInData1})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Const", precisionInData2})
+                .linkToData("input1", "data1")
+                .linkToData("input2", "data2")
+                .linkDataTo("data1", "mulLayer")
+                .linkDataTo("data2", "mulLayer")
+                .linkToData("mulLayer", "data3")
+                .addInput("data1")
+                .addInput("data2")
+                .finalize();
+
+        getLayer("mulLayer")->params = {
+                {"operation", "mul"}
+        };
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayersDiffPrec({"input1", "input2"});
+        float ref[] = {4, 6, 8, 10};
+        if (precisionOutData == IE::Precision::FP16) {
+            for (int i = 0; i < 4; i++)
+                ref[i] = IE::PrecisionUtils::f32tof16(ref[i]);
+        }
+        IE::ConstTransformer transformator(net.get());
+        transformator.foldConstSubgraphs();
+        switch(precisionOutData) {
+            case IE::Precision::U8: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<uint8_t *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I32: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I64: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<long long int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP16: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<IE::ie_fp16 *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP32: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<float *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
+        }
+    };
+
+    testFunc(IE::Precision::U8, IE::Precision::U8, IE::Precision::U8);
+    testFunc(IE::Precision::U8, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::U8, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::U8, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::U8, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::U8, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::U8, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I32, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::U8, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::U8, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP16);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP16);
+}
+
+TEST_F(AdvancedShapeInferTests, AddWithScalarConstInferTest) {
+
+    auto testFunc = [&](IE::Precision precisionInData1, IE::Precision precisionInData2, IE::Precision precisionOutData) {
+
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2, 2}, precisionInData1, IE::Layout::NC)
+                .data("data2", IE::SizeVector{}, precisionInData2, IE::Layout::SCALAR)
+                .data("data3", IE::SizeVector{2, 2}, precisionOutData, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"addLayer", "Eltwise"})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input1", "Const", precisionInData1})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Const", precisionInData2})
+                .linkToData("input1", "data1")
+                .linkToData("input2", "data2")
+                .linkDataTo("data1", "addLayer")
+                .linkDataTo("data2", "addLayer")
+                .linkToData("addLayer", "data3")
+                .addInput("data1")
+                .addInput("data2")
+                .finalize();
+
+        getLayer("addLayer")->params = {
+                {"operation", "sum"}
+        };
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayersDiffPrec({"input1", "input2"});
+        float ref[] = {4, 5, 6, 7};
+        if (precisionOutData == IE::Precision::FP16) {
+            for (int i = 0; i < 4; i++)
+                ref[i] = IE::PrecisionUtils::f32tof16(ref[i]);
+        }
+        IE::ConstTransformer transformator(net.get());
+        transformator.foldConstSubgraphs();
+        switch(precisionOutData) {
+            case IE::Precision::U8: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<uint8_t *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I32: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I64: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<long long int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP16: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<IE::ie_fp16 *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP32: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<float *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
+        }
+    };
+
+    testFunc(IE::Precision::U8, IE::Precision::U8, IE::Precision::U8);
+    testFunc(IE::Precision::U8, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::U8, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::U8, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::U8, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::U8, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::U8, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I32, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::U8, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::U8, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP16);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP16);
+}
+
+TEST_F(AdvancedShapeInferTests, AddWithTensorConstInferTest) {
+
+    auto testFunc = [&](IE::Precision precisionInData1, IE::Precision precisionInData2, IE::Precision precisionOutData) {
+
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2, 2}, precisionInData1, IE::Layout::NC)
+                .data("data2", IE::SizeVector{2,2}, precisionInData2, IE::Layout::NC)
+                .data("data3", IE::SizeVector{2, 2}, precisionOutData, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"addLayer", "Eltwise"})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input1", "Const", precisionInData1})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Const", precisionInData2})
+                .linkToData("input1", "data1")
+                .linkToData("input2", "data2")
+                .linkDataTo("data1", "addLayer")
+                .linkDataTo("data2", "addLayer")
+                .linkToData("addLayer", "data3")
+                .addInput("data1")
+                .addInput("data2")
+                .finalize();
+
+        getLayer("addLayer")->params = {
+                {"operation", "sum"}
+        };
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayersDiffPrec({"input1", "input2"});
+        float ref[] = {4, 6, 8, 10};
+        if (precisionOutData == IE::Precision::FP16) {
+            for (int i = 0; i < 4; i++)
+                ref[i] = IE::PrecisionUtils::f32tof16(ref[i]);
+        }
+        IE::ConstTransformer transformator(net.get());
+        transformator.foldConstSubgraphs();
+        switch(precisionOutData) {
+            case IE::Precision::U8: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<uint8_t *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I32: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I64: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<long long int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP16: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<IE::ie_fp16 *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP32: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<float *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
+        }
+    };
+
+    testFunc(IE::Precision::U8, IE::Precision::U8, IE::Precision::U8);
+    testFunc(IE::Precision::U8, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::U8, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::U8, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::U8, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::U8, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::U8, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I32, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::U8, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::U8, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP16);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP16);
+}
+
+TEST_F(AdvancedShapeInferTests, AddWithBroadcastingConstInferTest) {
+
+    auto testFunc = [&](IE::Precision precisionInData1, IE::Precision precisionInData2, IE::Precision precisionOutData) {
+
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2, 2}, precisionInData1, IE::Layout::NC)
+                .data("data2", IE::SizeVector{1, 2}, precisionInData2, IE::Layout::NC)
+                .data("data3", IE::SizeVector{2, 2}, precisionOutData, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"addLayer", "Eltwise"})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input1", "Const", precisionInData1})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Const", precisionInData2})
+                .linkToData("input1", "data1")
+                .linkToData("input2", "data2")
+                .linkDataTo("data1", "addLayer")
+                .linkDataTo("data2", "addLayer")
+                .linkToData("addLayer", "data3")
+                .addInput("data1")
+                .addInput("data2")
+                .finalize();
+
+        getLayer("addLayer")->params = {
+                {"operation", "sum"}
+        };
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayersDiffPrec({"input1", "input2"});
+        float ref[] = {4, 5, 7, 8};
+        if (precisionOutData == IE::Precision::FP16) {
+            for (int i = 0; i < 4; i++)
+                ref[i] = IE::PrecisionUtils::f32tof16(ref[i]);
+        }
+        IE::ConstTransformer transformator(net.get());
+        transformator.foldConstSubgraphs();
+        switch(precisionOutData) {
+            case IE::Precision::U8: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<uint8_t *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I32: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I64: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<long long int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP16: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<IE::ie_fp16 *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP32: {
+                auto *l = cnnNetwork.getLayerByName("addLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<float *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
+        }
+    };
+
+    testFunc(IE::Precision::U8, IE::Precision::U8, IE::Precision::U8);
+    testFunc(IE::Precision::U8, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::U8, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::U8, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::U8, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::U8, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::U8, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I32, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::U8, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::U8, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP16);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP16);
+}
+
+TEST_F(AdvancedShapeInferTests, MulWithBroadcastingConstInferTest) {
+
+    auto testFunc = [&](IE::Precision precisionInData1, IE::Precision precisionInData2, IE::Precision precisionOutData) {
+
+        netBuilder = NetBuilder();
+        net = netBuilder
+                .data("data1", IE::SizeVector{2, 2}, precisionInData1, IE::Layout::NC)
+                .data("data2", IE::SizeVector{1, 2}, precisionInData2, IE::Layout::NC)
+                .data("data3", IE::SizeVector{2, 2}, precisionOutData, IE::Layout::NC)
+                .layer<IE::CNNLayer>(IE::LayerParams{"mulLayer", "Eltwise"})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input1", "Const", precisionInData1})
+                .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Const", precisionInData2})
+                .linkToData("input1", "data1")
+                .linkToData("input2", "data2")
+                .linkDataTo("data1", "mulLayer")
+                .linkDataTo("data2", "mulLayer")
+                .linkToData("mulLayer", "data3")
+                .addInput("data1")
+                .addInput("data2")
+                .finalize();
+
+        getLayer("mulLayer")->params = {
+                {"operation", "mul"}
+        };
+
+        IE::CNNNetwork cnnNetwork(net);
+        initConstLayersDiffPrec({"input1", "input2"});
+        float ref[] = {4, 6, 12, 15};
+        if (precisionOutData == IE::Precision::FP16) {
+            for (int i = 0; i < 4; i++)
+                ref[i] = IE::PrecisionUtils::f32tof16(ref[i]);
+        }
+        IE::ConstTransformer transformator(net.get());
+        transformator.foldConstSubgraphs();
+        switch(precisionOutData) {
+            case IE::Precision::U8: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<uint8_t *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I32: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::I64: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<long long int *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP16: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<IE::ie_fp16 *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            case IE::Precision::FP32: {
+                auto *l = cnnNetwork.getLayerByName("mulLayer__data3__Const").get()->blobs.at("custom")->cbuffer().as<float *>();
+                ASSERT_EQ(l[0], ref[0]);
+                ASSERT_EQ(l[1], ref[1]);
+                ASSERT_EQ(l[2], ref[2]);
+                ASSERT_EQ(l[3], ref[3]);
+                break;
+            }
+            default:
+                THROW_IE_EXCEPTION << "Unsupported precision!";
+        }
+    };
+
+    testFunc(IE::Precision::U8, IE::Precision::U8, IE::Precision::U8);
+    testFunc(IE::Precision::U8, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::U8, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::U8, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::U8, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::U8, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I32, IE::Precision::I32);
+    testFunc(IE::Precision::I32, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::U8, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I32, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::I64, IE::Precision::I64);
+    testFunc(IE::Precision::I64, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::I64, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::U8, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP16, IE::Precision::FP16);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::U8, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::I64, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP32);
+    testFunc(IE::Precision::FP32, IE::Precision::FP32, IE::Precision::FP32);
+    testFunc(IE::Precision::FP16, IE::Precision::FP32, IE::Precision::FP16);
+    testFunc(IE::Precision::FP32, IE::Precision::FP16, IE::Precision::FP16);
+}
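The five ConstInfer tests added above repeat an identical per-precision switch that compares
the folded "custom" blob against four reference values. A minimal sketch of how that comparison
could be factored out (hypothetical helper name compare_folded_blob; FP16 references are assumed
to be pre-converted with f32tof16, exactly as these tests already do):

    // Hypothetical helper: compares the first four elements of a typed blob buffer
    // against the float reference array used by the tests.
    template <typename T>
    static void compare_folded_blob(const IE::Blob::Ptr& blob, const float (&ref)[4]) {
        auto *data = blob->cbuffer().as<T *>();
        for (size_t i = 0; i < 4; i++) {
            ASSERT_EQ(data[i], ref[i]);
        }
    }
    // e.g. case IE::Precision::I32: compare_folded_blob<int>(blob, ref); break;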
index b5fe89a..10bdc53 100644 (file)
@@ -74,6 +74,10 @@ protected:
 
     IE::BlobMap initConstLayers(const std::vector<std::string>& constLayers);
 
+    IE::BlobMap fillConstDataDiffPrec(const std::vector<std::string>& constLayers);
+
+    IE::BlobMap initConstLayersDiffPrec(const std::vector<std::string>& constLayers);
+
     NetBuilder netBuilder;
     IE::details::CNNNetworkImplPtr net;
     size_t originalLayersNum;
index 6fdc1d0..734b883 100644 (file)
@@ -20,7 +20,7 @@ public:
     void setRequestBusy() {
         AsyncInferRequestThreadSafeInternal::setIsRequestBusy(true);
     }
-
+    using AsyncInferRequestThreadSafeInternal::isRequestBusy;
     bool isRequestBusy() {
         return AsyncInferRequestThreadSafeInternal::isRequestBusy();
     }
index 1bdac7d..1a15e52 100644 (file)
@@ -35,7 +35,7 @@ class MockICNNNetwork : public InferenceEngine::ICNNNetwork {
     MOCK_QUALIFIED_METHOD1(setBatchSize, noexcept, InferenceEngine::StatusCode (const size_t size));
     MOCK_QUALIFIED_METHOD2(setBatchSize, noexcept, InferenceEngine::StatusCode (const size_t size, InferenceEngine::ResponseDesc*));
     MOCK_QUALIFIED_METHOD0(getBatchSize, const noexcept, size_t ());
-    MOCK_QUALIFIED_METHOD0(getStats, const noexcept, InferenceEngine::ICNNNetworkStats& ());
+    MOCK_QUALIFIED_METHOD2(getStats, const noexcept, InferenceEngine::StatusCode (InferenceEngine::ICNNNetworkStats** /*stats*/, InferenceEngine::ResponseDesc* /*resp*/));
     MOCK_QUALIFIED_METHOD0(Release, noexcept, void ());
     MOCK_QUALIFIED_METHOD1(getInputShapes, const noexcept, void (InferenceEngine::ICNNNetwork::InputShapes&));
     MOCK_QUALIFIED_METHOD2(reshape, noexcept, InferenceEngine::StatusCode (const InferenceEngine::ICNNNetwork::InputShapes &, InferenceEngine::ResponseDesc *));
@@ -52,16 +52,16 @@ public:
     MOCK_QUALIFIED_METHOD0(getPrecision, const noexcept, InferenceEngine::Precision ());
     MOCK_QUALIFIED_METHOD1(getOutputsInfo, const noexcept, void (InferenceEngine::OutputsDataMap& out));
     MOCK_QUALIFIED_METHOD1(getInputsInfo, const noexcept, void (InferenceEngine::InputsDataMap &inputs));
-    MOCK_QUALIFIED_METHOD1(getInput, noexcept, InferenceEngine::InputInfo::Ptr (const std::string &inputName));
+    MOCK_QUALIFIED_METHOD1(getInput, const noexcept, InferenceEngine::InputInfo::Ptr (const std::string &inputName));
     MOCK_QUALIFIED_METHOD2(getName, const noexcept, void (char* pName, size_t len));
     MOCK_QUALIFIED_METHOD0(getName, const noexcept, const std::string& ());
     MOCK_QUALIFIED_METHOD0(layerCount, const noexcept, size_t ());
     MOCK_QUALIFIED_METHOD1(getData, noexcept, InferenceEngine::DataPtr&(const char* dname));
     MOCK_QUALIFIED_METHOD1(addLayer, noexcept, void(const InferenceEngine::CNNLayerPtr& layer));
     MOCK_QUALIFIED_METHOD3(addOutput, noexcept, InferenceEngine::StatusCode (const std::string &, size_t , InferenceEngine::ResponseDesc*));
-    MOCK_QUALIFIED_METHOD3(getLayerByName, noexcept, InferenceEngine::StatusCode (const char* , InferenceEngine::CNNLayerPtr& , InferenceEngine::ResponseDesc* ));
+    MOCK_QUALIFIED_METHOD3(getLayerByName, const noexcept, InferenceEngine::StatusCode (const char* , InferenceEngine::CNNLayerPtr& , InferenceEngine::ResponseDesc* ));
     MOCK_QUALIFIED_METHOD1(setTargetDevice, noexcept, void (InferenceEngine::TargetDevice device));
-    MOCK_QUALIFIED_METHOD0(getTargetDevice, noexcept, InferenceEngine::TargetDevice ());
+    MOCK_QUALIFIED_METHOD0(getTargetDevice, const noexcept, InferenceEngine::TargetDevice ());
     MOCK_QUALIFIED_METHOD1(setBatchSize, noexcept, InferenceEngine::StatusCode (const size_t size));
     MOCK_QUALIFIED_METHOD2(setBatchSize, noexcept, InferenceEngine::StatusCode (const size_t size, InferenceEngine::ResponseDesc*));
     MOCK_QUALIFIED_METHOD0(getBatchSize, const noexcept, size_t ());
index 1edefb7..563005c 100644 (file)
@@ -42,7 +42,7 @@ public:
     MOCK_QUALIFIED_METHOD1(setBatchSize, noexcept, StatusCode (const size_t size));
     MOCK_QUALIFIED_METHOD2(setBatchSize, noexcept, StatusCode (const size_t size, ResponseDesc*));
     MOCK_QUALIFIED_METHOD0(getBatchSize, const noexcept, size_t ());
-    MOCK_QUALIFIED_METHOD0(getStats, const noexcept, InferenceEngine::ICNNNetworkStats& ());
+    MOCK_QUALIFIED_METHOD2(getStats, const noexcept, InferenceEngine::StatusCode (InferenceEngine::ICNNNetworkStats** /*stats*/, InferenceEngine::ResponseDesc* /*resp*/));
     MOCK_QUALIFIED_METHOD0(Release, noexcept, void ());
     MOCK_QUALIFIED_METHOD1(getInputShapes, const noexcept, void (ICNNNetwork::InputShapes &));
     MOCK_QUALIFIED_METHOD2(reshape, noexcept, StatusCode (const ICNNNetwork::InputShapes &, ResponseDesc *));
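Both mock updates above track the same interface change: ICNNNetwork::getStats now returns a
StatusCode and hands the statistics back through an out-parameter instead of returning a
reference. A caller-side sketch, assuming only the signature visible in these mocks (network
stands for any ICNNNetwork reference):

    InferenceEngine::ICNNNetworkStats* stats = nullptr;
    InferenceEngine::ResponseDesc resp;
    InferenceEngine::StatusCode sc = network.getStats(&stats, &resp);
    if (sc != InferenceEngine::StatusCode::OK) {
        // resp.msg describes the failure.
    }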
index 6e1f7b5..cb9545e 100644 (file)
@@ -790,6 +790,42 @@ INSTANTIATE_TEST_CASE_P(
                                                       {{2, 128, 10, 10}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{ {"levels", "2"}})),
                                       LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Unique"),
+                                      InOutShapes({{{5}},
+                                                   {{5}, {5}}}),
+                                      NewInOutShapes({{{25}},
+                                                      {{25}, {25}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"sorted", "false"},
+                                                                                             {"return_inverse", "true"},
+                                                                                             {"return_counts", "false"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Unique"),
+                                      InOutShapes({{{5}},
+                                                   {{5}, {5}, {5}}}),
+                                      NewInOutShapes({{{25}},
+                                                      {{25}, {25}, {25}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"sorted", "false"},
+                                                                                             {"return_inverse", "true"},
+                                                                                             {"return_counts", "true"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Scatter"),
+                                      InOutShapes({{{3, 3}, {2, 3}},
+                                                   {{3, 3}}}),
+                                      NewInOutShapes({{4, 4}, {3, 4}},
+                                                      {{4, 4}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("NonMaxSuppression"),
+                                      InOutShapes({{{1, 2, 4}, {1, 3, 2}},
+                                                   {{6, 3}}}),
+                                      NewInOutShapes({{{2, 5, 4}, {2, 3, 5}},
+                                                      {{30, 3}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"center_point_box", "0"}})),
+                                      LayerDataName("data"),
                                       CanInfer(true))
         )
 );
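For the NonMaxSuppression cases added above, the inferred output shape is the worst-case
selection count: num_batches * num_classes * num_boxes rows of three indices, which is
consistent with the tuples shown here:

    // boxes {1, 2, 4} + scores {1, 3, 2}  ->  1 * 3 * 2 = 6  rows -> {6, 3}
    // boxes {2, 5, 4} + scores {2, 3, 5}  ->  2 * 3 * 5 = 30 rows -> {30, 3}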
diff --git a/inference-engine/tests/unit/transformations/eltwise_broadcast_test.cpp b/inference-engine/tests/unit/transformations/eltwise_broadcast_test.cpp
deleted file mode 100644 (file)
index 23f27c0..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <string.h>
-#include <transform/transform_network.hpp>
-#include <transform/transformations/eltwise_broadcast.hpp>
-#include <ie_builders.hpp>
-
-#include "tranformations_test.hpp"
-
-using namespace testing;
-using namespace InferenceEngine;
-
-class TransformNetworkTest: public TransformationTestCommon {};
-
-TEST_F(TransformationTestCommon, EltwiseBroadcastOneDimension) {
-    Builder::Network builder("eltwiseBroadcast");
-
-    idx_t firstInputId = builder.addLayer(Builder::InputLayer("FirstInput").setPort(Port({1, 3, 227, 1})));
-    idx_t secondInputId = builder.addLayer(Builder::InputLayer("SecondInput").setPort(Port({1, 3, 227, 227})));
-    idx_t eltwiseSumId = builder.addLayer({firstInputId, secondInputId}, Builder::EltwiseLayer("Sum").
-                                                                         setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM).
-                                                                         setOutputPort(Port({1, 3, 227, 227})));
-    auto network = Transform::Network(builder);
-
-    Transform::TransformationEltwiseBroadcast transformationEltwiseBroadcast;
-    transformationEltwiseBroadcast.execute(network);
-    auto firstInputLayer = network.getLayer(firstInputId);
-    auto tileLayer = network.getLayer(firstInputId).getOutPort().getConnection().getDestination().getLayer();
-    ASSERT_EQ(tileLayer.getType(), "Tile");
-    ASSERT_EQ(tileLayer.getParameter("axis").as<size_t>(), 3);
-    ASSERT_EQ(tileLayer.getParameter("tiles").as<size_t>(), 227);
-    ASSERT_EQ(firstInputLayer.getOutPort().getConnection().getDestination().getLayer().getId(), tileLayer.getId());
-    ASSERT_EQ(tileLayer.getOutPort().getConnection().getDestination().getLayer().getId(), eltwiseSumId);
-}
-
-TEST_F(TransformationTestCommon, EltwiseBroadcastTwoDimensions) {
-    Builder::Network builder("eltwiseBroadcast");
-
-    idx_t firstInputId = builder.addLayer(Builder::InputLayer("FirstInput").setPort(Port({1, 1, 227, 1})));
-    idx_t secondInputId = builder.addLayer(Builder::InputLayer("SecondInput").setPort(Port({1, 3, 227, 227})));
-    idx_t eltwiseSumId = builder.addLayer({firstInputId, secondInputId}, Builder::EltwiseLayer("Sum").
-                                                                         setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM).
-                                                                         setOutputPort(Port({1, 3, 227, 227})));
-    auto network = Transform::Network(builder);
-
-    Transform::TransformationEltwiseBroadcast transformationEltwiseBroadcast;
-    transformationEltwiseBroadcast.execute(network);
-    auto firstInputLayer = network.getLayer(firstInputId);
-    auto tile1Layer = network.getLayer(firstInputId).getOutPort().getConnection().getDestination().getLayer();
-    auto tile2Layer = tile1Layer.getOutPort().getConnection().getDestination().getLayer();
-    ASSERT_EQ(tile1Layer.getType(), "Tile");
-    ASSERT_EQ(tile1Layer.getParameter("axis").as<size_t>(), 1);
-    ASSERT_EQ(tile1Layer.getParameter("tiles").as<size_t>(), 3);
-    ASSERT_EQ(tile2Layer.getType(), "Tile");
-    ASSERT_EQ(tile2Layer.getParameter("axis").as<size_t>(), 3);
-    ASSERT_EQ(tile2Layer.getParameter("tiles").as<size_t>(), 227);
-    ASSERT_EQ(firstInputLayer.getOutPort().getConnection().getDestination().getLayer().getId(), tile1Layer.getId());
-    ASSERT_EQ(tile1Layer.getOutPort().getConnection().getDestination().getLayer().getId(), tile2Layer.getId());
-    ASSERT_EQ(tile2Layer.getOutPort().getConnection().getDestination().getLayer().getId(), eltwiseSumId);
-}
\ No newline at end of file
diff --git a/inference-engine/tests/unit/transformations/sub_test.cpp b/inference-engine/tests/unit/transformations/sub_test.cpp
deleted file mode 100644 (file)
index 85b9b4a..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <string.h>
-#include <transform/transform_network.hpp>
-#include <transform/transformations/sub.hpp>
-#include <ie_builders.hpp>
-
-#include "tranformations_test.hpp"
-
-using namespace testing;
-using namespace InferenceEngine;
-
-class TransformNetworkTest: public TransformationTestCommon {};
-
-TEST_F(TransformationTestCommon, Sub) {
-    Builder::Network builder("sub");
-
-    idx_t firstInputId = builder.addLayer(Builder::InputLayer("FirstInput").setPort(Port({1,3, 227, 227})));
-    idx_t secondInputId = builder.addLayer(Builder::InputLayer("SecondInput").setPort(Port({1,3, 227, 227})));
-    idx_t eltwiseSubId = builder.addLayer({firstInputId, secondInputId}, Builder::EltwiseLayer("Sub").setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUB));
-    idx_t clampId = builder.addLayer({eltwiseSubId}, Builder::ClampLayer("clamp"));
-    auto network = Transform::Network(builder);
-
-    Transform::TransformationSub transformationSub;
-    transformationSub.execute(network);
-    ASSERT_THROW(network.getLayer("Sub"), InferenceEngine::details::InferenceEngineException);
-    auto sumLayer = network.getLayer(firstInputId).getOutPort().getConnection().getDestination().getLayer();
-    auto powerLayer = network.getLayer(secondInputId).getOutPort().getConnection().getDestination().getLayer();
-    ASSERT_EQ(sumLayer.getType(), "Eltwise");
-    ASSERT_EQ(sumLayer.getParameter("operation").as<std::string>(), "sum");
-    ASSERT_EQ(powerLayer.getType(), "Power");
-    ASSERT_EQ(powerLayer.getParameter("power").as<float>(), 1.0f);
-    ASSERT_EQ(powerLayer.getParameter("scale").as<float>(), -1.0f);
-    ASSERT_EQ(powerLayer.getParameter("shift").as<float>(), 0.0f);
-    ASSERT_EQ(sumLayer.getOutPort().getConnection().getDestination().getLayer().getId(), clampId);
-}
\ No newline at end of file
diff --git a/inference-engine/tests/unit/transformations/tranformations_test.hpp b/inference-engine/tests/unit/transformations/tranformations_test.hpp
deleted file mode 100644 (file)
index 797c298..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <string.h>
-#include <ie_builders.hpp>
-#include <blob_factory.hpp>
-
-#include "../builders/builder_test.hpp"
-
-class TransformationTestCommon : public BuilderTestCommon {
-public:
-};
\ No newline at end of file
index 5a4b259..54de20f 100644 (file)
@@ -7,13 +7,6 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
   set(CMAKE_CC_FLAGS "${CMAKE_CC_FLAGS} -Wno-unknown-warning-option -Wno-inconsistent-missing-override -Wno-pass-failed")
 endif()
 
-add_subdirectory(pugixml)
-export(TARGETS pugixml NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets.cmake")
-export(TARGETS pugixml NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
-
-add_subdirectory(stb_lib)
-add_subdirectory(ade)
-
 if (ENABLE_CLDNN)
     set(CLDNN__OUTPUT_BIN_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
     set(CLDNN__OUTPUT_LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
@@ -35,20 +28,35 @@ if (ENABLE_CLDNN)
     add_subdirectory(clDNN)
 endif()
 
-if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
-    remove_definitions(-fvisibility=hidden)
-    add_definitions(-fvisibility=default)
-endif()
+function(build_with_lto)
+    if(ENABLE_LTO)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto")
+        set(CMAKE_AR  "gcc-ar")
+        set(CMAKE_RANLIB "gcc-ranlib")
+    endif()
 
-include(ngraph.cmake)
+    add_subdirectory(pugixml)
+    export(TARGETS pugixml NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets.cmake")
+    export(TARGETS pugixml NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
 
-if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
-    remove_definitions(-fvisibility=default)
-    add_definitions(-fvisibility=hidden)
-endif()
+    if (TARGET pugixml_mt)
+        export(TARGETS pugixml_mt NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets.cmake")
+        export(TARGETS pugixml_mt NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
+    endif()
+
+    add_subdirectory(stb_lib)
+
+    add_subdirectory(ade)
+    export(TARGETS ade NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
+
+    include(ngraph.cmake)
+
+    add_subdirectory(fluid/modules/gapi)
+    export(TARGETS fluid NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
+endfunction()
+
+build_with_lto()
 
 if(ENABLE_MKL_DNN)
     include(mkldnn.cmake)
 endif()
-
-add_subdirectory("${IE_MAIN_SOURCE_DIR}/thirdparty/fluid/modules/gapi")
index 562e301..cbe2db6 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 562e301ccc8327e4016ccc3f1bc3a8592f50ea21
+Subproject commit cbe2db61a659c2cc304c3837406f95c39dfa938e
index 624d95c..c39fe5c 100644 (file)
@@ -548,8 +548,10 @@ endif()
 # - on others:  shared libraries directory.
 if(__CLDNN_TargetOs MATCHES "^windows$")
   set(CLDNN__IOCL_ICD_LIBDIRS ${CLDNN__IOCL_ICD_STLDIRS} CACHE INTERNAL "Paths to libraries to link for Intel OpenCL SDK ICD.")
+  set(CLDNN__IOCL_ICD_LIBPATH ${CLDNN__IOCL_ICD_LIBDIRS}/${CMAKE_STATIC_LIBRARY_PREFIX}OpenCL${CMAKE_STATIC_LIBRARY_SUFFIX} CACHE INTERNAL "")
 else()
   set(CLDNN__IOCL_ICD_LIBDIRS ${CLDNN__IOCL_ICD_SHLDIRS} CACHE INTERNAL "Paths to libraries to link for Intel OpenCL SDK ICD.")
+  set(CLDNN__IOCL_ICD_LIBPATH ${CLDNN__IOCL_ICD_LIBDIRS}/${CMAKE_SHARED_LIBRARY_PREFIX}OpenCL${CMAKE_SHARED_LIBRARY_SUFFIX} CACHE INTERNAL "")
 endif()
 
 unset(__CLDNN_IOclIcdVersions)
@@ -669,7 +671,7 @@ message(STATUS "[clDNN]  - Root:            ${CLDNN__IOCL_ICD_ROOT}")
 message(STATUS "[clDNN]     + Headers:      ${CLDNN__IOCL_ICD_INCDIRS}")
 message(STATUS "[clDNN]     + Static libs:  ${CLDNN__IOCL_ICD_STLDIRS}")
 message(STATUS "[clDNN]     + Shared libs:  ${CLDNN__IOCL_ICD_SHLDIRS}")
-message(STATUS "[clDNN]     + Libs to link: ${CLDNN__IOCL_ICD_LIBDIRS}")
+message(STATUS "[clDNN]     + Libs to link: ${CLDNN__IOCL_ICD_LIBPATH}")
 message(STATUS "[clDNN] =============================================================================")
 unset(__CLDNN_DetectedArch_Target)
 
@@ -680,7 +682,7 @@ unset(__CLDNN_DetectedArch_Target)
 
 # =================================== Main targets names and labels ====================================
 
-set(CLDNN_BUILD__PROJ__clDNN       "${CLDNN_BUILD__PROJ_NAME_PREFIX}clDNN_shlib")
+set(CLDNN_BUILD__PROJ__clDNN       "${CLDNN_BUILD__PROJ_NAME_PREFIX}clDNN_lib")
 set(CLDNN_BUILD__PROJ_LABEL__clDNN "clDNN")
 
 # ================================================ Outputs =============================================
@@ -817,7 +819,9 @@ foreach(__CLDNN_CompilerFlagName IN ITEMS "CMAKE_CXX_FLAGS" "CMAKE_C_FLAGS")
       endif()
     endif()
   elseif(CMAKE_COMPILER_IS_INTEL)
-    message(FATAL_ERROR "TODO Support native ICC")
+    if(UNIX)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-warning=68,654,1125")
+    endif()
   # Adding needed settings specific to GCC.
   # NOTE: Following options can be needed in the future (although some not recommended: NR):
   # [NR] -fno-short-enums
@@ -1009,17 +1013,6 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
     CLDNN_CMAKE
   )
 
-if (MSVC)
-#  set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
-#      _SCL_SECURE_NO_WARNINGS
-#    )
-elseif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
-  set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
-    _GLIBCXX_USE_CXX11_ABI=0
-    $<$<CONFIG:Release>:_FORTIFY_SOURCE=2>
-  )
-endif()
-
 # ===================================== Include/Link directories =======================================
 
 include_directories(
@@ -1028,10 +1021,14 @@ include_directories(
     "${CLDNN__KHR_CLHPP_DIR}"
     "${CLDNN__CODEGEN_INCDIR}"
   )
-link_directories(
-    ${CLDNN__IOCL_ICD_LIBDIRS}
+
+add_library(clDNN_OpenCL UNKNOWN IMPORTED)
+set_target_properties(clDNN_OpenCL
+    PROPERTIES
+        IMPORTED_LOCATION ${CLDNN__IOCL_ICD_LIBPATH}
   )
 
+
 # =================================== Link targets and dependencies ====================================
 if(CLDNN__INCLUDE_CORE)
   add_subdirectory(src)
diff --git a/inference-engine/thirdparty/clDNN/api/C/activation.h b/inference-engine/thirdparty/clDNN/api/C/activation.h
deleted file mode 100644 (file)
index 0f35d4d..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Activation using rectified linear unit or parameterized rectified linear unit.
-/// @details Can use a single shared negative slope or one negative slope per channel.
-/// @par Algorithm:
-///   out(i,x,y) = max(0, in(i,x,y)) + slope(i) * min(0, in(i,x,y))
-/// @par Where:
-///   @li out(i,x,y) : value at x, y from i-th feature map after activation.
-///   @li in(i,x,y) : value at x, y from i-th feature map before activation.
-///   @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
-CLDNN_BEGIN_PRIMITIVE_DESC(activation)
-/// @brief activation function.
-cldnn_activation_func activation_func;
-/// @brief Activation additional params.
-/// activation_relu_negative_slope  - additional_params.a is a negative slope
-/// activation_brelu                - additional_params.a is an upper bound
-/// activation_linear               - additional_params.a/b are used as a*val + b
-cldnn_activation_additional_params additional_params;
-/// @brief Activation additional params stored on a memory object
-/// activation_relu_negative_slope  - negative slope per feature map
-/// activation_brelu                - upper bound per feature map
-/// activation_linear               - a,b per feature map
-cldnn_primitive_id additional_params_input;
-CLDNN_END_PRIMITIVE_DESC(activation)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(activation);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
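The descriptor removed above documents the (P)ReLU formula out(i,x,y) = max(0, in(i,x,y)) + slope(i) * min(0, in(i,x,y)). As a reading aid only, a minimal plain-C sketch of that formula follows; the helper name prelu_ref is invented here and is not part of the clDNN sources.

    #include <stddef.h>

    /* Reference (P)ReLU over one feature map: out = max(0, in) + a * min(0, in).
     * 'slope' holds either one shared value or one value per feature map. */
    static void prelu_ref(const float *in, float *out, size_t n,
                          const float *slope, size_t slope_count, size_t feature)
    {
        const float a = slope[slope_count == 1 ? 0 : feature];
        for (size_t i = 0; i < n; ++i) {
            const float v = in[i];
            out[i] = (v > 0.0f ? v : 0.0f) + a * (v < 0.0f ? v : 0.0f);
        }
    }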
diff --git a/inference-engine/thirdparty/clDNN/api/C/activation_grad.h b/inference-engine/thirdparty/clDNN/api/C/activation_grad.h
deleted file mode 100644 (file)
index 7a1e532..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Activation gradient for rectified linear unit or parameterized rectified linear unit.
-/// @par Algorithm:
-///   out(i,x,y) = input_gradient(i,x,y) * ((input(i,x,y) > 0) + slope(i)  * (input(i,x,y) <= 0)
-/// @par Where:
-///   @li out(i,x,y) : value at x, y from i-th feature map after activation.
-///   @li in(i,x,y) : value at x, y from i-th feature map before activation.
-///   @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
-CLDNN_BEGIN_PRIMITIVE_DESC(activation_grad)
-/// @brief activation gradient function.
-cldnn_activation_grad_func activation_grad_func;
-/// @brief Activation additional params.
-/// activation_relu_negative_slope_grad  - additional_params.a is a negative slope
-cldnn_activation_additional_params additional_params;
-/// @brief Activation additional params stored on a memory object
-/// activation_relu_negative_slope_grad  - negative slope per feature map
-cldnn_primitive_id additional_params_input;
-CLDNN_END_PRIMITIVE_DESC(activation_grad)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(activation_grad);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
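The removed activation_grad descriptor documents out = input_gradient * ((input > 0) + slope * (input <= 0)). A minimal plain-C sketch of that backward rule, assuming a single shared slope; all names are invented for illustration.

    #include <stddef.h>

    /* Gradient of (P)ReLU: pass the gradient through where the forward input
     * was positive, scale it by the slope elsewhere. */
    static void prelu_grad_ref(const float *grad, const float *in, float *out,
                               size_t n, float slope)
    {
        for (size_t i = 0; i < n; ++i)
            out[i] = grad[i] * (in[i] > 0.0f ? 1.0f : slope);
    }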
diff --git a/inference-engine/thirdparty/clDNN/api/C/apply_adam.h b/inference-engine/thirdparty/clDNN/api/C/apply_adam.h
deleted file mode 100644 (file)
index a775ef9..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Apply Adam primitive.
-/// @details Updates the output using the Adam algorithm. The output of this primitive should be of mutable_data type in case the user wants to update the
-/// variable across the network. If the output is not mutable_data then it will be initialized with 0.
-/// "Adam: A Method for Stochastic Optimization" by Diederik P. Kingma, Jimmy Ba
-/// @n See: https://arxiv.org/abs/1412.6980
-///
-/// <b>Algorithm:</b>
-/// @n float lr[t] = lr * sqrt(1 - beta2^t) / (1 - beta1^t);
-/// @n float m[t] = beta1 * m[t-1] + (1 - beta1) * grad[t];
-/// @n float v[t] = beta2 * v[t-1] + (1 - beta2) * grad[t] * grad[t];
-/// @n float result = result - lr[t] * m[t] / (sqrt(v[t]) + epsilon);
-
-CLDNN_BEGIN_PRIMITIVE_DESC(apply_adam)
-/// @brief Primitive id containing m data.
-cldnn_primitive_id m;
-/// @brief Primitive id containing v data.
-cldnn_primitive_id v;
-/// @brief Primitive id containing beta1^t.
-cldnn_primitive_id beta1_power;
-/// @brief Primitive id containing beta2^t.
-cldnn_primitive_id beta2_power;
-/// @brief Learning rate parameter.
-float lr;
-/// @brief Beta1 parameter.
-float beta1;
-/// @brief Beta2 parameter.
-float beta2;
-/// @brief Epsilon.
-float epsilon;
-/// @brief Optional primitive id that need to complete before execution of this primitive. Used only for synchronization.
-cldnn_primitive_id dependency_id;
-CLDNN_END_PRIMITIVE_DESC(apply_adam)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(apply_adam);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
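The formulas quoted in the removed apply_adam descriptor translate directly into one update step. The sketch below is an illustration of those formulas only (names such as adam_step_ref are invented), not the clDNN implementation.

    #include <math.h>
    #include <stddef.h>

    /* One Adam step: m and v are the running first/second moments kept by the
     * caller, t is the 1-based iteration counter used for bias correction. */
    static void adam_step_ref(float *var, float *m, float *v, const float *grad,
                              size_t n, int t, float lr, float beta1, float beta2,
                              float epsilon)
    {
        const float lr_t = lr * sqrtf(1.0f - powf(beta2, (float)t))
                              / (1.0f - powf(beta1, (float)t));
        for (size_t i = 0; i < n; ++i) {
            m[i] = beta1 * m[i] + (1.0f - beta1) * grad[i];
            v[i] = beta2 * v[i] + (1.0f - beta2) * grad[i] * grad[i];
            var[i] -= lr_t * m[i] / (sqrtf(v[i]) + epsilon);
        }
    }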
diff --git a/inference-engine/thirdparty/clDNN/api/C/arg_max_min.h b/inference-engine/thirdparty/clDNN/api/C/arg_max_min.h
deleted file mode 100644 (file)
index 6535909..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Enum type to specify output type - index of max or min values
-typedef enum {
-    cldnn_arg_max,
-    cldnn_arg_min,
-} cldnn_arg_max_min_out;
-
-/// @brief Enum type to specify axis to maximize/minimize along.
-typedef enum {
-    cldnn_arg_max_min_batch,
-    cldnn_arg_max_min_feature,
-    cldnn_arg_max_min_x,
-    cldnn_arg_max_min_y,
-    cldnn_arg_max_min_xyf
-} cldnn_arg_max_min_axis;
-
-/// @brief Finds the index of the k max/min values of input.
-CLDNN_BEGIN_PRIMITIVE_DESC(arg_max_min)
-/// @brief Number of indices to output.
-uint32_t top_k;
-/// @brief Type of output - max or min.
-cldnn_arg_max_min_out output_type;
-/// @brief Axis to maximize/minimize along. If not set, maximize the flattened x, y, f dimensions for each index of the first dimension.
-cldnn_arg_max_min_axis axis;
-/// @brief Indicates that the primitive has user defined axis to maximize/minimize along.
-uint32_t with_axis;
-/// @brief Sets output order: if True then the first output contains values and the second (optional) contains indices.
-uint32_t values_first;
-/// @brief Type of sorting - by values or indices.
-uint32_t sort;
-CLDNN_END_PRIMITIVE_DESC(arg_max_min)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(arg_max_min);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
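For the simplest configuration of the removed arg_max_min descriptor (top_k == 1, no axis set, maximum requested), the behaviour reduces to an argmax over the flattened block, roughly as in this illustrative sketch.

    #include <stddef.h>

    /* Index of the maximum value in a flattened (f, y, x) block of one batch. */
    static size_t argmax_ref(const float *data, size_t count)
    {
        size_t best = 0;
        for (size_t i = 1; i < count; ++i)
            if (data[i] > data[best])
                best = i;
        return best;
    }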
diff --git a/inference-engine/thirdparty/clDNN/api/C/average_unpooling.h b/inference-engine/thirdparty/clDNN/api/C/average_unpooling.h
deleted file mode 100644 (file)
index ea45ff2..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs "average_unpooling" operation.
-/// @details Reverse operation of average pooling.
-/// Each element in every pooling window is filled with the output value divided by the window size. In case of window overlap the elements are added.
-CLDNN_BEGIN_PRIMITIVE_DESC(average_unpooling)
-/// @brief Defines shift in output buffer.
-cldnn_tensor stride;
-/// @brief Pooling kernel size.
-cldnn_tensor size;
-/// @brief Output size of this primitive.
-cldnn_tensor output_size;
-CLDNN_END_PRIMITIVE_DESC(average_unpooling)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(average_unpooling);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
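Reading the removed description literally, average unpooling spreads each input value evenly over its window and adds contributions where windows overlap. A 1-D sketch under that reading follows; all names are invented for illustration.

    #include <stddef.h>
    #include <string.h>

    /* 1-D average unpooling: every input value contributes in[i] / window to
     * each output position covered by its window; overlaps accumulate. */
    static void average_unpool_1d_ref(const float *in, size_t in_len,
                                      float *out, size_t out_len,
                                      size_t window, size_t stride)
    {
        memset(out, 0, out_len * sizeof(float));
        for (size_t i = 0; i < in_len; ++i)
            for (size_t k = 0; k < window; ++k) {
                size_t pos = i * stride + k;
                if (pos < out_len)
                    out[pos] += in[i] / (float)window;
            }
    }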
diff --git a/inference-engine/thirdparty/clDNN/api/C/batch_norm.h b/inference-engine/thirdparty/clDNN/api/C/batch_norm.h
deleted file mode 100644 (file)
index 58e0f0b..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Batch normalization primitive.
-/// @details Performs batch normalization as described in
-/// "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" by Ioffe, Szegedy
-/// @n See: http://arxiv.org/abs/1502.03167
-///
-/// <b>Algorithm:</b>
-/// @n global stats can be computed as:
-/// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
-
-CLDNN_BEGIN_PRIMITIVE_DESC(batch_norm)
-/// @brief Primitive id containing mean data.
-cldnn_primitive_id mean;
-/// @brief Primitive id containing variance.
-cldnn_primitive_id variance;
-/// @brief Primitive id containing scale.
-cldnn_primitive_id scale;
-/// @brief Primitive id containing shift.
-cldnn_primitive_id shift;
-/// @brief Primitive id containing inverted variance used in future gradient computing.
-cldnn_primitive_id inv_variance;
-/// @brief Epsilon.
-float epsilon;
-CLDNN_END_PRIMITIVE_DESC(batch_norm)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(batch_norm);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
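The removed batch_norm descriptor quotes the usual inference formula out[i] = ((in[i] - mean) / sqrt(variance + epsilon)) * scale + shift per feature map. A plain-C sketch of exactly that formula, for illustration only:

    #include <math.h>
    #include <stddef.h>

    /* Inference-time batch normalization of one feature map. */
    static void batch_norm_ref(const float *in, float *out, size_t n,
                               float mean, float variance, float scale,
                               float shift, float epsilon)
    {
        const float inv_std = 1.0f / sqrtf(variance + epsilon);
        for (size_t i = 0; i < n; ++i)
            out[i] = (in[i] - mean) * inv_std * scale + shift;
    }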
diff --git a/inference-engine/thirdparty/clDNN/api/C/batch_norm_grad.h b/inference-engine/thirdparty/clDNN/api/C/batch_norm_grad.h
deleted file mode 100644 (file)
index 81a6957..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs backward batch normalization layer.
-/// @details Calculates mean gradient and gradient * input for every feature in data,
-/// then output is calculated as inv_variance * (input_grad - mean_grad_input * input - mean_grad)
-CLDNN_BEGIN_PRIMITIVE_DESC(batch_norm_grad)
-/// @brief Primitive id containing inverted variance from forward pass.
-cldnn_primitive_id inv_variance;
-CLDNN_END_PRIMITIVE_DESC(batch_norm_grad)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(batch_norm_grad);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
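The removed batch_norm_grad description gives output = inv_variance * (input_grad - mean_grad_input * input - mean_grad). One plausible reading of that per feature map is sketched below in plain C; interpreting mean_grad_input as mean(grad * input) is an assumption made for this illustration.

    #include <stddef.h>

    /* Backward batch norm for one feature map, per the documented formula. */
    static void batch_norm_grad_ref(const float *grad, const float *in,
                                    float *out, size_t n, float inv_variance)
    {
        float mean_grad = 0.0f, mean_grad_in = 0.0f;
        for (size_t i = 0; i < n; ++i) {
            mean_grad    += grad[i];
            mean_grad_in += grad[i] * in[i];
        }
        mean_grad    /= (float)n;
        mean_grad_in /= (float)n;
        for (size_t i = 0; i < n; ++i)
            out[i] = inv_variance * (grad[i] - mean_grad_in * in[i] - mean_grad);
    }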
diff --git a/inference-engine/thirdparty/clDNN/api/C/binary_convolution.h b/inference-engine/thirdparty/clDNN/api/C/binary_convolution.h
deleted file mode 100644 (file)
index a819fc5..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs forward spatial binary_convolution with weight sharing.
-/// @details Parameters are defined in context of "direct" binary_convolution, but actual algorithm is not implied.
-CLDNN_BEGIN_PRIMITIVE_DESC(binary_convolution)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the binary_convolution window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines shift in input buffer between adjacent calculations of output values.
-cldnn_tensor stride;
-/// @brief Defines gaps in the input - dilation rate k=1 is normal binary_convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
-/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
-/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
-cldnn_tensor dilation;
-/// @brief Weights groups count
-uint32_t split;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-/// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
-int groups;
-/// @brief Logical value of padding. Can be one of 3 values: 1 - pad bits equal to 1; -1 -> pad bits equal to 0; 0 -> pad is not counted
-float pad_value;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-
-CLDNN_END_PRIMITIVE_DESC(binary_convolution)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(binary_convolution);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
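The dilation rule quoted in the removed header (dilation 2 computes w[0]*x[0] + w[1]*x[2] + w[2]*x[4]) is only about filter-tap indexing; the binarized arithmetic itself is not described there. A 1-D sketch of just that indexing rule, with invented names:

    #include <stddef.h>

    /* Dot product of a filter with a dilated window: tap k reads x[i + k * d]. */
    static float dilated_dot_1d(const float *x, size_t x_len, size_t i,
                                const float *w, size_t w_len, size_t dilation)
    {
        float acc = 0.0f;
        for (size_t k = 0; k < w_len; ++k) {
            size_t pos = i + k * dilation;
            if (pos < x_len)
                acc += w[k] * x[pos];
        }
        return acc;
    }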
diff --git a/inference-engine/thirdparty/clDNN/api/C/border.h b/inference-engine/thirdparty/clDNN/api/C/border.h
deleted file mode 100644 (file)
index 29ddc2b..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Type of border that will be added to the input by current layer / primitive
-///        ( @CLDNN_PRIMITIVE_DESC{border} ).
-typedef enum /*:int32_t*/ {
-    /// @brief All points in the border are set to constant value.
-    cldnn_border_constant,
-    cldnn_border_zero = cldnn_border_constant,  /// keep bwd compatibility
-    /// @brief Border is constructed as a mirror of the image (edge is also mirrored).
-    /// @details Size of border in any dimension cannot be larger than size of
-    ///          input in the same dimension.
-    cldnn_border_mirror,
-    /// @brief Border is constructed as a mirror of the image (edge is NOT mirrored).
-    /// @details Size of border in any dimension cannot be larger than size of
-    ///          input in the same dimension decreased by @c 1.
-    cldnn_border_mirror_101,
-    /// @brief Border is constructed as a replication of the edge.
-    /// @details Size of border in any dimension cannot be larger than size of
-    ///          input in the same dimension.
-    cldnn_border_edge
-} cldnn_border_type;
-
-/// @brief Adds border around input.
-///
-/// @details Applies border of specified type around input data. The size of output data is increased
-///          by @c left_top_sizes and by @c right_bottom_sizes.
-/// @n
-/// @n@b Requirements:
-/// @n - @c left_top_sizes and @c right_bottom_sizes must be non-negative on all dimensions and compatible
-///      with size of input (describe the same dimensions).
-/// @n - For @c border_type equal to @c cldnn_border_mirror, @c left_top_sizes and @c right_bottom_sizes
-///      must be lower than or equal to size of input on corresponding dimension (for all dimensions)
-/// @n - For @c border_type equal to @c cldnn_border_mirror_101, @c left_top_sizes and @c right_bottom_sizes
-///      must be lower than size of input on corresponding dimension (for all dimensions)
-CLDNN_BEGIN_PRIMITIVE_DESC(border)
-/// @brief Size of border that needs to be added from left (in X dimension) and from top (in Y dimension).
-cldnn_tensor left_top_sizes;
-/// @brief Size of border that needs to be added from right (in X dimension) and from bottom (in Y dimension).
-cldnn_tensor right_bottom_sizes;
-/// @brief Type of border that needs to be added to the input.
-cldnn_border_type border_type;
-/// @brief Border value that is used in constant mode.
-float border_value;
-CLDNN_END_PRIMITIVE_DESC(border)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(border);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
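The removed border modes differ only in how an out-of-range coordinate is mapped back into the input (or replaced by border_value for the constant mode). A 1-D index-mapping sketch, assuming the size limits stated above so a single reflection suffices; the enum and function are illustrative, not the removed API.

    /* Map a coordinate i in [-left, len + right) to a source index for the
     * non-constant border modes; constant mode substitutes border_value instead. */
    enum border_mode { BORDER_MIRROR, BORDER_MIRROR_101, BORDER_EDGE };

    static int border_src_index(int i, int len, enum border_mode mode)
    {
        if (i >= 0 && i < len)
            return i;
        switch (mode) {
        case BORDER_MIRROR:     return i < 0 ? -i - 1 : 2 * len - 1 - i;
        case BORDER_MIRROR_101: return i < 0 ? -i     : 2 * len - 2 - i;
        default:                return i < 0 ? 0      : len - 1;  /* edge */
        }
    }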
diff --git a/inference-engine/thirdparty/clDNN/api/C/broadcast.h b/inference-engine/thirdparty/clDNN/api/C/broadcast.h
deleted file mode 100644 (file)
index 519210f..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Broadcasts input to defined by @p broadcast_sizes output. @p broadcast_axes are used to
-///        reinterpret input (reshape) inside algorithm.
-///
-/// @details Takes input, reinterpret it according to @p broadcast_axes
-///          and copies it to output once or multiple times.
-/// @n
-/// @n Simple example with empty @p broadcast_axes. Lets assume that:
-/// @n      <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
-/// @n      <tt>broadcast_sizes = (bs_b, bs_f, bs_y, bs_x)</tt>
-/// @n      <tt>broadcast_axes = () - empty</tt>
-/// @n The input is broadcasted on each dimension where <tt>bs_{dim} > in_{dim}</tt> and <tt>bs_{dim}</tt>
-///    is divisible by <tt>in_{dim}</tt> (input is copied <tt>bs_{dim} / in_{dim}</tt> times).
-///    The dimensions where <tt>bs_{dim}</tt> is equal to <tt>in_{dim}</tt> remain unchanged.
-/// @n The resulting output will have sizes equal to @p broadcast_sizes and contains values from
-///    input that meet following criteria:
-/// @n      <tt>output[(b, f, y, x)] = input[(b % in_b, f % in_f, y % in_y, x % in_x)]</tt>
-/// @n where <tt>(b, f, y, x)</tt> is a position of value in a primitive output.
-/// @n
-/// @n More complicated example with non empty @p broadcast_axes. Lets assume that:
-/// @n      <tt>broadcast_sizes = (bs_b, bs_f, bs_y, bs_x)</tt>
-/// @n      <tt>broadcast_axes = (2)</tt>
-/// @n Taking into account the broadcast_axes size (=1), the primitive's input must have 4 - 1 = 3 dimensions:
-/// @n      <tt>primitive input = (1, in_b, in_f, in_x)</tt>
-/// @n Due to broadcast_axes = (2) primitive will interpret input as:
-/// @n      <tt>primitive input(internal representation) = (in_b, in_f, 1, in_x)</tt>
-/// @n Now, you can apply broadcast rules from previous example to modified (reinterpreted)
-///    input and output:
-/// @n      <tt>input_sizes = (in_b, in_f, 1, in_x)</tt>
-/// @n      <tt>output_shape = (bs_b, bs_f, bs_y, bs_x)</tt>
-/// @n      <tt>broadcast_axes = () - empty</tt>
-/// @n
-/// @n@b Requirements:
-/// @n - @p broadcast_sizes must be positive on all dimensions.
-/// @n - @p broadcast_axes size (dimensions count) must be within (inclusive) range
-///      0 - 4.
-/// @n - @p broadcast_axes mustn't have duplicate values.
-/// @n - Values of @p broadcast_axes must be within (inclusive) range 0 - 3
-/// @n - @p output_shape must be greater than (and divisible by) or equal to the reinterpreted
-///      input on all dimensions.
-/// @n Breaking any of these conditions will raise an exception.
-CLDNN_BEGIN_PRIMITIVE_DESC(broadcast)
-/// @brief Sizes of broadcast. Output size of current primitive will match broadcast sizes (layout type
-///        will not change).
-cldnn_tensor broadcast_sizes;
-/// @brief Array of axes positions from output shape (0-based, from left to right)
-///        along which broadcast should happen.
-cldnn_uint16_t_arr broadcast_axes;
-
-CLDNN_END_PRIMITIVE_DESC(broadcast)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(broadcast);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
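The broadcast rule in the removed header, output[(b, f, y, x)] = input[(b % in_b, f % in_f, y % in_y, x % in_x)], can be written out directly for a plain row-major bfyx buffer. The sketch below covers only the empty broadcast_axes case and uses invented names.

    #include <stddef.h>

    /* dims are ordered {b, f, y, x}; both buffers are dense row-major bfyx. */
    static void broadcast_bfyx_ref(const float *in, const size_t in_dims[4],
                                   float *out, const size_t out_dims[4])
    {
        for (size_t b = 0; b < out_dims[0]; ++b)
            for (size_t f = 0; f < out_dims[1]; ++f)
                for (size_t y = 0; y < out_dims[2]; ++y)
                    for (size_t x = 0; x < out_dims[3]; ++x) {
                        size_t src = ((b % in_dims[0]) * in_dims[1] + f % in_dims[1]) * in_dims[2];
                        src = (src + y % in_dims[2]) * in_dims[3] + x % in_dims[3];
                        size_t dst = ((b * out_dims[1] + f) * out_dims[2] + y) * out_dims[3] + x;
                        out[dst] = in[src];
                    }
    }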
diff --git a/inference-engine/thirdparty/clDNN/api/C/cldnn.h b/inference-engine/thirdparty/clDNN/api/C/cldnn.h
deleted file mode 100644 (file)
index cbee0ec..0000000
+++ /dev/null
@@ -1,891 +0,0 @@
-/*
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-// exporting symbols form dynamic library
-#ifdef EXPORT_NEURAL_SYMBOLS
-#if defined(_MSC_VER)
-//  Microsoft
-#define CLDNN_API __declspec(dllexport)
-#elif defined(__GNUC__)
-//  GCC
-#define CLDNN_API __attribute__((visibility("default")))
-#else
-#define CLDNN_API
-#pragma warning Unknown dynamic link import / export semantics.
-#endif
-#else  // import dll
-#if defined(_MSC_VER)
-//  Microsoft
-#define CLDNN_API __declspec(dllimport)
-#elif defined(__GNUC__)
-//  GCC
-#define CLDNN_API
-#else
-#define CLDNN_API
-#pragma warning Unknown dynamic link import / export semantics.
-#endif
-#endif
-
-#include <stdint.h>
-#include <stddef.h>
-
-/// @addtogroup c_api C API
-/// @{
-
-/// @defgroup c_memory Memory Management
-
-/// @defgroup c_topology Network Topology
-
-/// @defgroup c_engine Execution Engine
-
-/// @defgroup c_network Network Execution
-
-/// @defgroup c_error Error Handling
-
-/// @defgroup c_version Version Information
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @addtogroup c_error
-/// @{
-#define CLDNN_SUCCESS 0
-#define CLDNN_ERROR -1
-#define CLDNN_INVALID_ARG -2
-#define CLDNN_OUT_OF_RESOURCES -3
-#define CLDNN_DEVICE_ERROR -4
-#define CLDNN_UNSUPPORTED_SIZE -5
-#define CLDNN_UNSUPPORTED_FORMAT -6
-#define CLDNN_DIMENSION_MISMATCH -7
-#define CLDNN_ALLOC_SIZE_EXCEEDED -8
-#define CLDNN_GLOBAL_SIZE_EXCEEDED -9
-
-#define CLDNN_API_STRING_SIZE_MAX 256
-/// @brief Represents errors status for all API calls
-typedef int32_t cldnn_status;
-/// @}
-
-/// @addtogroup c_version
-/// @{
-/// @brief Represents version information of API.
-typedef struct {
-    int32_t major;     ///< Major version component (major version of clDNN API interface).
-    int32_t minor;     ///< Minor version component (minor version of API interface - correlated with IE API version).
-    int32_t build;     ///< Build version component (version/revision of official Open Source drop of clDNN library).
-    int32_t revision;  ///< Revision version component (incremental identifier of current build/compilation).
-} cldnn_version;
-/// @}
-
-/// @ingroup c_engine
-/// @brief Engine object
-typedef struct cldnn_engine_impl* cldnn_engine;
-
-/// @ingroup c_network
-/// @brief Event object
-typedef struct cldnn_event_impl* cldnn_event;
-
-/// @ingroup c_topology
-/// @brief Network topology to be defined by user
-typedef struct cldnn_topology_impl* cldnn_topology;
-
-/// @ingroup c_program
-/// @brief Compiled program build from @ref cldnn_topology by @ref cldnn_engine
-typedef struct cldnn_program_impl* cldnn_program;
-
-/// @ingroup c_network
-/// @brief Executable network allocated from @ref cldnn_program
-typedef struct cldnn_network_impl* cldnn_network;
-
-/// @ingroup c_memory
-/// @brief Memory object
-typedef struct cldnn_memory_impl* cldnn_memory;
-
-/// @addtogroup c_engine
-/// @{
-
-/// @brief Defines available engine types
-typedef enum /*:int32_t*/ {
-    cldnn_engine_ocl  ///< OpenCL engine
-} cldnn_engine_type;
-
-/// @brief Priority modes.
-typedef enum /*:int16_t*/ {
-    cldnn_priority_disabled,
-    cldnn_priority_low,
-    cldnn_priority_med,
-    cldnn_priority_high
-} cldnn_priority_mode_type;
-
-/// @brief Throttle modes.
-typedef enum /*:int16_t*/ {
-    cldnn_throttle_disabled,
-    cldnn_throttle_low,
-    cldnn_throttle_med,
-    cldnn_throttle_high
-} cldnn_throttle_mode_type;
-
-/// @brief Configuration parameters for created engine.
-typedef struct {
-    uint32_t enable_profiling;                           ///< Enable per-primitive profiling.
-    uint32_t meaningful_kernels_names;                   ///< Generate meaningful names for OpenCL kernels.
-    uint32_t dump_custom_program;                        ///< dump the custom generated program to files
-    const char* compiler_options;                        ///< OpenCL compiler options string.
-    const char* single_kernel_name;                      ///< If provided, runs specific layer.
-    uint32_t enable_parallelisation;                     ///< Enables parallel execution of primitives which don't depend on each other. Disabled by default.
-    const char* engine_log;                              ///< Specifies a file to which the engine log should be dumped. Null/empty values mean no logging.
-    const char* sources_dumps_dir;                       ///< Specifies a directory where sources of cldnn::program objects should be dumped.
-                                                         ///< Null/empty values mean no logging.
-    /*cldnn_priority_mode_type*/ int16_t priority_mode;  ///< Priority mode (support of OpenCL priority hints in command queue).
-    /*cldnn_throttle_mode_type*/ int16_t throttle_mode;  ///< Throttle mode (support of throttle hints in command queue).
-    uint32_t enable_memory_pool;                         ///< Enables memory usage optimization. memory objects will be reused when possible.
-    uint16_t n_streams;                                  ///< Number of queues executed in parallel
-    void* context;
-    const char* tuning_cache_path;  ///< Enables defining other than default path to tuning cache json
-} cldnn_engine_configuration;
-
-/// @brief Information about the engine returned by cldnn_get_engine_info().
-typedef struct {
-    uint32_t cores_count;     ///< Number of available HW cores.
-    uint32_t core_frequency;  ///< Clock frequency in MHz.
-
-    uint64_t max_work_group_size;  ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model.
-    uint64_t max_local_mem_size;   ///< Maximum size of local memory arena in bytes.
-    uint64_t max_global_mem_size;  ///< Maximum size of global device memory in bytes.
-    uint64_t max_alloc_mem_size;   ///< Maximum size of memory object allocation in bytes.
-
-    uint64_t max_image2d_width;   ///< Maximum image 2d width supported by the device.
-    uint64_t max_image2d_height;  ///< Maximum image 2d height supported by the device.
-
-    // Flags (for layout compatibility fixed size types are used).
-    uint8_t supports_fp16;             ///< Does engine support FP16.
-    uint8_t supports_fp16_denorms;     ///< Does engine support denormalized FP16.
-    uint8_t supports_subgroups_short;  ///< Does engine support cl_intel_subgroups_short.
-    uint8_t supports_image;            ///< Does engine support images (CL_DEVICE_IMAGE_SUPPORT cap).
-
-    uint8_t supports_imad;   ///< Does engine support int8 mad.
-    uint8_t supports_immad;  ///< Does engine support int8 multi mad.
-
-    char ocl_device_name[CLDNN_API_STRING_SIZE_MAX];        ///< Device ID string
-    char ocl_driver_version[CLDNN_API_STRING_SIZE_MAX];  ///< Version of OpenCL driver
-} cldnn_engine_info;
-/// @}
-
-/// @addtogroup c_network
-/// @{
-
-/// @brief user-defined event handler callback.
-typedef void (*cldnn_event_handler)(void*);
-
-/// @brief Profiling information for an executed network primitive.
-/// @details Every @ref cldnn_event associated with @ref cldnn_network_output.
-/// can contain one or more profiling information intervals.
-typedef struct {
-    const char* name;  ///< Profiling interval name.
-    uint64_t nanoseconds;
-} cldnn_profiling_interval;
-
-/// @brief Network build option types.
-typedef enum /*:int32_t*/ {
-    cldnn_build_option_fusing,               ///< Allow primitives fusing during network build.
-    cldnn_build_option_optimize_data,        ///< Enable implicit reordering for user input.
-    cldnn_build_option_debug,                ///< Enable debug mode.
-    cldnn_build_option_outputs,              ///< User selected list of network outputs.
-    cldnn_build_option_tuning_config,        ///< Tuning config.
-    cldnn_build_option_graph_dumps_dir,      ///< Specifies a directory to which stages of network compilation should be dumped.
-    cldnn_build_option_serialization,        ///< Specifies a name of files to which serialization should be dumped.
-    cldnn_build_option_load_program,         ///< Specifies a name of load_program process.
-    cldnn_build_option_learning_config,      ///< User defined learning parameters.
-    cldnn_build_option_detection_output_gpu  ///< Run detection output layer always on GPU, regardless of performance
-} cldnn_build_option_type;
-
-/// @brief Tuning modes.
-typedef enum /*:int32_t*/ {
-    cldnn_tuning_disabled,        ///< Tuning is disabled.
-    cldnn_tuning_use_cache,       ///< Tuning using the cached data (no on-line tuning for non-existing data).
-    cldnn_tuning_tune_and_cache,  ///< Tuning using the cached data if exist, tune and update cache otherwise.
-} cldnn_tuning_mode_type;
-
-/// @brief Tuning config.
-struct cldnn_tuning_config {
-    const int32_t mode;           ///< #cldnn_tuning_mode_type.
-    const char* cache_file_path;  ///< A path to the tuning cache file.
-};
-
-/// @brief Learning params.
-struct cldnn_learning_params {
-    const float momentum;
-    const float weights_decay;
-};
-
-/// @brief Represents network build option.
-typedef struct {
-    int32_t type;      ///< #cldnn_build_option_type.
-    const void* data;  ///< option parameter - e.g list of outputs.
-} cldnn_build_option;
-
-/// @brief Output information for executed @a cldnn_network.
-/// @details User should wait for event before accessing the memory.
-typedef struct {
-    cldnn_event event;    ///< Event to be waited.
-    cldnn_memory memory;  ///< Output memory.
-                          ///< User should wait for the event before access this field.
-} cldnn_network_output;
-
-/// @}
-
-/// @addtogroup c_memory
-/// @{
-
-/// @brief Represents memory formats (orders).
-/// @n In CNNs most of the data is described as 4-dimensional blocks. In the Intel(R) clDNN library we describe memory with 4 letters
-/// - b - number of blocks in batch. For weights formats: output features - conv, neurons - inner product
-/// - f - number of feature maps, features or channels. For weights formats: input features - conv, inputs, inner product
-/// - x - spatial, width
-/// - y - spatial, height
-/// @n
-/// For an explanation of how each format type is implemented in memory we will use the naming shown below (b=2,f=3,y=3,x=3):
-/// \image html layout_memory_representation.jpg
-typedef enum /*:int32_t*/ {
-    // Data formats
-    cldnn_format_yxfb,                       ///< batch first, feature and then spatials \n \image html yxfb.jpg
-    cldnn_format_byxf,                       ///< used in bitmaps, input from user i.e b images of RGB format \n \image html byxf.jpg
-    cldnn_format_bfyx,                       ///< the most common format for activations in clDNN. \n \image html bfyx.jpg
-    cldnn_format_fyxb,                       ///< format not used inside clDNN, but supported in reorder as extension for user provided formats.
-    cldnn_format_bfyx_f16,                   ///< format used for blocked convolution
-    cldnn_format_bs_xs_xsv8_bsv8,            ///< format used only for fully connected weights: bs - batch slice, xs - x slice,
-                                             ///< bsv8 - 8 values of single slice.
-                                             ///< \n \image html bs_xs_xsv8_bsv8.jpg
-    cldnn_format_bs_xs_xsv8_bsv16,           ///< format used only for fully connected weights: bs - batch slice, xs - x slice,
-                                             ///< bsv16 - 16 values of single slice.
-                                             ///< \n \image html bs_xs_xsv8_bsv16.jpg
-    cldnn_format_bs_x_bsv16,                 ///< format used only for fully connected weights fp16 batch=1 : bs - batch slice (responses slice),
-                                             ///< bsv16 - 16 values of single batch slice, x - flattened plane of (fyx).
-                                             ///< \n \image html bs_x_bsv16.jpg
-    cldnn_format_bf8_xy16,                   ///< format used only for convolution 1x1 input, xy aligned to 16, f aligned to 8
-                                             ///< \n \image html bf8_xy16.jpg
-    cldnn_format_b_fs_yx_32fp,               ///< format for data for binary convolutions
-    cldnn_format_winograd_2x3_s1_data,       ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1
-    cldnn_format_byxf_af32,                  ///< format for input for primitives using MMAD
-    cldnn_format_byx8_f4,                    ///< format for input for MMAD convolutions
-    cldnn_format_fs_bs_yx_bs4_fs32,          ///< format for batched input for primitives using MMAD
-    cldnn_format_b_fs_yx_fsv4,               ///< format for input for IMAD convolutions
-    cldnn_format_bfzyx,                      ///< format for 3D convolutions
-    cldnn_format_bfwzyx,                     ///< 6D format
-    cldnn_format_fs_b_yx_fsv32,              ///< format for fp16 convolutions using 32 features
-
-    // Weights formats
-    cldnn_format_o_i_yx_i16_o16,                          ///< format used for blocked convolution
-    cldnn_format_os_iyx_osv16,                            ///< format used only for convolution weights: os - output feature maps slice,
-                                                          ///< i - input feature maps, yx - spatials, sv16 - 16 values of single slice.
-                                                          ///< \n \image html os_iyx_osv16.jpg
-    cldnn_format_oiyx_o16,                                ///< format used only for convolution weights: os - output feature maps slice,
-                                                          ///< i - input feature maps, yx - spatials, sv16 - 16 values of single slice.
-    cldnn_format_os_iyx_osv32,                            ///< format used only for convolution weights: os - output feature maps slice,
-                                                          ///< i - input feature maps, yx - spatials, sv32 - 32 values of single slice.
-    cldnn_format_os_iyx_osv64,                            ///< format used only for convolution weights: os - output feature maps slice,
-                                                          ///< i - input feature maps, yx - spatials, sv64 - 64 values of single slice.
-    cldnn_format_image_2d_weights_c4_fyx_b,               ///< image format for weights, image 2d, 4-channel,
-                                                          ///< width size is f*y*x/4 (4-channels filled with fyx data), height is b
-                                                          ///< \n \image html image_2d_weights_c4_fyx_b.jpg
-    cldnn_format_image_2d_weights_c1_b_fyx,               ///< image format for weights, image 2d, single channel, width size is b, height is f*y*x
-                                                          ///< \n \image html image_2d_weights_c1_b_fyx.jpg
-    cldnn_format_winograd_2x3_s1_weights,                 ///< format used for weights for winograd non-fused convolution, F(2,3) -- filter 3x3 with stride 1
-    cldnn_format_winograd_2x3_s1_fused_weights,           ///< format used for weights for winograd fused convolution, F(2,3) -- filter 3x3 with stride 1
-    cldnn_format_winograd_6x3_s1_fused_weights,           ///< format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
-    cldnn_format_image_2d_weights_winograd_6x3_s1_fbxyb,  ///< image format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
-    cldnn_format_image_2d_weights_winograd_6x3_s1_xfbyb,  ///< image format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
-    cldnn_format_os_is_yx_isa8_osv8_isv4,                 ///< format for weights for MMAD convolutions,
-                                                          ///< stored as ((aligned_to_8(O)/8) * (aligned_to_32(I)/32) * Y * X * ( 8 ) * ( 8 ) * ( 4 )
-    cldnn_format_os_is_yx_isa8_osv8_isv4_swizzled_by_4,   ///< format for weights for MMAD convolutions
-    cldnn_format_is_o_yx_isv32,                           ///< format for weights for 1x1 MMAD convolutions
-    cldnn_format_is_o32_yx_isv32_swizzled_by_4,           ///< format for weights for 1x1 MMAD convolutions
-    cldnn_format_os_is_y_x8_osv8_isv4,                    ///< format for weights for MMAD convolutions
-    cldnn_format_os_is_y_x8_osv8_isv4_swizzled_by_4,      ///< format for weights for MMAD convolutions
-    cldnn_bf_lyx_yx,                                      ///< format for local convolution weights
-    cldnn_format_os_is_yx_osv16_isv4,                     ///< format for weights for IMAD convolutions
-    cldnn_format_os_is_yx_osv32_isv32p,                   ///< format for weights for binary convolutions
-    cldnn_format_format_num,                              ///< number of format types
-    cldnn_format_any = -1
-} cldnn_format_type;
-
-#define CLDNN_FLOAT_TYPE_MASK 0x80
-#define CLDNN_UINT_TYPE_MASK 0x40
-#define CLDNN_BIN_TYPE_MASK 0x20
-
-#define CLDNN_TENSOR_BATCH_DIM_MAX 1
-#define CLDNN_TENSOR_FEATURE_DIM_MAX 1
-#define CLDNN_TENSOR_SPATIAL_DIM_MAX 4
-#define CLDNN_TENSOR_LOCAL_DIM_MAX 2
-#define CLDNN_TENSOR_DIM_MAX 8
-
-/// @brief N-dimensional vector. Mostly used to represent memory size.
-typedef struct {
-    size_t batch_num;
-    size_t feature_num;
-    size_t spatial_num;
-    size_t local_num;
-    int32_t sizes[CLDNN_TENSOR_DIM_MAX];
-} cldnn_tensor;
-
-/// @brief Padding information.
-typedef struct {
-    cldnn_tensor lower_size;  ///< Lower padding sizes. For spatials, it means size of left (X) and top (Y) padding.
-    cldnn_tensor upper_size;  ///< Upper padding sizes. For spatials, it means size of right (X) and bottom (Y) padding.
-    float filling_value;      ///< Filling value for an element of padding. If data type of elements is different than float it is converted
-                              ///< to it using round-towards-nearest-even (for floating-point data types) or round-towards-zero (for integral
-                              ///< data types).
-} cldnn_padding;
-
-/// @brief Data type stored in memory.
-typedef enum /*:size_t*/ {
-    cldnn_bin = sizeof(int32_t) | CLDNN_BIN_TYPE_MASK,
-    cldnn_u8 = sizeof(uint8_t) | CLDNN_UINT_TYPE_MASK,
-    cldnn_i8 = sizeof(int8_t),
-    cldnn_f16 = sizeof(int16_t) | CLDNN_FLOAT_TYPE_MASK,
-    cldnn_f32 = sizeof(float) | CLDNN_FLOAT_TYPE_MASK,
-    cldnn_i32 = sizeof(int32_t),
-    cldnn_i64 = sizeof(int64_t)
-} cldnn_data_type;
-
-/// @brief Memory layout description.
-typedef struct {
-    size_t data_type;       ///< data type (@ref cldnn_data_type) stored in memory.
-    int32_t format;         ///< Memory format (@ref cldnn_format_type)
-    cldnn_tensor size;      ///< N-dimensional vector describes size (in elements) of memory (excluding padding).
-    cldnn_padding padding;  ///< Explicitly added padding to memory buffer.
-} cldnn_layout;
-/// @}
-
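As the enum above shows, each cldnn_data_type value packs the element size (the sizeof of the underlying type) into its low bits and OR-s in the CLDNN_FLOAT/UINT/BIN_TYPE_MASK bits for the type class. A small illustrative decoder, derived only from those definitions and not part of the removed API:

    #include <stddef.h>
    #include <stdint.h>

    /* Element size in bytes: clear the 0x80/0x40/0x20 trait bits. */
    static size_t cldnn_data_type_size(int32_t dt)
    {
        return (size_t)(dt & ~(0x80 | 0x40 | 0x20));
    }

    /* Non-zero when the value carries CLDNN_FLOAT_TYPE_MASK (e.g. cldnn_f16, cldnn_f32). */
    static int cldnn_data_type_is_float(int32_t dt)
    {
        return (dt & 0x80) != 0;
    }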
-/// @addtogroup c_topology
-/// @{
-
-/// @brief Represents reference to an array of floats.
-typedef struct {
-    const float* data;  ///< Pointer to float array.
-    size_t size;        ///< Size (in floats) of the array.
-} cldnn_float_arr;
-
-/// @brief Represents reference to an array of uint16_t.
-typedef struct {
-    const uint16_t* data;  ///< Pointer to uint16_t array.
-    size_t size;           ///< Size (in uint16_t) of the array.
-} cldnn_uint16_t_arr;
-
-/// @brief Represents reference to an array of uint8_t.
-typedef struct {
-    const uint8_t* data;  ///< Pointer to uint8_t array.
-    size_t size;          ///< Size (in uint8_t) of the array.
-} cldnn_uint8_t_arr;
-
-/// @brief Represents reference to an array of tensor.
-typedef struct {
-    const cldnn_tensor* data;  ///< Pointer to tensor array.
-    size_t size;               ///< Size (in tensor) of the array.
-} cldnn_tensor_arr;
-
-/// @brief Globally unique primitive's type id
-typedef const struct cldnn_primitive_type* cldnn_primitive_type_id;
-
-/// @brief Unique @p id of a primitive within a topology.
-typedef const char* cldnn_primitive_id;
-
-/// @brief Represents reference to an array of primitive ids.
-typedef struct {
-    const cldnn_primitive_id* data;  ///< Pointer to ids array.
-    size_t size;                     ///< Number of ids in the array.
-} cldnn_primitive_id_arr;
-
-/// @brief Detailed information about program primitives after a graph optimization step.
-typedef struct {
-    cldnn_primitive_id original_id;
-    const char* type_id;
-    cldnn_primitive_id_arr dependencies;
-    cldnn_primitive_id_arr users;
-    cldnn_primitive_id_arr fused_ids;
-    cldnn_layout output_layout;
-    const char* kernel_id;
-    const char* layout_str;
-    int is_cpu;
-    int exec_id;
-} cldnn_primitive_info;
-
-typedef struct {
-    cldnn_data_type data_type;
-    // No bool type available...
-    char enabled;
-} cldnn_optional_data_type;
-
-/// @brief Custom primitive kernel source code
-typedef const char* cldnn_kernel_code;
-/// @brief Custom primitive kernel source code array
-typedef cldnn_kernel_code* cldnn_kernels_code;
-/// @brief Custom primitive kernel entry point
-typedef const char* cldnn_kernel_entry_point;
-/// @brief Custom primitive kernel build options
-typedef const char* cldnn_kernel_build_options;
-/// @brief Custom primitive kernel workgroup sizes
-typedef const size_t* cldnn_work_group_sizes;
-
-/// @brief Custom primitive kernel argument type
-typedef enum cldnn_arg_type_t {
-    arg_input,
-    arg_output,
-} cldnn_arg_type;
-
-/// @brief Custom primitive kernel argument index
-typedef uint32_t cldnn_arg_index;
-
-/// @brief Custom primitive kernel argument type
-typedef struct cldnn_arg_t {
-    cldnn_arg_type arg_type;
-    cldnn_arg_index index;
-} cldnn_arg;
-
-/// @brief Custom primitive kernel argument array
-typedef const cldnn_arg* cldnn_kernel_arguments;
-
-/// @brief activation functions
-typedef enum cldnn_activation_func_t {
-    activation_none,                 // val
-    activation_logistic,             // 1/(1 + exp(-val))
-    activation_hyperbolic_tan,       // tanh(val)
-    activation_relu,                 // max(0, val)
-    activation_relu_negative_slope,  // max(0, val) + a * min(0, val)    (a is additional param)
-    activation_clamp,                // max(a, min(b, val)               (a,b are additional param)
-    activation_softrelu,             // log(1 + exp(val))
-    activation_abs,                  // abs(val)
-    activation_linear,               // a*val + b                        (a,b are additional params)
-    activation_square,               // val*val
-    activation_sqrt,                 // sqrt(val)
-    activation_elu,                  // max(0, val) + a * (exp(min(0, val) - 1) (a is additional param)
-    activation_sin,                  // sin(val)
-    activation_asin,                 // asin(val)
-    activation_sinh,                 // sinh(val)
-    activation_asinh,                // asinh(val)
-    activation_cos,                  // cos(val)
-    activation_acos,                 // acos(val)
-    activation_cosh,                 // cosh(val)
-    activation_acosh,                // acosh(val)
-    activation_log,                  // log(val)
-    activation_log2,                 // log2(val)
-    activation_exp,                  // exp(val)
-    activation_tan,                  // tan(val)
-    activation_atan,                 // atan(val)
-    activation_atanh,                // atanh(val)
-    activation_floor,                // floor(val)
-    activation_ceil,                 // ceil(val)
-    activation_negative,             // -val
-    activation_not,                  // !val
-    activation_pow,                  // pow(val, a)
-    activation_reciprocal,           // (1/val)
-    activation_erf,                  // Gauss error function
-    activation_hard_sigmoid,         // max(0, min(1, a * val + b))       (a,b are additional params)
-    activation_selu,                 // for val <= 0: b * (a * e^val - a); for val > 0: b * val (a,b are additional params)
-    activation_sign,                 // val > 0: 1; val < 0: -1; val == 0: 0
-    activation_softplus,             // ln(exp(val) + 1)
-    activation_softsign              // (val/(1+|val|))
-} cldnn_activation_func;
-
-/// @brief activation gradient functions
-typedef enum cldnn_activation_grad_func_t {
-    activation_grad_none,                 // val
-    activation_grad_relu,                 // val * (input > 0)
-    activation_grad_relu_negative_slope,  // val * ((input > 0) + a * (input <= 0)    (a is additional param)
-} cldnn_activation_grad_func;
-
-/// @brief activation additional params
-typedef struct cldnn_activation_additional_params_t {
-    float a, b;
-} cldnn_activation_additional_params;
-
-/// @brief Axis which index_select primitive will index.
-typedef enum index_select_axis_name_t {
-    along_b,
-    along_f,
-    along_y,
-    along_x
-} index_select_axis_name;
-
-/// @brief  Axis which index_select primitive will index array
-typedef const index_select_axis_name* index_select_axis_name_arr;
-
-/// @brief reorder mean operation modes
-typedef enum cldnn_reorder_mean_mode_t {
-    mean_none,      // val
-    mean_subtract,  // val - mean
-    mean_mul,       // val * mean
-    mean_div,       // val/mean
-} cldnn_reorder_mean_mode;
-
-/// @brief Begin primitive description definition
-/// @details Defines @p 'cldnn_primitive_type_desc' structure with first 5 fields
-/// common for all primitive descriptors. Other fields should be added after this macro.
-/// primitive descriptor definition should be closed by @ref CLDNN_END_PRIMITIVE_DESC.
-#define CLDNN_BEGIN_PRIMITIVE_DESC(PType)                                                                \
-    struct cldnn_##PType##_desc {                                                                        \
-        cldnn_primitive_type_id type;              /**< @brief Primitive type identifier. */             \
-        cldnn_primitive_id id;                     /**< @brief Primitive id unique within a topology. */ \
-        cldnn_primitive_id_arr input;              /**< @brief Input primitives ids. */                  \
-        cldnn_padding output_padding;              /**< @brief Output padding information. */            \
-        cldnn_optional_data_type output_data_type; /**< @brief If specified, describes an explicit change of the output precision of the primitive. */
-
-/// @brief Close primitive descriptor definition.
-#define CLDNN_END_PRIMITIVE_DESC(PType) \
-    };
-
-#define CLDNN_PRIMITIVE_DESC(PType) cldnn_##PType##_desc
-
-/// @brief Basic primitive descriptor structure.
-CLDNN_BEGIN_PRIMITIVE_DESC(primitive)
-CLDNN_END_PRIMITIVE_DESC(primitive)
-
-/// @}
-
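For readers tracking this removal, expanding the CLDNN_BEGIN_PRIMITIVE_DESC / CLDNN_END_PRIMITIVE_DESC pair produces a struct of the following shape. This is a hand expansion for illustration only; foo is a placeholder name, and the field types are the typedefs declared earlier in this header.

    /* Roughly what CLDNN_BEGIN_PRIMITIVE_DESC(foo) ... CLDNN_END_PRIMITIVE_DESC(foo)
     * expands to: five common fields, then the primitive-specific fields. */
    struct cldnn_foo_desc {
        cldnn_primitive_type_id type;              /* primitive type identifier        */
        cldnn_primitive_id id;                     /* id unique within a topology      */
        cldnn_primitive_id_arr input;              /* input primitive ids              */
        cldnn_padding output_padding;              /* output padding information       */
        cldnn_optional_data_type output_data_type; /* optional output precision change */
        /* ...fields declared by the specific primitive's header follow here... */
    };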
-/// @addtogroup c_version
-/// @{
-/// @brief Get information about version of clDNN.
-CLDNN_API cldnn_version cldnn_get_version(cldnn_status* status);
-/// @}
-
-/// @addtogroup c_topology
-/// @{
-
-/// @brief Create empty network topology
-CLDNN_API cldnn_topology cldnn_create_topology(cldnn_status* status);
-
-/// @brief Add new primitive to the topology.
-/// @param[in] dto The pointer to a structure defined by @ref CLDNN_BEGIN_PRIMITIVE_DESC and @ref CLDNN_END_PRIMITIVE_DESC
-CLDNN_API void cldnn_add_primitive(cldnn_topology topology, const struct CLDNN_PRIMITIVE_DESC(primitive) * dto, cldnn_status* status);
-
-/// @brief Change input layout of the topology.
-/// @param[in] id of the input layout in the topology
-/// @param[in] new_layout of the input layout
-CLDNN_API void cldnn_change_input_layout(cldnn_topology topology, cldnn_primitive_id id, cldnn_layout new_layout, cldnn_status* status);
-
-/// @brief Return all primitives id from topology.
-/// @details Function fills the user-provided buffer with primitive ids. Each id is followed by '\0'.
-/// @param[in] ids Pointer to user-allocated buffer to store names.
-/// @param[in] size Size (in chars) of the buffer.
-/// @param[out] size_ret Required size (in chars) to store result.
-CLDNN_API void cldnn_get_primitive_ids(cldnn_topology topology, char* ids, size_t size, size_t* size_ret, cldnn_status* status);
-
-/// @brief Increment reference counter for the topology object.
-CLDNN_API void cldnn_retain_topology(cldnn_topology topology, cldnn_status* status);
-
-/// @brief Decrement reference counter for the topology object. Deletes object when counter becomes zero.
-CLDNN_API void cldnn_release_topology(cldnn_topology topology, cldnn_status* status);
-/// @}
-
-/// @addtogroup c_engine
-/// @{
-
-/// @brief Returns the number of available engines of the particular type.
-CLDNN_API uint32_t cldnn_get_engine_count(/*cldnn_engine_type*/ int32_t type, cldnn_status* status);
-
-/// @brief Release pending memory allocated in OpenCL context.
-/// @param[in] engine The engine whose pending memory should be released. Only the OCL engine is supported.
-/// @details OpenCL does not guarantee that the memory will be released (even with cl::Buffer objects released).
-/// Use this function to force release of all pending memory.
-CLDNN_API void cldnn_release_pending_memory(cldnn_engine engine, uint16_t stream_id, cldnn_status* status);
-
-/// @brief Create new engine of the specified @p type, @p engine_num, and @p configuration options.
-/// @param[in] type Engine type @ref cldnn_engine_type. Only OCL engine is supported.
-/// @param[in] engine_num Engine index. Should be 0.
-/// @param[in] configuration Pointer to engine configuration options.
-CLDNN_API cldnn_engine cldnn_create_engine(
-    /*cldnn_engine_type*/ int32_t type,
-    uint32_t engine_num,
-    const cldnn_engine_configuration* configuration,
-    cldnn_status* status);
-
-/// @brief Increment reference counter for the engine object.
-CLDNN_API void cldnn_retain_engine(cldnn_engine engine, cldnn_status* status);
-
-/// @brief Decrement reference counter for the engine object. Deletes object when counter becomes zero.
-CLDNN_API void cldnn_release_engine(cldnn_engine engine, cldnn_status* status);
-
-/// @brief Returns engine information. See @ref cldnn_engine_info for details.
-CLDNN_API cldnn_engine_info cldnn_get_engine_info(cldnn_engine engine, cldnn_status* status);
-
-/// @brief Returns the @ref cldnn_engine_type for the particular engine
-CLDNN_API /*cldnn_engine_type*/ int32_t cldnn_get_engine_type(cldnn_engine engine, cldnn_status* status);
-
-/// @brief Returns total size of all resources allocated using given engine
-CLDNN_API int64_t cldnn_get_temp_used_device_memory_size(cldnn_engine engine, cldnn_status* status);
-/// @}
-
-/// @brief Returns max size of resources allocated using given engine
-CLDNN_API int64_t cldnn_get_max_used_device_memory_size(cldnn_engine engine, cldnn_status* status);
-
-/// @addtogroup c_network
-/// @{
-
-/// @brief Creates an event which can be set by user.
-CLDNN_API cldnn_event cldnn_create_user_event(cldnn_engine engine, uint16_t stream_id, cldnn_status* status);
-
-/// @brief Checks if an event was created by user.
-CLDNN_API int32_t cldnn_is_user_event(cldnn_event event, cldnn_status* status);
-
-/// @brief Increment reference counter for the event object.
-CLDNN_API void cldnn_retain_event(cldnn_event event, cldnn_status* status);
-
-/// @brief Decrement reference counter for the event object. Deletes object when counter becomes zero.
-CLDNN_API void cldnn_release_event(cldnn_event event, cldnn_status* status);
-
-/// @brief Waits for event completion or error.
-CLDNN_API void cldnn_wait_for_event(cldnn_event event, cldnn_status* status);
-
-/// @brief Set event status to @p completed.
-CLDNN_API void cldnn_set_event(cldnn_event event, cldnn_status* status);
-
-/// @brief Register a callback to be called on event completion.
-/// @param[in] handler Pointer to @ref cldnn_event_handler callback function.
-/// @param[in] param User-defined value to be passed to the callback function.
-CLDNN_API void cldnn_add_event_handler(cldnn_event event, cldnn_event_handler handler, void* param, cldnn_status* status);
-
-/// @brief Returns the profiling information for a network primitive associated with the event.
-/// @param[in] profiling Pointer to the array of @ref cldnn_profiling_interval where the information is to be stored.
-/// @param[in] size Number of elements in the array of @ref cldnn_profiling_interval.
-/// @param[out] size_ret Number of elements required to store profiling information.
-CLDNN_API void cldnn_get_event_profiling_info(cldnn_event event, cldnn_profiling_interval* profiling, size_t size, size_t* size_ret, cldnn_status* status);
-/// @}
-
-/// @addtogroup c_program
-/// @{
-
-/// @brief Builds executable program based on user-defined @p topology by specified @p engine.
-/// @param[in] engine The engine which will be used to build the program.
-/// @param[in] topology The user-defined topology on which the network will be based.
-/// @param[in] options Pointer to an array of @ref cldnn_build_option defining network build options.
-/// @param[in] options_num Number of elements in the @p options array.
-CLDNN_API cldnn_program cldnn_build_program(
-    cldnn_engine engine,
-    cldnn_topology topology,
-    cldnn_build_option* options,
-    size_t options_num,
-    cldnn_status* status);
-
-/// @brief Increment reference counter for the program object.
-CLDNN_API void cldnn_retain_program(cldnn_program program, cldnn_status* status);
-
-/// @brief Decrement reference counter for the program object. Deletes object when counter becomes zero.
-CLDNN_API void cldnn_release_program(cldnn_program program, cldnn_status* status);
-/// @}
-
-/// @addtogroup c_network
-/// @{
-
-/// @brief Builds and allocates executable network based on user-defined @p topology by specified @p engine.
-/// This is a shorthand for cldnn_build_program and cldnn_allocate_network.
-/// @param[in] engine The engine which will be used to build the network.
-/// @param[in] topology The user-defined topology on which the network will be based.
-/// @param[in] options Pointer to an array of @ref cldnn_build_option defining network build options.
-/// @param[in] options_num Number of elements in the @p options array.
-CLDNN_API cldnn_network cldnn_build_network(
-    cldnn_engine engine,
-    cldnn_topology topology,
-    cldnn_build_option* options,
-    size_t options_num,
-    cldnn_status* status);
-
-/// @brief Allocates memory for a new network which will be able to execute specified @p program.
-/// @param[in] program The program object which holds binaries compiled from some topology and engine. Multiple network objects can share the same program.
-CLDNN_API cldnn_network cldnn_allocate_network(cldnn_program program, uint16_t stream_id, cldnn_status* status);
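The split between cldnn_build_program() and cldnn_allocate_network() lets several networks share one set of compiled binaries, while cldnn_build_network() is documented above as the shorthand for the two calls. A minimal sketch of the two-step form, assuming `engine` and `topology` were created earlier and with error handling omitted:

    cldnn_status status = CLDNN_SUCCESS;
    /* Compile the topology once. */
    cldnn_program program = cldnn_build_program(engine, topology, /*options=*/NULL, /*options_num=*/0, &status);
    /* Allocate one network per stream; both share the compiled binaries. */
    cldnn_network net0 = cldnn_allocate_network(program, /*stream_id=*/0, &status);
    cldnn_network net1 = cldnn_allocate_network(program, /*stream_id=*/1, &status);
    /* Release with cldnn_release_network()/cldnn_release_program() when no longer needed. */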
-
-/// @brief Increment reference counter for the network object.
-CLDNN_API void cldnn_retain_network(cldnn_network network, cldnn_status* status);
-
-/// @brief Decrement reference counter for the network object. Deletes object when counter becomes zero.
-CLDNN_API void cldnn_release_network(cldnn_network network, cldnn_status* status);
-
-/// @brief Provides user input data to the network (for @p input_layout primitives).
-/// @param[in] id Primitive @p id of @p input_layout primitive defined in @p topology.
-/// @param[in] mem Memory object with user data which @p layout matches the @p input_layout defined in @p topology.
-/// @details User should set the input data for every @p input_layout primitive defined in @p topology
-/// by calling this function before cldnn_execute_network() is called.
-CLDNN_API void cldnn_set_network_input(cldnn_network network, cldnn_primitive_id id, cldnn_memory mem, cldnn_status* status);
-
-/// @brief Sets learning rate for training primitives in network.
-/// @param[in] lr Learning rate.
-CLDNN_API void cldnn_set_learning_rate(cldnn_network network, float lr, cldnn_status* status);
-
-/// @brief Returns learning rate value.
-CLDNN_API float cldnn_get_learning_rate(cldnn_network network, cldnn_status* status);
-
-/// @brief Returns information about particular primitive.
-/// @details Function fills the user-provided buffer with a detailed textual description of the primitive.
-/// @param[in] id Primitive @p id of a primitive defined in @p topology.
-/// @param[in] info Pointer to user-allocated buffer to store the description.
-/// @param[in] size Size (in chars) of the buffer.
-/// @param[out] size_ret Required size (in chars) to store the result.
-CLDNN_API void cldnn_get_primitive_info(cldnn_network network, cldnn_primitive_id id, char* info, size_t size, size_t* size_ret, cldnn_status* status);
-
-/// @brief Returns @p engine associated with the @p network.
-CLDNN_API cldnn_engine cldnn_get_network_engine(cldnn_network network, cldnn_status* status);
-
-/// @brief Returns @p program associated with the @p network.
-CLDNN_API cldnn_program cldnn_get_network_program(cldnn_network network, cldnn_status* status);
-
-/// @brief Returns names of network outputs.
-/// @details Function fills the user-provided buffer with primitive names. Each name is followed by '\0'.
-/// Empty name "\0\0" means end of data.
-/// @param[in] names Pointer to user-allocated buffer to store names.
-/// @param[in] size Size (in chars) of the buffer.
-/// @param[out] size_ret Required size (in chars) to store result.
-CLDNN_API void cldnn_get_network_output_names(cldnn_network network, char* names, size_t size, size_t* size_ret, cldnn_status* status);
-
-/// @brief Returns names of executed primitives.
-/// @details Function fills the user-provided buffer with primitive names. Each name is followed by '\0'.
-/// Empty name "\0\0" means end of data.
-/// @param[in] names Pointer to user-allocated buffer to store names.
-/// @param[in] size Size (in chars) of the buffer.
-/// @param[out] size_ret Required size (in chars) to store result.
-CLDNN_API void cldnn_get_network_executed_primitive_names(cldnn_network network, char* names, size_t size, size_t* size_ret, cldnn_status* status);
-
-/// @brief Returns names of all primitives in network.
-/// @details Function fills the user-provided buffer with primitive names. Each name is followed by '\0'.
-/// Empty name "\0\0" means end of data.
-/// @param[in] names Pointer to user-allocated buffer to store names.
-/// @param[in] size Size (in chars) of the buffer.
-/// @param[out] size_ret Required size (in chars) to store result.
-CLDNN_API void cldnn_get_network_all_primitive_names(cldnn_network network, char* names, size_t size, size_t* size_ret, cldnn_status* status);
-
-/// @brief Returns names of all primitives in network before graph optimization.
-/// @details Function fills the user-provided buffer with primitive names. Each name is followed by '\0'.
-/// Empty name "\0\0" means end of data.
-/// @param[in] names Pointer to user-allocated buffer to store names.
-/// @param[in] size Size (in chars) of the buffer.
-/// @param[out] size_ret Required size (in chars) to store result.
-CLDNN_API void cldnn_get_network_all_primitive_org_names(cldnn_network network, char* names, size_t size, size_t* size_ret, cldnn_status* status);
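The four name-query functions above share a two-call pattern: the first call reports the required buffer size in size_ret, the second fills the buffer with '\0'-separated names terminated by an empty name ("\0\0"). A minimal sketch, assuming `network` was built earlier and that a zero-size call with a NULL buffer only fills size_ret (the latter is an assumption, not stated above); requires <stdio.h>, <stdlib.h>, <string.h> and "cldnn.h":

    cldnn_status status = CLDNN_SUCCESS;
    size_t needed = 0;
    /* First call: query the required buffer size. */
    cldnn_get_network_output_names(network, NULL, 0, &needed, &status);
    char* names = (char*)malloc(needed);
    /* Second call: fill the buffer with '\0'-separated names. */
    cldnn_get_network_output_names(network, names, needed, &needed, &status);
    for (const char* p = names; *p != '\0'; p += strlen(p) + 1)
        printf("output: %s\n", p);   /* the empty name ("\0\0") ends the list */
    free(names);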
-
-/// @brief Executes network.
-/// @details User should call cldnn_set_network_input() for every @p input_layout defined in the source @p topology.
-/// Function returns immediately, even if @p dependencies are not set yet.
-/// @param dependencies Pointer to an array of @ref cldnn_event objects to be waited on before network execution.
-/// @param deps_num Number of elements in the @p dependencies array.
-CLDNN_API void cldnn_execute_network(cldnn_network network, cldnn_event* dependencies, size_t deps_num, cldnn_status* status);
-
-/// @brief Returns executed network output information.
-/// @details User should call this function after cldnn_execute_network() to get result of network execution.
-/// @param name Output name to get the result.
-/// @returns @ref cldnn_network_output structure with the output information.
-/// To work with the result of this function, the user should first wait for cldnn_network_output::event
-/// before getting access to cldnn_network_output::memory.
-CLDNN_API cldnn_network_output cldnn_get_network_output(cldnn_network network, const char* name, cldnn_status* status);
-
-/// @brief Returns @ref memory corresponding to output with @p name.
-/// @details User can call this function even before calling cldnn_execute_network(), but then the content of the memory is uninitialized.
-/// @param name Output name to get the result.
-/// @returns @ref cldnn_memory structure with the output information.
-CLDNN_API cldnn_memory cldnn_get_network_output_memory(cldnn_network network, const char* name, cldnn_status* status);
-
-/// @brief Returns @ref event corresponding to output with @p name.
-/// @details User can call this function even before calling cldnn_execute_network(), but then the content of the memory is uninitialized.
-/// @param name Output name to get the result.
-/// @returns @ref cldnn_event structure with the output information.
-CLDNN_API cldnn_event cldnn_get_network_output_event(cldnn_network network, const char* name, cldnn_status* status);
-
-/// @brief Returns description of final runtime graph
-/// @details Function fills user provided buffer by primitive_info structures.
-/// Should be called after network compilation.
-/// @param[in] info array with the runtime information for each primitive.
-/// @param[in] size Elements count in the buffer.
-/// @param[out] size_ret Required size of array (in elements) to store result.
-CLDNN_API void cldnn_get_primitives_info(cldnn_network network, const cldnn_primitive_info** info, size_t size,
-                                         size_t* size_ret, cldnn_status* status);
-
-/// @brief Returns description of all optimization stages
-/// @details Function fills user provided buffer by primitive_info structures.
-/// Should be called after network compilation.
-/// @param[in] info array with the runtime information for each primitive after all optimization passes.
-/// @param[in] pass_sizes array with descriptors count on each step.
-/// @param[in] pass_names array with names on each step.
-/// @param[in] total_size Elements count in the buffer.
-/// @param[out] total_size_ret Required size of info array (in elements) to store result for all steps.
-/// @param[out] pass_count_ret Required size of steps_size array (in elements) to store step lengths in info array.
-/// @param[out] pass_names_total_size_ret Required size of step_names array (in char elements) to store step names.
-CLDNN_API void cldnn_get_optimizer_passes_info(cldnn_network network,
-                                               const cldnn_primitive_info** info, int* pass_sizes, char* pass_names,
-                                               size_t total_size,
-                                               size_t* total_size_ret, size_t* pass_count_ret, size_t* pass_names_total_size_ret,
-                                               cldnn_status* status);
-/// @}
-
-/// @addtogroup c_memory
-/// @{
-
-/// @brief Allocate memory on @p engine using specified @p layout.
-CLDNN_API cldnn_memory cldnn_allocate_memory(cldnn_engine engine, cldnn_layout layout, uint16_t stream_id, cldnn_status* status);
-/// @brief Create memory object attached to the buffer allocated by user.
-/// @note User is responsible for buffer deallocation. Buffer lifetime should be bigger than lifetime of the memory object.
-CLDNN_API cldnn_memory cldnn_attach_memory(cldnn_layout layout, void* pointer, size_t size, uint16_t stream_id, cldnn_status* status);
-/// @brief Checks if two memory objects refer to the same underlying buffer.
-CLDNN_API int32_t cldnn_is_the_same_buffer(cldnn_memory mem1, cldnn_memory mem2, cldnn_status* status);
-/// @brief Increment reference counter for the memory object.
-CLDNN_API void cldnn_retain_memory(cldnn_memory memory, cldnn_status* status);
-/// @brief Decrement reference counter for the memory object. Deletes object when counter becomes zero.
-CLDNN_API void cldnn_release_memory(cldnn_memory memory, cldnn_status* status);
-/// @brief Locks memory buffer. Provides direct access to memory data.
-/// @returns Direct pointer to the memory data.
-CLDNN_API void* cldnn_lock_memory(cldnn_memory memory, cldnn_status* status);
-/// @brief Unlocks memory locked by cldnn_lock_memory(cldnn_memory memory, cldnn_status* status).
-CLDNN_API void cldnn_unlock_memory(cldnn_memory memory, cldnn_status* status);
-/// @brief Returns memory layout
-/// @returns @ref cldnn_layout which describes memory.
-CLDNN_API cldnn_layout cldnn_get_memory_layout(cldnn_memory memory, cldnn_status* status);
-/// @brief Returns stream id of the memory object
-CLDNN_API uint16_t cldnn_get_memory_stream_id(cldnn_memory memory, cldnn_status* status);
-/// @brief Returns stream id of the network
-CLDNN_API uint16_t cldnn_get_network_stream_id(cldnn_network network, cldnn_status* status);
-/// @brief Returns reference to the engine associated with memory object.
-/// @returns The engine associated with memory object. Or NULL if memory was attached to user-allocated buffer.
-CLDNN_API cldnn_engine cldnn_get_memory_engine(cldnn_memory memory, cldnn_status* status);
-/// @brief Converts float (32-bit) to half_t (16-bit).
-/// @returns 16-bit half_t
-CLDNN_API uint16_t cldnn_float_to_half(float, cldnn_status*);
-/// @brief Converts half_t (16-bit) to float (32-bit).
-/// @returns 32-bit float
-CLDNN_API float cldnn_half_to_float(uint16_t, cldnn_status*);
-
-/// @}
-
-/// @addtogroup c_error
-/// @{
-
-/// @brief If a cldnn function returns a status different from CLDNN_SUCCESS, the user can call this function to get more details.
-/// @returns pointer to an array of chars with a more detailed description of the last error.
-/// @note If a sequence of errors occurs, only the description of the last error will be available.
-CLDNN_API const char* cldnn_get_last_error_message();
-/// @}
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-
-// primitives
-#ifdef __cplusplus
-#define CLDNN_DECLARE_PRIMITIVE_TYPE_ID(PType) extern "C" CLDNN_API cldnn_primitive_type_id cldnn_##PType##_type_id(cldnn_status* status)
-#else
-#define CLDNN_DECLARE_PRIMITIVE_TYPE_ID(PType) CLDNN_API cldnn_primitive_type_id cldnn_##PType##_type_id(cldnn_status* status)
-#endif
-
-
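Taken together, the functions declared above form one execution flow: create a topology and add primitives, create an engine, build a network, bind input memory, execute, and read outputs. A minimal sketch of that flow follows; `input_desc`, `in_layout` and `cfg` are hypothetical values prepared elsewhere, the engine type value for OCL is assumed to be 0, and primitive ids are passed as C strings on the assumption that cldnn_primitive_id is a char pointer:

    #include <stdio.h>
    #include "cldnn.h"

    static void run_once(const struct cldnn_primitive_desc* input_desc,
                         cldnn_layout in_layout,
                         const cldnn_engine_configuration* cfg) {
        cldnn_status status = CLDNN_SUCCESS;

        /* Topology: add the user-defined primitives. */
        cldnn_topology topology = cldnn_create_topology(&status);
        cldnn_add_primitive(topology, input_desc, &status);

        /* Engine and network (engine type value for OCL assumed to be 0). */
        cldnn_engine engine = cldnn_create_engine(/*type=*/0, /*engine_num=*/0, cfg, &status);
        cldnn_network network = cldnn_build_network(engine, topology, /*options=*/NULL, 0, &status);

        /* Bind input data: allocate, lock, fill, unlock, set as network input. */
        cldnn_memory input = cldnn_allocate_memory(engine, in_layout, /*stream_id=*/0, &status);
        float* data = (float*)cldnn_lock_memory(input, &status);
        /* ... fill data ... */
        (void)data;
        cldnn_unlock_memory(input, &status);
        cldnn_set_network_input(network, "input", input, &status);

        /* Execute, then wait for the output event before touching its memory. */
        cldnn_execute_network(network, /*dependencies=*/NULL, /*deps_num=*/0, &status);
        cldnn_network_output out = cldnn_get_network_output(network, "output", &status);
        cldnn_wait_for_event(out.event, &status);
        /* out.memory can now be read via cldnn_lock_memory()/cldnn_unlock_memory(). */

        if (status != CLDNN_SUCCESS)
            fprintf(stderr, "clDNN error: %s\n", cldnn_get_last_error_message());

        /* Release objects in reverse order of creation. */
        cldnn_release_memory(input, &status);
        cldnn_release_network(network, &status);
        cldnn_release_engine(engine, &status);
        cldnn_release_topology(topology, &status);
    }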
diff --git a/inference-engine/thirdparty/clDNN/api/C/concatenation.h b/inference-engine/thirdparty/clDNN/api/C/concatenation.h
deleted file mode 100644 (file)
index 9620f83..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
-    cldnn_concatenation_along_b = 0,
-    cldnn_concatenation_along_f = CLDNN_TENSOR_BATCH_DIM_MAX,
-    cldnn_concatenation_along_x = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX,
-    cldnn_concatenation_along_y = cldnn_concatenation_along_x + 1,
-    cldnn_concatenation_along_z = cldnn_concatenation_along_y + 1,
-    cldnn_concatenation_along_w = cldnn_concatenation_along_z + 1
-} cldnn_concatenation_axis;
-
-/// @details Concatenation is used to concatenate multiple sources into one destination along specified dimension.
-/// Note that all other dimensions (except the one along which concatenation takes place) must have the same value in each source
-/// and each source should have the same format.
-/// @par Algorithm:
-/// \code
-///     int outputIdx = 0
-///     for(i : input)
-///     {
-///         for(f : i.features)
-///         {
-///             output[outputIdx] = f
-///             outputIdx += 1
-///         }
-///     }
-/// \endcode
-/// @par Where:
-///   @li input : data structure holding all source inputs for this primitive
-///   @li output : data structure holding output data for this primitive
-///   @li i.features : number of features in currently processed input
-///   @li outputIdx : index of destination feature
-CLDNN_BEGIN_PRIMITIVE_DESC(concatenation)
-/// @brief Dimension along which concatenation should take place.
-cldnn_concatenation_axis axis;
-CLDNN_END_PRIMITIVE_DESC(concatenation)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(concatenation);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
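For reference, the CLDNN_BEGIN_PRIMITIVE_DESC / CLDNN_END_PRIMITIVE_DESC pair above expands the concatenation descriptor to roughly the following struct (a sketch derived from the macro bodies in cldnn.h shown earlier; comments abridged):

    struct cldnn_concatenation_desc {
        cldnn_primitive_type_id type;               /* common header: primitive type id           */
        cldnn_primitive_id id;                      /* id unique within the topology              */
        cldnn_primitive_id_arr input;               /* ids of the primitives to concatenate       */
        cldnn_padding output_padding;               /* output padding information                 */
        cldnn_optional_data_type output_data_type;  /* optional explicit output precision         */
        cldnn_concatenation_axis axis;              /* dimension along which concatenation happens */
    };
    /* CLDNN_DECLARE_PRIMITIVE_TYPE_ID(concatenation) likewise declares:
       CLDNN_API cldnn_primitive_type_id cldnn_concatenation_type_id(cldnn_status* status); */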
diff --git a/inference-engine/thirdparty/clDNN/api/C/condition.h b/inference-engine/thirdparty/clDNN/api/C/condition.h
deleted file mode 100644 (file)
index 7c47ed7..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Function, which will be used during comparison.
-typedef enum /*:int32_t*/ {
-    EQUAL,
-    GREATER,
-    LESS
-} cldnn_cond_functions;
-
-/// @brief Adds a primitive which works like "if".
-///
-/// @details
-/// @n   Applies a comparison between 2 inputs.
-/// @n   Compare data - the size of that input specifies the range of the comparison.
-/// @n   Offset - offset in memory used when comparing values.
-CLDNN_BEGIN_PRIMITIVE_DESC(condition)
-/// @brief An identifier of topology, which will be executed when comparison returns true.
-cldnn_topology topology_true;
-/// @brief An identifier of topology, which will be executed when comparison returns false.
-cldnn_topology topology_false;
-/// @brief An identifier of primitive which contains compare values.
-cldnn_primitive_id compare_data;
-/// @brief Function used during the comparison.
-cldnn_cond_functions function;
-/// @brief Offset for compare data.
-cldnn_tensor offset;
-
-CLDNN_END_PRIMITIVE_DESC(condition)
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(condition);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/contract.h b/inference-engine/thirdparty/clDNN/api/C/contract.h
deleted file mode 100644 (file)
index da5ad67..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Select reduction operation for contract layer ( @CLDNN_PRIMITIVE_DESC{contract} ).
-typedef enum /*:int32_t*/ {
-    /// @brief Sum reduction.
-    cldnn_contract_sum,
-    /// @brief Product reduction.
-    cldnn_contract_product,
-    /// @brief All reduction.
-    cldnn_contract_all,
-    /// @brief Any reduction.
-    cldnn_contract_any,
-    /// @brief Max reduction.
-    cldnn_contract_max
-} cldnn_contract_mode;
-
-/// @brief Reduces input with an operation defined by @p mode along defined
-///        by @p reduction_axes dimensions.
-///
-/// @details Reduces the input using the binary operation determined by
-///          @p mode. The @p reduction_axes determine the final shape of the
-///          output, which is calculated based on the input shape by
-///          collapsing the dimensions along which the reduction happens.
-///          For example, for the input with
-/// @n      <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
-/// @n a reduction with
-/// @n      <tt>reduction_axes = (2)</tt>
-/// @n would collapse the Y dimension, producing
-/// @n      <tt>output_shape = (1, in_b, in_f, in_x)</tt>
-/// @n where every element is a @p mode reduction of the input elements with
-/// @n the same B, F and X coordinates.
-/// @n
-/// @n@b Requirements:
-/// @n - @p reduction_axes size (dimensions count) must be within (inclusive) range
-///      1 - 4.
-/// @n - @p reduction_axes mustn't have duplicate values.
-/// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
-/// @n Breaking any of these conditions will raise an exception.
-CLDNN_BEGIN_PRIMITIVE_DESC(contract)
-/// @brief Reduction mode. See #cldnn_contract_mode.
-int32_t mode; /*cldnn_contract_mode*/
-/// @brief Array of axes positions from input shape (0-based, from left to right)
-///        along which reduction should happen.
-cldnn_uint16_t_arr reduction_axes;
-
-CLDNN_END_PRIMITIVE_DESC(contract)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(contract);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/convolution.h b/inference-engine/thirdparty/clDNN/api/C/convolution.h
deleted file mode 100644 (file)
index 1405060..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs forward spatial convolution with weight sharing.
-/// Also supports built-in ReLU activation ( @CLDNN_PRIMITIVE_DESC{activation} ), enabled by setting it in the arguments.
-/// @details Parameters are defined in the context of "direct" convolution, but the actual algorithm is not implied.
-CLDNN_BEGIN_PRIMITIVE_DESC(convolution)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines shift in input buffer between adjacent calculations of output values.
-cldnn_tensor stride;
-/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
-/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
-/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
-cldnn_tensor dilation;
-/// @brief Enable Relu activation.
-uint32_t with_activation;
-/// @brief Relu activation slope.
-float activation_negative_slope;
-/// @brief Number of cards to split the computation across.
-uint32_t split;
-/// @brief Indicates that the primitive has user-defined output size (non-zero value).
-uint32_t with_output_size;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-/// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr bias;
-/// @brief List of primitive ids containing weights quantization factors per output feature map.
-cldnn_primitive_id_arr weights_quantization_factors;
-/// @brief List of primitive ids containing output calibration factors per output feature map.
-cldnn_primitive_id_arr output_calibration_factors;
-/// @brief Input quantization factor
-float input_quantization_factor;
-/// @brief Output quantization factor
-float output_quantization_factor;
-/// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
-uint32_t groups;
-/// @brief Defines a number of deformable groups that splits trans input into several parts
-/// by channel dimension.
-uint32_t deformable_groups;
-/// @brief Defines a padding added to input image on left (x axis) and top (y axis).
-cldnn_tensor padding_above;
-/// @brief Defines a padding added to input image on right (x axis) and bottom (y axis).
-cldnn_tensor padding_below;
-/// @brief Enables deformable mode (non-zero value).
-uint8_t deformable_mode;
-
-CLDNN_END_PRIMITIVE_DESC(convolution)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(convolution);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/convolution_grad_input.h b/inference-engine/thirdparty/clDNN/api/C/convolution_grad_input.h
deleted file mode 100644 (file)
index 9007307..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs transposed convolution.
-/// @details convolution_grad_input is similar to a convolution layer with the weights flipped on the axis, and with the stride
-/// and input padding parameters used in the opposite sense to convolution.
-CLDNN_BEGIN_PRIMITIVE_DESC(convolution_grad_input)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution_grad_input window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines the spatial dimensions of stride of adjacent elements in input buffer.
-cldnn_tensor stride;
-/// @brief Number of cards to split the computation across.
-uint32_t split;
-/// @brief Indicates that the primitive has user-defined output size (non-zero value).
-uint32_t with_output_size;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-CLDNN_END_PRIMITIVE_DESC(convolution_grad_input)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(convolution_grad_input);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h b/inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h
deleted file mode 100644 (file)
index 3fe6f40..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include <stdbool.h>
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs backward convolution operation for weights and biases.
-/// @details convolution_grad_weights updates weights and bias mutable data for training purposes.
-/// @n Please note that this primitive was not heavily tested and currently only batch=1 is enabled.
-CLDNN_BEGIN_PRIMITIVE_DESC(convolution_grad_weights)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution_grad_weights window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines the spatial dimensions of stride of adjacent elements in input buffer.
-cldnn_tensor stride;
-/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
-/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
-/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
-cldnn_tensor dilation;
-/// @brief Number of cards to split the computation across.
-uint32_t split;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-/// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split or should be empty (if not using bias).
-cldnn_primitive_id_arr bias;
-/// @brief Primitive id containing convolution gradient data. Used for proper order of gradient calculation. Leave empty if primitive is last in backward pass.
-cldnn_primitive_id conv_grad;
-/// @brief Array of primitive ids containing weights gradient data calculated in previous iteration.
-/// Amount of primitives and their memory sizes should be same as weights.
-cldnn_primitive_id_arr prev_weights_grad;
-/// @brief Array of primitive ids containing bias gradient data calculated in previous iteration.
-/// Amount of primitives and their memory sizes should be same as biases.
-cldnn_primitive_id_arr prev_bias_grad;
-/// @brief Should primitive give weights gradient (delta) as an output
-bool output_grad_w;
-
-CLDNN_END_PRIMITIVE_DESC(convolution_grad_weights)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(convolution_grad_weights);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/crop.h b/inference-engine/thirdparty/clDNN/api/C/crop.h
deleted file mode 100644 (file)
index f1e1df3..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs crop operation on input.
-/// @details Crops the input to the shape of reference_input across all dimensions taking into account specified input offsets.
-/// @n       The borders variant calculates the output shape from the input shape minus the specified borders.
-/// @n
-/// @n\b Examples
-/// @n Crop without offset example:
-/// \image html crop_no_offset.jpg
-/// @n Crop with offset example:
-/// \image html crop_w_offset.jpg
-/// @n
-/// @n\b Requirements (reference size variant)
-/// @n - Input size cannot be greater than reference size in any dimension
-/// @n - All sizes have to be positive
-/// @n - Reference size plus offset cannot exceed input size
-/// @n
-/// @n\b Requirements (borders variant)
-/// @n - Borders support batch, feature and spatial dimensions (rest of dimensions ignored).
-/// @n - Input size cannot be greater than reference size in any dimension
-/// @n - All sizes specified in borders have to have non-negative values (positive or @c 0).
-/// @n - Sum of sizes of opposite borders must be lower than input size (on all non-ignored dimensions).
-/// @n
-/// @n Breaking any of these conditions will cause an exception to be thrown.
-CLDNN_BEGIN_PRIMITIVE_DESC(crop)
-/// @brief Reference input tensor with the required dimensions (if positive) or
-///        negated value of right/bottom/upper border size (if non-positive).
-cldnn_tensor reference_input;
-/// @brief Input offsets (reference_input is positive) or left/top/lower border
-///        size (reference_input is negative).
-cldnn_tensor offsets;
-CLDNN_END_PRIMITIVE_DESC(crop)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(crop);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/custom_gpu_primitive.h b/inference-engine/thirdparty/clDNN/api/C/custom_gpu_primitive.h
deleted file mode 100644 (file)
index 3c3129a..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief This primitive executes a custom kernel provided by the application
-/// @details The application is required to provide all relevant details for executing the custom kernel
-/// such as: sources, entry point, work sizes and parameter bindings.
-CLDNN_BEGIN_PRIMITIVE_DESC(custom_gpu_primitive)
-/// @brief Source code for the kernel
-cldnn_primitive_id_arr kernels_code;
-/// @brief The name of the entry point function in the kernel
-cldnn_kernel_entry_point kernel_entry_point;
-/// @brief Argument bindings for the entry point function
-cldnn_kernel_arguments kernel_arguments;
-/// @brief The number of arguments used by the kernel
-int kernel_arguments_num;
-/// @brief The kernel's build options
-cldnn_kernel_build_options build_options;
-/// @brief The output layout declared by the primitive
-cldnn_layout output_layout;
-/// @brief The global working sizes
-cldnn_work_group_sizes gws;
-/// @brief The number of global work sizes
-int gws_num;
-/// @brief The local working sizes
-cldnn_work_group_sizes lws;
-/// @brief The number of local work sizes
-int lws_num;
-
-CLDNN_END_PRIMITIVE_DESC(custom_gpu_primitive)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(custom_gpu_primitive);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/data.h b/inference-engine/thirdparty/clDNN/api/C/data.h
deleted file mode 100644 (file)
index 6c89141..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Provides input data to topology.
-/// @details This primitive allows passing data which is known at topology creation time (constants).
-/// For example, weights and biases for scoring networks.
-/// @note Passing data at topology creation may improve network performance if data optimization is enabled.
-CLDNN_BEGIN_PRIMITIVE_DESC(data)
-/// @brief Memory object which contains data.
-/// @note If memory is attached by ::cldnn_attach_memory(),
-/// attached buffer should be valid on ::cldnn_build_network() call.
-cldnn_memory mem;
-CLDNN_END_PRIMITIVE_DESC(data)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(data);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
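The lifetime note above is the main pitfall when feeding constants: memory attached with cldnn_attach_memory() wraps the user buffer directly, so the buffer must stay valid at least until cldnn_build_network(). A minimal sketch, assuming `topology`, `weights_layout`, `weights_buf` and `weights_size` exist elsewhere and that cldnn_primitive_id is a C string (both assumptions):

    cldnn_status status = CLDNN_SUCCESS;
    /* Wrap a user-owned buffer; it is not copied and must outlive cldnn_build_network(). */
    cldnn_memory weights_mem = cldnn_attach_memory(weights_layout, weights_buf, weights_size,
                                                   /*stream_id=*/0, &status);

    struct cldnn_data_desc weights = {0};
    weights.type = cldnn_data_type_id(&status);  /* declared by CLDNN_DECLARE_PRIMITIVE_TYPE_ID(data) */
    weights.id   = "weights";                    /* assumes cldnn_primitive_id is a C string */
    weights.mem  = weights_mem;

    cldnn_add_primitive(topology, (const struct cldnn_primitive_desc*)&weights, &status);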
diff --git a/inference-engine/thirdparty/clDNN/api/C/deconvolution.h b/inference-engine/thirdparty/clDNN/api/C/deconvolution.h
deleted file mode 100644 (file)
index 0dd6d92..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs transposed convolution.
-/// Also supports built-in ReLU activation ( @CLDNN_PRIMITIVE_DESC{activation} ), enabled by setting it in the arguments.
-/// @details Deconvolution is similar to a convolution layer with the weights flipped on the axis,
-/// and with the stride and input padding parameters used in the opposite sense to convolution.
-CLDNN_BEGIN_PRIMITIVE_DESC(deconvolution)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the deconvolution window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines the spatial dimensions of stride of adjacent elements in input buffer.
-cldnn_tensor stride;
-/// @brief Enables Relu activation.
-uint32_t with_activation;
-/// @brief Relu activation slope.
-float activation_negative_slope;
-/// @brief Number of cards to split the computation across.
-uint32_t split;
-/// @brief Indicates that the primitive has user-defined output size (non-zero value).
-uint32_t with_output_size;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-/// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split or should be empty (if not using bias).
-cldnn_primitive_id_arr bias;
-/// @brief Indicates that deconvolution is used for convolution backward computation (convolution_grad_input)
-uint32_t gradient;
-/// @brief Number of feature groups (grouped deconvolution). If more than 1 then weights/bias count needs to be 1.
-uint32_t groups;
-CLDNN_END_PRIMITIVE_DESC(deconvolution)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(deconvolution);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/deformable_conv.h b/inference-engine/thirdparty/clDNN/api/C/deformable_conv.h
deleted file mode 100644 (file)
index 58b2d8a..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs deformable convolution on preprocessed data. Should be created after the deformable_interp primitive.
-CLDNN_BEGIN_PRIMITIVE_DESC(deformable_conv)
-/// @brief Number of cards to split the computation across.
-uint32_t split;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-/// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr bias;
-/// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
-uint32_t groups;
-
-CLDNN_END_PRIMITIVE_DESC(deformable_conv)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(deformable_conv);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/deformable_interp.h b/inference-engine/thirdparty/clDNN/api/C/deformable_interp.h
deleted file mode 100644 (file)
index c71b716..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs interpolation pass for deformable convolution. Output tensor has IC*KH*KW channels.
-CLDNN_BEGIN_PRIMITIVE_DESC(deformable_interp)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines shift in input buffer between adjacent calculations of output values.
-cldnn_tensor stride;
-/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
-/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
-/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
-cldnn_tensor dilation;
-/// @brief Number of cards to split the computation across.
-uint32_t split;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-/// @brief Size of the weights tensor.
-cldnn_tensor kernel_size;
-/// @brief Number of feature groups.
-uint32_t groups;
-/// @brief Defines a number of deformable groups that splits trans input into several parts
-/// by channel dimension.
-uint32_t deformable_groups;
-/// @brief Defines a padding added to input image on left (x axis) and top (y axis).
-cldnn_tensor padding_above;
-/// @brief Defines a padding added to input image on right (x axis) and bottom (y axis).
-cldnn_tensor padding_below;
-
-CLDNN_END_PRIMITIVE_DESC(deformable_interp)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(deformable_interp);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
diff --git a/inference-engine/thirdparty/clDNN/api/C/depth_to_space.h b/inference-engine/thirdparty/clDNN/api/C/depth_to_space.h
deleted file mode 100644 (file)
index c5255f7..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CLDNN_BEGIN_PRIMITIVE_DESC(depth_to_space)
-/// @brief Size of spatial block in the output tensor. Should be >= 2.
-size_t block_size;
-CLDNN_END_PRIMITIVE_DESC(depth_to_space)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(depth_to_space);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/detection_output.h b/inference-engine/thirdparty/clDNN/api/C/detection_output.h
deleted file mode 100644 (file)
index ea01203..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Select method for coding the prior-boxes in Detection Output layer ( @CLDNN_PRIMITIVE_DESC{detection_output} ).
-typedef enum /*:int32_t*/ {
-    cldnn_code_type_corner,
-    cldnn_code_type_center_size,
-    cldnn_code_type_corner_size,
-} cldnn_prior_box_code_type;
-
-/// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
-/// @details Each row is a 7-dimensional vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
-/// If the number of detections per image is lower than keep_top_k, dummy results with image_id=-1 are written at the end.
-CLDNN_BEGIN_PRIMITIVE_DESC(detection_output)
-/// @brief Number of classes to be predicted.
-uint32_t num_classes;
-/// @brief Number of total bounding boxes to be kept per image after NMS step.
-uint32_t keep_top_k;
-/// @brief If not 0, bounding boxes are shared among different classes.
-uint32_t share_location;
-/// @brief Background label id (-1 if there is no background class).
-int background_label_id;
-/// @brief Threshold for NMS step.
-float nms_threshold;
-/// @brief Maximum number of results to be kept in NMS.
-int top_k;
-/// @brief Used for adaptive NMS.
-float eta;
-/// @brief Type of coding method for bounding box. See #cldnn_prior_box_code_type.
-int32_t code_type;
-/// @brief If not 0, variance is encoded in target; otherwise we need to adjust the predicted offset accordingly.
-uint32_t variance_encoded_in_target;
-/// @brief Only keep detections with confidences larger than this threshold.
-float confidence_threshold;
-/// @brief Number of elements in a single prior description (4 if priors calculated using PriorBox layer, 5 - if Proposal)
-int32_t prior_info_size;
-/// @brief Offset of the box coordinates w.r.t. the beginning of a prior info record
-int32_t prior_coordinates_offset;
-/// @brief If true, priors are normalized to [0; 1] range.
-uint32_t prior_is_normalized;
-/// @brief Width of input image.
-int32_t input_width;
-/// @brief Height of input image.
-int32_t input_height;
-/// @brief Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
-int32_t decrease_label_id;
-/// @brief Clip decoded boxes right after decoding
-int32_t clip_before_nms;
-/// @brief Clip decoded boxes after nms step
-int32_t clip_after_nms;
-CLDNN_END_PRIMITIVE_DESC(detection_output)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(detection_output);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/detection_output_sort.h b/inference-engine/thirdparty/clDNN/api/C/detection_output_sort.h
deleted file mode 100644 (file)
index 94d6b3c..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Generates a list of detections based on location and confidence predictions by doing non-maximum suppression.
-/// @details Each row is a seven-dimensional vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
-/// If the number of detections per image is lower than keep_top_k, dummy results with image_id = -1 are written at the end.
-CLDNN_BEGIN_PRIMITIVE_DESC(detection_output_sort)
-/// @brief Number of classes to be predicted.
-uint32_t num_classes;
-/// @brief Number of images to be processed.
-uint32_t num_images;
-/// @brief Number of total bounding boxes to be kept per image after NMS step.
-uint32_t keep_top_k;
-/// @brief If true, bounding boxes are shared among different classes.
-uint32_t share_location;
-/// @brief Maximum number of results to be kept in NMS.
-int top_k;
-/// @brief Background label id (-1 if there is no background class).
-int background_label_id;
-CLDNN_END_PRIMITIVE_DESC(detection_output_sort)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(detection_output_sort);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/eltwise.h b/inference-engine/thirdparty/clDNN/api/C/eltwise.h
deleted file mode 100644 (file)
index 64a26ea..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Select mode for eltwise layer ( @CLDNN_PRIMITIVE_DESC{eltwise} ).
-typedef enum /*:int32_t*/ {
-    /// @brief Eltwise sum.
-    cldnn_eltwise_sum,
-    /// @brief Eltwise subtract.
-    cldnn_eltwise_sub,
-    /// @brief Eltwise max.
-    cldnn_eltwise_max,
-    /// @brief Eltwise product (Hadamard).
-    cldnn_eltwise_prod,
-    /// @brief Eltwise div.
-    cldnn_eltwise_div,
-    /// @brief Eltwise min.
-    cldnn_eltwise_min,
-    /// @brief Eltwise pow.
-    cldnn_eltwise_pow,
-    /// @brief Eltwise mod.
-    cldnn_eltwise_mod,
-    /// @brief Eltwise equal.
-    cldnn_eltwise_eq,
-    /// @brief Eltwise not equal.
-    cldnn_eltwise_ne,
-    /// @brief Eltwise less.
-    cldnn_eltwise_lt,
-    /// @brief Eltwise less or equal.
-    cldnn_eltwise_le,
-    /// @brief Eltwise greater.
-    cldnn_eltwise_gt,
-    /// @brief Eltwise greater or equal.
-    cldnn_eltwise_ge,
-    /// @brief Eltwise and.
-    cldnn_eltwise_and,
-    /// @brief Eltwise or.
-    cldnn_eltwise_or,
-    /// @brief Eltwise xor.
-    cldnn_eltwise_xor,
-    /// @brief Eltwise squared diff.
-    cldnn_eltwise_squared_diff,
-    /// @brief Eltwise floormod.
-    cldnn_eltwise_floor_mod
-} cldnn_eltwise_mode;
-
-/// @brief Performs elementwise operations (sum, subtract, max or product) on two input primitives.
-/// Also supports a built-in Relu activation ( @CLDNN_PRIMITIVE_DESC{activation} ), enabled by setting it in the arguments.
-/// @note
-/// - both inputs have to have equal sizes in all dimensions, or the input tensors must be broadcastable
-///   to the same shape (in which the size of each dimension is the maximum of the input sizes on that dimension)
-/// - the format of both inputs has to be the same
-/// - when using integer types, only the following eltwise modes are supported: sum, sub, prod, div
-CLDNN_BEGIN_PRIMITIVE_DESC(eltwise)
-/// @brief Primitive id containing output quantization factors per output feature map.
-cldnn_primitive_id output_calibration_factors;
-/// @brief Output quantization factor
-float output_quantization_factor;
-/// @brief List of primitive ids containing input quantization factors per feature map, one primitive id for each input.
-cldnn_primitive_id_arr input_calibration_factors;
-/// @brief List of quantization factors per input.
-cldnn_float_arr input_quantization_factors;
-/// @brief Eltwise mode. See #cldnn_eltwise_mode.
-int32_t mode; /*cldnn_eltwise_mode*/
-/// @brief Blob-wise coefficient for SUM operation
-cldnn_float_arr coefficients;
-/// @brief Enables Relu activation.
-uint32_t with_activation;
-/// @brief Relu activation slope.
-float activation_negative_slope;
-/// @brief Defines shift in input buffers between adjacent calculations of output values.
-cldnn_tensor_arr stride;
-
-CLDNN_END_PRIMITIVE_DESC(eltwise)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(eltwise);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
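For reference, an illustrative C sketch of a few of the eltwise modes listed in the removed header, applied to two equal-sized buffers; broadcasting, calibration factors and the fused activation are deliberately left out, and the names below are assumptions rather than clDNN API.

```c
/* Illustrative reference for a subset of the documented eltwise modes on two
 * equal-sized buffers; broadcasting and calibration factors are omitted. */
#include <stddef.h>
#include <math.h>

typedef enum { ELT_SUM, ELT_SUB, ELT_MAX, ELT_PROD, ELT_DIV, ELT_MIN } elt_mode;

static void eltwise_ref(const float* a, const float* b, float* out,
                        size_t n, elt_mode mode) {
    for (size_t i = 0; i < n; ++i) {
        switch (mode) {
        case ELT_SUM:  out[i] = a[i] + b[i]; break;
        case ELT_SUB:  out[i] = a[i] - b[i]; break;
        case ELT_MAX:  out[i] = fmaxf(a[i], b[i]); break;
        case ELT_PROD: out[i] = a[i] * b[i]; break;
        case ELT_DIV:  out[i] = a[i] / b[i]; break;
        case ELT_MIN:  out[i] = fminf(a[i], b[i]); break;
        }
    }
}
```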
diff --git a/inference-engine/thirdparty/clDNN/api/C/embed.h b/inference-engine/thirdparty/clDNN/api/C/embed.h
deleted file mode 100644 (file)
index b127822..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CLDNN_BEGIN_PRIMITIVE_DESC(embed)
-
-/// @brief Primitive id containing weights data.
-cldnn_primitive_id weights;
-/// @brief Primitive id containing bias data.
-cldnn_primitive_id bias;
-
-CLDNN_END_PRIMITIVE_DESC(embed)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(embed);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/fully_connected.h b/inference-engine/thirdparty/clDNN/api/C/fully_connected.h
deleted file mode 100644 (file)
index 2b46d93..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs forward fully connected layer (inner product).
-/// Also supports a built-in Relu activation ( @CLDNN_PRIMITIVE_DESC{activation} ), enabled by setting it in the arguments.
-CLDNN_BEGIN_PRIMITIVE_DESC(fully_connected)
-/// @brief Enable Relu activation.
-uint32_t with_activation;
-/// @brief Relu activation slope.
-float activation_negative_slope;
-/// @brief Primitive id containing weights data.
-cldnn_primitive_id weights;
-/// @brief Primitive id containing bias data.
-cldnn_primitive_id bias;
-/// @brief Primitive id containing weights quantization factors per output feature map.
-cldnn_primitive_id weights_quantization_factors;
-/// @brief Primitive id containing output quantization factors per output feature map.
-cldnn_primitive_id output_calibration_factors;
-/// @brief Input quantization factor
-float input_quantization_factor;
-/// @brief Output quantization factor
-float output_quantization_factor;
-
-CLDNN_END_PRIMITIVE_DESC(fully_connected)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fully_connected);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/fully_connected_grad_input.h b/inference-engine/thirdparty/clDNN/api/C/fully_connected_grad_input.h
deleted file mode 100644 (file)
index a12ba53..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs backward fully connected layer (inner product) for input.
-CLDNN_BEGIN_PRIMITIVE_DESC(fully_connected_grad_input)
-/// @brief Primitive id containing weights data.
-cldnn_primitive_id weights;
-CLDNN_END_PRIMITIVE_DESC(fully_connected_grad_input)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fully_connected_grad_input);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/fully_connected_grad_weights.h b/inference-engine/thirdparty/clDNN/api/C/fully_connected_grad_weights.h
deleted file mode 100644 (file)
index 84ae61d..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs backward fully connected layer (inner product) for weights and biases.
-CLDNN_BEGIN_PRIMITIVE_DESC(fully_connected_grad_weights)
-/// @brief Primitive id containing weights data.
-cldnn_primitive_id weights;
-/// @brief Primitive id containing bias data.
-cldnn_primitive_id bias;
-/// @brief Primitive id containing fully connected gradient data. Used for proper order of gradient calculation.
-/// Leave empty if primitive is last in backward pass.
-cldnn_primitive_id fc_grad;
-/// @brief Primitive id containing weight gradient calculated in previous iteration. Memory size should be same as weights.
-cldnn_primitive_id prev_weights_grad;
-/// @brief Primitive id containing bias gradient calculated in previous iteration. Memory size should be same as bias.
-cldnn_primitive_id prev_bias_grad;
-CLDNN_END_PRIMITIVE_DESC(fully_connected_grad_weights)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fully_connected_grad_weights);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/gather.h b/inference-engine/thirdparty/clDNN/api/C/gather.h
deleted file mode 100644 (file)
index 7bf2396..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-typedef enum {
-    cldnn_gather_along_b = 0,
-    cldnn_gather_along_f = CLDNN_TENSOR_BATCH_DIM_MAX,
-    cldnn_gather_along_x = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX,
-    cldnn_gather_along_y = cldnn_gather_along_x + 1
-} cldnn_gather_axis;
-
-CLDNN_BEGIN_PRIMITIVE_DESC(gather)
-/// @brief Gathering axis;
-cldnn_gather_axis axis;
-/// @brief Output shape
-cldnn_tensor output_shape;
-CLDNN_END_PRIMITIVE_DESC(gather)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(gather);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/gemm.h b/inference-engine/thirdparty/clDNN/api/C/gemm.h
deleted file mode 100644 (file)
index d28a757..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include <stdbool.h>
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs general matrix multiplication (GEMM) on the input matrices.
-
-CLDNN_BEGIN_PRIMITIVE_DESC(gemm)
-/// @brief Variable containing ALPHA parameter
-float alpha;
-/// @brief Variable containing BETA parameter
-float beta;
-/// @brief Flag for transposing first input matrix
-bool transpose_input0;
-/// @brief Flag for transposing second input matrix
-bool transpose_input1;
-CLDNN_END_PRIMITIVE_DESC(gemm)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(gemm);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
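The removed gemm header exposes alpha, beta and per-input transpose flags; below is a plain C sketch of the conventional GEMM formula those parameters suggest (out = alpha * op(A) * op(B) + beta * out). This is an assumption about the intended semantics for illustration only, not the clDNN kernel.

```c
/* Illustrative GEMM reference: out = alpha * op(A) * op(B) + beta * out,
 * where op() optionally transposes its argument. Row-major, no tiling.
 * 'out' must hold the accumulator matrix on entry when beta != 0. */
#include <stddef.h>

static void gemm_ref(const float* A, const float* B, float* out,
                     size_t M, size_t N, size_t K,
                     float alpha, float beta,
                     int transpose_a, int transpose_b) {
    for (size_t m = 0; m < M; ++m) {
        for (size_t n = 0; n < N; ++n) {
            float acc = 0.0f;
            for (size_t k = 0; k < K; ++k) {
                float a = transpose_a ? A[k * M + m] : A[m * K + k];
                float b = transpose_b ? B[n * K + k] : B[k * N + n];
                acc += a * b;
            }
            out[m * N + n] = alpha * acc + beta * out[m * N + n];
        }
    }
}
```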
diff --git a/inference-engine/thirdparty/clDNN/api/C/index_select.h b/inference-engine/thirdparty/clDNN/api/C/index_select.h
deleted file mode 100644 (file)
index 3d687ed..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-#include <stdbool.h>
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Selects values at the given indices, which are copied to the output.
-///
-/// @details Applies index selection along the specified dimension. The indices which will be copied are specified
-///          by @c indices.
-/// @n
-/// @n Example:
-/// @n      <tt>input_sizes  = (1, 2, 4, 2)</tt>
-/// @n      <tt>input_values = (a, b, c, d)</tt>
-/// @n      <tt>               (e, f, g, h)</tt>
-/// @n      <tt>indices_sizes  = (1, 1, 6, 1)</tt>
-/// @n      <tt>indices_values = {0, 0, 1, 1, 3, 3}</tt>
-/// @n  For axis: along_x:
-/// @n      <tt>output_sizes  = (1, 2, 6, 2)</tt>
-/// @n      <tt>output_values = (a, a, b, b, d, d)</tt>
-/// @n      <tt>                (e, e, f, f, h, h)</tt>
-/// @n
-/// @n The resulting output has sizes equal to input_sizes, except that the size along the selected axis is changed to the indices' x size.
-/// @n
-/// @n@b Requirements:
-/// @n - @c input must be a valid primitive_id, whose output format is bfyx/yxfb;
-/// @n - @c indices must be a valid primitive_id, whose output layout is: (bfyx/yxfb, i32, {1, 1, indices_size, 1})
-/// @n - @c axis - a valid index_select_axis_name instance.
-/// @n Breaking any of these conditions will cause an exception to be thrown.
-CLDNN_BEGIN_PRIMITIVE_DESC(index_select)
-
-/// @brief A list of axes of index selecting.
-index_select_axis_name_arr axis;
-/// @brief Number of axes of index selecting.
-int axis_num;
-/// @brief Do index_select in reverse order on axis.
-bool reverse;
-
-CLDNN_END_PRIMITIVE_DESC(index_select)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(index_select);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
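A small C sketch of the along-x case from the example in the removed index_select header: output rows are re-assembled by copying the columns named in the index buffer. Names and layout are illustrative assumptions.

```c
/* Illustrative index_select along the x axis of a 2-D [rows x width]
 * buffer, following the example in the removed header; reverse mode
 * and the other axes are omitted. */
#include <stddef.h>
#include <stdint.h>

static void index_select_x(const float* in, size_t rows, size_t width,
                           const int32_t* indices, size_t num_indices,
                           float* out /* rows x num_indices */) {
    for (size_t r = 0; r < rows; ++r)
        for (size_t i = 0; i < num_indices; ++i)
            out[r * num_indices + i] = in[r * width + indices[i]];
}
```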
diff --git a/inference-engine/thirdparty/clDNN/api/C/input_layout.h b/inference-engine/thirdparty/clDNN/api/C/input_layout.h
deleted file mode 100644 (file)
index e750dc3..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Provides the input layout for data to be passed later to the network.
-/// @details This primitive allows defining the layout for input data
-/// which will be passed to the network before execution.
-/// For example, network input images.
-/// @note User should call network::set_input_data() for every @p input_layout primitive before network execution.
-/// @sa network::set_input_data(), cldnn::data
-CLDNN_BEGIN_PRIMITIVE_DESC(input_layout)
-/// @brief Defines layout for the data will be passed to network.
-cldnn_layout layout;
-CLDNN_END_PRIMITIVE_DESC(input_layout)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(input_layout);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/lookup_table.h b/inference-engine/thirdparty/clDNN/api/C/lookup_table.h
deleted file mode 100644 (file)
index 45fb500..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Enum type to specify axis to return values from.
-typedef enum {
-    cldnn_lookup_table_batch,
-    cldnn_lookup_table_feature,
-    cldnn_lookup_table_x,
-    cldnn_lookup_table_y,
-    cldnn_lookup_table_xyf
-} cldnn_lookup_table_axis;
-
-/// @brief Returns the values from the data that the given indices point at.
-CLDNN_BEGIN_PRIMITIVE_DESC(lookup_table)
-/// @brief Axis to return values from. If not set, returns the data that the index points at in the flattened x, y, f dimensions for each batch.
-cldnn_lookup_table_axis axis;
-/// @brief Indicates that the primitive has user defined axis to return values from.
-uint32_t with_axis;
-CLDNN_END_PRIMITIVE_DESC(lookup_table)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lookup_table);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/lrn.h b/inference-engine/thirdparty/clDNN/api/C/lrn.h
deleted file mode 100644 (file)
index d57a7f0..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum /*:int32_t*/ {
-    cldnn_lrn_norm_region_across_channel,
-    cldnn_lrn_norm_region_within_channel
-} cldnn_lrn_norm_region;
-
-/// @brief Local response normalization
-/// @details LRN layer as described in chapter 3.3 of "ImageNet Classification with Deep Convolutional
-/// Neural Networks" by Krizhevsky, Sutskever, Hinton. @n See: http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
-/// @par Algorithm:
-///   b(i,x,y) = a(i,x,y) / (k + alpha * sum(j = max(0, i-n/2) .. min(N-1, i+n/2); a(j,x,y)^2))^beta
-/// @par Where:
-///   @li b(i,x,y) : value at x, y from i-th feature map after normalization
-///   @li a(i,x,y) : value at x, y from i-th feature map before normalization
-///   @li N : number of feature maps
-///   @li n : size of normalization
-///   @li k, alpha, beta : hyperparameters (equal to 2, 1e-4, 0.75 in the paper).
-CLDNN_BEGIN_PRIMITIVE_DESC(lrn)
-/// @brief Size of normalization.
-uint32_t size;
-/// @brief Hyper parameter "k".
-float k;
-/// @brief Hyper parameter "alpha".
-float alpha;
-/// @brief Hyper parameter "beta".
-float beta;
-/// @brief Normalize across or within channel
-cldnn_lrn_norm_region norm_region;
-CLDNN_END_PRIMITIVE_DESC(lrn)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lrn);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
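For reference, an illustrative across-channel LRN loop following the formula in the removed header (with the beta exponent from the cited paper); 'a' holds the N feature-map values at one (x, y) position. Not the clDNN kernel.

```c
/* Illustrative across-channel LRN for a single (x, y) position. */
#include <math.h>
#include <stddef.h>

static void lrn_across_channels(const float* a, float* b, size_t N,
                                size_t size, float k, float alpha, float beta) {
    for (size_t i = 0; i < N; ++i) {
        size_t lo = i >= size / 2 ? i - size / 2 : 0;       /* max(0, i - n/2) */
        size_t hi = i + size / 2 < N - 1 ? i + size / 2 : N - 1; /* min(N-1, i + n/2) */
        float sum = 0.0f;
        for (size_t j = lo; j <= hi; ++j)
            sum += a[j] * a[j];
        b[i] = a[i] / powf(k + alpha * sum, beta);
    }
}
```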
diff --git a/inference-engine/thirdparty/clDNN/api/C/lstm.h b/inference-engine/thirdparty/clDNN/api/C/lstm.h
deleted file mode 100644 (file)
index 4331162..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include <stdbool.h>
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Weights orders
-/// @details Specifies the order in which the weights are concatenated.
-/// e.g. [i, o, f, z] : [input, output, forget, block]
-/// ONNX order: iofz
-/// Caffe order: ifoz
-/// pyTorch order: izof
-/// IE order: fizo
-typedef enum /*:int32_t*/ {
-    cldnn_lstm_offset_order_iofz = 0,
-    cldnn_lstm_offset_order_ifoz,
-    cldnn_lstm_offset_order_izof,
-    cldnn_lstm_offset_order_fizo
-} cldnn_lstm_offset_order;
-
-/// @brief LSTM Output selection
-/// @details The current implementation allows the user to select the output
-/// of an LSTM node by specifying any of the following options
-typedef enum /*:int32_t*/ {
-    /// output the entire hidden sequence
-    cldnn_lstm_output_sequence = 0,
-    /// output just the last hidden value
-    cldnn_lstm_output_hidden,
-    /// output the last hidden and last cell values
-    cldnn_lstm_output_hidden_cell,
-    /// output the hidden sequence concatenated with the last cell
-    cldnn_lstm_output_sequence_cell
-} cldnn_lstm_output;
-
-/// @brief Performs forward Long Short-Term Memory (LSTM) layer.
-/// @details The current implementation of LSTM is described by the following equations.
-///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
-///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Wbf)
-///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc)
-///   Ct = ft (.) Ct-1 + it (.) ct
-///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
-///   Ht = ot (.) h(Ct)
-/// Where f = Sigmoid, g = Tanh, and h = Tanh.
-CLDNN_BEGIN_PRIMITIVE_DESC(lstm)
-/// @brief Array of primitive ids containing weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id weights;
-/// @brief Array of primitive ids containing recurrent weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id recurrent;
-/// @brief Array of primitive ids containing bias vectors for input, output, forget, and cell gates.
-cldnn_primitive_id bias;
-/// @brief Array of primitive ids containing the initial value of the hidden data (Ht-1).
-cldnn_primitive_id initial_hidden;
-/// @brief Array of primitive ids containing the initial value of the cell state data (Ct-1).
-cldnn_primitive_id initial_cell;
-/// @brief Array of primitive ids containing peephole weight vectors for input, output, and forget gates.
-cldnn_primitive_id peepholes;
-/// @brief Cell clip threshold T. It is applied to the input of activations [-T, T]. No clip is applied if it is not specified.
-float clip;
-/// @brief Couple the input and forget gates if input_forget is 1. Default is 0.
-bool input_forget;
-/// @brief A list of 3 activation functions for the input, output, forget, cell, and hidden.
-cldnn_activation_func activations[3];
-/// @brief Optional scaling values used by some activation functions. The values are consumed in the order of activation functions.
-cldnn_activation_additional_params activation_params[3];
-/// @brief Output selection. By default the entire hidden sequence is returned.
-cldnn_lstm_output output_selection;
-/// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
-cldnn_lstm_offset_order offset_order;
-// NOT SUPPORTED YET
-// uint32_t output_sequence;
-CLDNN_END_PRIMITIVE_DESC(lstm)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lstm);
-
-/// @brief LSTM Layer GEMM helper primitive.
-/// @details The current helper primitive performs fused GEMM operations.
-CLDNN_BEGIN_PRIMITIVE_DESC(lstm_gemm)
-/// @brief Array of primitive ids containing weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id weights;
-/// @brief Array of primitive ids containing recurrent weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id recurrent;
-/// @brief Array of primitive ids containing bias vectors for input, output, forget, and cell gates.
-cldnn_primitive_id bias;
-/// @brief Array of primitive ids containing the initial value of the hidden data (Ht-1).
-cldnn_primitive_id hidden;
-/// @brief direction default = 0, bidirectional = 1.
-uint32_t direction;
-CLDNN_END_PRIMITIVE_DESC(lstm_gemm)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lstm_gemm);
-
-/// @brief LSTM Layer element-wise helper primitive.
-/// @details The current helper primitive performs fused element-wise operations.
-CLDNN_BEGIN_PRIMITIVE_DESC(lstm_elt)
-/// @brief Array of primitive ids containing the initial value of the cell state data (Ct-1).
-cldnn_primitive_id cell;
-/// @brief Cell clip threshold T. It is applied to the input of activations [-T, T]. No clip is applied if it is not specified.
-float clip;
-/// @brief Couple the input and forget gates if input_forget is 1. Default is 0.
-bool input_forget;
-/// @brief A list of 3 activation functions for the input, output, forget, cell, and hidden.
-cldnn_activation_func activations[3];
-/// @brief Optional scaling values used by some activation functions. The values are consumed in the order of activation functions.
-cldnn_activation_additional_params activation_params[3];
-/// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
-cldnn_lstm_offset_order offset_order;
-/// @brief direction default = 0, bidirectional = 1.
-uint32_t direction;
-// NOT SUPPORTED YET
-// uint32_t output_sequence;
-CLDNN_END_PRIMITIVE_DESC(lstm_elt)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lstm_elt);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
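A minimal single-timestep C sketch of the LSTM cell equations quoted in the removed header (f = sigmoid, g = h = tanh). Matrix layouts, gate order, clipping, peepholes and the gemm/elt split are omitted or assumed; this illustrates the documented math, not the clDNN implementation.

```c
/* Illustrative single-timestep LSTM cell (f = sigmoid, g = h = tanh). */
#include <math.h>
#include <stddef.h>

static float sigmoidf(float v) { return 1.0f / (1.0f + expf(-v)); }

/* One gate pre-activation for one output unit: W*x + R*h_prev + b. */
static float gate_pre(const float* W, const float* R, const float* b,
                      const float* x, const float* h_prev,
                      size_t I, size_t H, size_t unit) {
    float acc = b[unit];
    for (size_t i = 0; i < I; ++i) acc += W[unit * I + i] * x[i];
    for (size_t j = 0; j < H; ++j) acc += R[unit * H + j] * h_prev[j];
    return acc;
}

static void lstm_cell(const float* x, const float* h_prev, const float* c_prev,
                      const float* Wi, const float* Wf, const float* Wz, const float* Wo,
                      const float* Ri, const float* Rf, const float* Rz, const float* Ro,
                      const float* bi, const float* bf, const float* bz, const float* bo,
                      size_t I, size_t H, float* h_out, float* c_out) {
    for (size_t u = 0; u < H; ++u) {
        float it = sigmoidf(gate_pre(Wi, Ri, bi, x, h_prev, I, H, u)); /* input gate  */
        float ft = sigmoidf(gate_pre(Wf, Rf, bf, x, h_prev, I, H, u)); /* forget gate */
        float zt = tanhf(gate_pre(Wz, Rz, bz, x, h_prev, I, H, u));    /* block input */
        float ot = sigmoidf(gate_pre(Wo, Ro, bo, x, h_prev, I, H, u)); /* output gate */
        c_out[u] = ft * c_prev[u] + it * zt;
        h_out[u] = ot * tanhf(c_out[u]);
    }
}
```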
diff --git a/inference-engine/thirdparty/clDNN/api/C/lstm_dynamic.h b/inference-engine/thirdparty/clDNN/api/C/lstm_dynamic.h
deleted file mode 100644 (file)
index 1de13fa..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include <stdbool.h>
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/// @brief Performs forward Long Short-Term Memory (LSTM_DYNAMIC) layer.
-/// @details The current implementation of LSTM_DYNAMIC is described by the following equations.
-///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
-///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Wbf)
-///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc)
-///   Ct = ft (.) Ct-1 + it (.) ct
-///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
-///   Ht = ot (.) h(Ct)
-/// Where f = Sigmoid, g = Tanh, and h = Tanh.
-CLDNN_BEGIN_PRIMITIVE_DESC(lstm_dynamic)
-/// @brief Array of primitive ids containing weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id weights;
-/// @brief Array of primitive ids containing recurrent weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id recurrent;
-/// @brief Primitive Id of mutable data primitive pointing to buffer, which will be filled with last hidden state.
-cldnn_primitive_id last_hidden_state;
-/// @brief Primitive Id of mutable data primitive pointing to buffer, which will be filled with last cell state.
-cldnn_primitive_id last_cell_state;
-/// @brief Array of primitive ids containing bias vectors for input, output, forget, and cell gates.
-cldnn_primitive_id bias;
-/// @brief Array of primitive ids containing the initial value of the hidden data (Ht-1).
-cldnn_primitive_id initial_hidden;
-/// @brief Array of primitive ids containing the initial value of the cell state data (Ct-1).
-cldnn_primitive_id initial_cell;
-/// @brief Primitive id containing the dynamic sequence lengths.
-cldnn_primitive_id dyn_length;
-/// @brief Cell clip threshold T. It is applied to the input of activations [-T, T]. No clip is applied if it is not specified.
-float clip;
-/// @brief Couple the input and forget gates if input_forget is 1. Default is 0.
-bool input_forget;
-CLDNN_END_PRIMITIVE_DESC(lstm_dynamic)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lstm_dynamic);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/max_unpooling.h b/inference-engine/thirdparty/clDNN/api/C/max_unpooling.h
deleted file mode 100644 (file)
index a2ca824..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs "max_unpooling" operation.
-/// @details Reverse operation of max pooling, based on the argmax data where indices of each max pooling region are stored.
-CLDNN_BEGIN_PRIMITIVE_DESC(max_unpooling)
-/// @brief Primitive id which contains indices of each max pooling region. Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
-cldnn_primitive_id argmax;
-/// @brief Defines a shift, relative to (0,0) position of the input buffer,
-/// where (0,0) point of the pooling window should start calculations. Used only for output size computation.
-cldnn_tensor input_offset;
-/// @brief Defines shift in input buffer between adjacent calculations of output values. Used only for output size computation.
-cldnn_tensor stride;
-/// @brief Pooling kernel size. Used only for output size computation.
-cldnn_tensor size;
-/// @brief Indicates that the primitive has user-defined output size (non-zero value).
-uint32_t with_output_size;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-CLDNN_END_PRIMITIVE_DESC(max_unpooling)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(max_unpooling);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
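An illustrative C sketch of the scatter described by the removed max_unpooling header: each input value is written back to the position recorded in the argmax buffer (flattened indices stored as fp32). Names are assumptions.

```c
/* Illustrative max_unpooling scatter using flattened argmax indices. */
#include <stddef.h>
#include <string.h>

static void max_unpool_ref(const float* in, const float* argmax,
                           size_t in_count, float* out, size_t out_count) {
    memset(out, 0, out_count * sizeof(float));   /* unpooled positions stay zero */
    for (size_t i = 0; i < in_count; ++i) {
        size_t dst = (size_t)argmax[i];          /* flattened bfyx index, fp32 per doc */
        if (dst < out_count)
            out[dst] = in[i];
    }
}
```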
diff --git a/inference-engine/thirdparty/clDNN/api/C/mutable_data.h b/inference-engine/thirdparty/clDNN/api/C/mutable_data.h
deleted file mode 100644 (file)
index ac6f133..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Enum type to specify function for weights filling.
-typedef enum {
-    zero,
-    one,
-    xavier
-} cldnn_filler_type;
-
-/// @brief Provides mutable data.
-/// @details This primitive allows passing data which can be written to during training.
-/// For example, weights and biases for scoring networks.
-/// This primitive can also be set as another primitive's output. In this case the underlying buffer is shared between mutable_data and the preceding primitive.
-CLDNN_BEGIN_PRIMITIVE_DESC(mutable_data)
-/// @brief Memory object which contains data.
-/// @note If memory is attached by ::cldnn_attach_memory(),
-/// attached buffer should be valid on ::cldnn_build_network() call.
-cldnn_memory mem;
-/// @brief Specifies function which will be used to fill data.
-cldnn_filler_type fill_type;
-CLDNN_END_PRIMITIVE_DESC(mutable_data)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(mutable_data);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/mvn.h b/inference-engine/thirdparty/clDNN/api/C/mvn.h
deleted file mode 100644 (file)
index 3c7875c..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Mean Variance Normalization primitive.
-/// @details Normalizes the input to have 0-mean and/or unit (1) variance.
-
-CLDNN_BEGIN_PRIMITIVE_DESC(mvn)
-/// @brief Determines if the normalization is done across or within channels.
-uint32_t across_channels;
-/// @brief Determines if normalize variance is applied.
-uint32_t normalize_variance;
-/// @brief Epsilon for not dividing by zero while normalizing.
-float epsilon;
-CLDNN_END_PRIMITIVE_DESC(mvn)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(mvn);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
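A short C sketch of the normalization the removed MVN header describes, applied to one channel's elements: subtract the mean and, if requested, divide by sqrt(variance + epsilon). Across-channel grouping is omitted; names are assumptions.

```c
/* Illustrative mean-variance normalization over one channel's elements. */
#include <math.h>
#include <stddef.h>

static void mvn_ref(float* data, size_t n, int normalize_variance, float epsilon) {
    float mean = 0.0f;
    for (size_t i = 0; i < n; ++i) mean += data[i];
    mean /= (float)n;

    float var = 0.0f;
    for (size_t i = 0; i < n; ++i) {
        data[i] -= mean;                 /* zero-mean */
        var += data[i] * data[i];
    }
    var /= (float)n;

    if (normalize_variance) {            /* unit variance, guarded by epsilon */
        float inv = 1.0f / sqrtf(var + epsilon);
        for (size_t i = 0; i < n; ++i) data[i] *= inv;
    }
}
```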
diff --git a/inference-engine/thirdparty/clDNN/api/C/normalize.h b/inference-engine/thirdparty/clDNN/api/C/normalize.h
deleted file mode 100644 (file)
index 2e4a2a3..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Normalizes the input using an L2 norm and multiplies the output with scale value.
-/// The scale can be equal for all channels or one scale per channel.
-/// @details The L2 norm is computed as:<br>
-/// Across spatial mode (across_spatial=true)-<br>
-/// norm(i,x,y) = sqrt( &Sigma;( in(f,w,h)^2 ) + epsilon ) where f in range (0,num_of_features), w in range (0,input_width), h in range (0,input_height).<br>
-/// The summation is performed over all the pixels in the batch.<br>
-/// Within spatial mode (across_spatial=false)-<br>
-/// norm(i,x,y) = sqrt( &Sigma;( in(f,x,y)^2 ) + epsilon ) where f in range (0,num_of_features).<br>
-/// The summation is performed over this (x,y) position on all the features.<br>
-/// @par Algorithm:
-///   out(i,x,y) = ( in(i,x,y) / norm(i,x,y) ) * scale(i)
-/// @par Where:
-///   @li out(i,x,y) : value at x, y from i-th feature map after normalization.
-///   @li in(i,x,y) : value at x, y from i-th feature map before normalization.
-///   @li norm(i,x,y) : L2 norm as described above.
-///   @li scale(i) : the scale value of the i-th feature map.
-CLDNN_BEGIN_PRIMITIVE_DESC(normalize)
-/// @brief Scale input primitive id with values needed for scaling after the normalization.
-/// Scale x dimension should be 1 (if all channels have the same scale) or equal to input feature size (one scale per channel).
-/// All other dimensions should be 1.
-cldnn_primitive_id scale_input;
-/// @brief Determines if the normalization is done across or within spatial (see documentation above).
-uint32_t across_spatial;
-/// @brief Epsilon for not dividing by zero while normalizing.
-float epsilon;
-CLDNN_END_PRIMITIVE_DESC(normalize)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(normalize);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
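For the within-spatial mode documented above, an illustrative C sketch for a single (x, y) position: the F feature values are divided by their L2 norm (with epsilon) and multiplied by the per-channel scale. Not the clDNN kernel.

```c
/* Illustrative within-spatial L2 normalization for one (x, y) position. */
#include <math.h>
#include <stddef.h>

static void l2_normalize_position(const float* in, const float* scale,
                                  float* out, size_t F, float epsilon) {
    float sum_sq = 0.0f;
    for (size_t f = 0; f < F; ++f) sum_sq += in[f] * in[f];
    float norm = sqrtf(sum_sq + epsilon);
    for (size_t f = 0; f < F; ++f)
        out[f] = (in[f] / norm) * scale[f];   /* one scale per channel */
}
```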
diff --git a/inference-engine/thirdparty/clDNN/api/C/one_hot.h b/inference-engine/thirdparty/clDNN/api/C/one_hot.h
deleted file mode 100644 (file)
index 479d330..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Creates a one-hot encoding of the input.
-/// @details Creates a one-hot encoding of the input, putting the new one-hot axis in the position
-/// @n       specified by the @p one_hot_axis input, using the @p shape tensor as size reference.
-/// @n       The size of @p shape must be appropriate for adding a one-hot axis to input. For example,
-/// @n      <tt>input_sizes = (1, in_f, in_y, in_x)</tt>
-/// @n expanded with
-/// @n      <tt>one_hot_axis = 2</tt>
-/// @n would insert the one-hot axis in the Y dimension, requiring
-/// @n      <tt>shape = (in_f, in_y, one-hot_limit, in_x)</tt>
-/// @n The output values would then be determined by input as
-/// @n      <tt>output[f, y, i, x] = (input[0, f, y, x] == i) ? 1 : 0;</tt>
-/// @n Since determining whether the input is appropriate (that the one-hot axis
-/// @n has enough space to fully encode all inputs) requires scanning the whole
-/// @n input, the primitive doesn't check for that, instead producing all-zeros
-/// @n output axes for inputs below 0 and greater than the limit set by
-/// @n @p shape.
-/// @n
-/// @n\b Requirements
-/// @n - @p one_hot_axis must be within (inclusive) range 0 - 3.
-/// @n - @p shape must fit input sizes (see example above).
-/// @n - input batch size must be equal to 1.
-/// @n
-/// @n Breaking any of these conditions will cause an exception to be thrown.
-CLDNN_BEGIN_PRIMITIVE_DESC(one_hot)
-/// @brief Output size reference.
-cldnn_tensor shape;
-/// @brief One-hot axis position in output shape (0-based, from left to right).
-uint16_t one_hot_axis;
-/// @brief The locations represented by indices in input take this value.
-float on_value;
-/// @brief The locations not represented by indices in input take this value.
-float off_value;
-CLDNN_END_PRIMITIVE_DESC(one_hot)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(one_hot);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
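A small C sketch of the output rule quoted in the removed one_hot header, reduced to a 1-D index vector: out[n][i] is on_value where in[n] == i and off_value otherwise. Shapes and names are illustrative assumptions.

```c
/* Illustrative one-hot expansion of a 1-D index vector into n x limit. */
#include <stddef.h>
#include <stdint.h>

static void one_hot_ref(const int32_t* in, size_t n, size_t limit,
                        float on_value, float off_value,
                        float* out /* n x limit */) {
    for (size_t j = 0; j < n; ++j)
        for (size_t i = 0; i < limit; ++i)
            out[j * limit + i] = (in[j] == (int32_t)i) ? on_value : off_value;
}
```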
diff --git a/inference-engine/thirdparty/clDNN/api/C/permute.h b/inference-engine/thirdparty/clDNN/api/C/permute.h
deleted file mode 100644 (file)
index c531b5e..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Permutes data in the memory, with respect to provided order.
-/// @details Permute order is set as vector with positions meaning corresponding to tensor.
-/// Vector values represent dimensions to be permuted in bfyx format. For example: <br>
-/// input_dimensions = tensor{ 5, 3, 6, 3 } <br>
-/// permute_order = { 2, 3, 1, 0 } <br>
-/// output_dimensions = { 6, 3, 3, 5 } <br>
-/// <br>
-/// When permute_order is { 0, 1, 2, 3 } then input_dimensions = output_dimensions
-CLDNN_BEGIN_PRIMITIVE_DESC(permute)
-/// @brief Array of permuted output order in bfyx format.
-cldnn_uint16_t_arr permute_order;
-CLDNN_END_PRIMITIVE_DESC(permute)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(permute);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
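An illustrative C sketch of the permute rule documented above: output dimension d takes input dimension order[d], so the header's example ({5, 3, 6, 3} with order {2, 3, 1, 0}) yields {6, 3, 3, 5}. Row-major storage is assumed.

```c
/* Illustrative 4-D permute: out_dims[d] = in_dims[order[d]]. */
#include <stddef.h>
#include <stdint.h>

static void permute4_ref(const float* in, const size_t in_dims[4],
                         const uint16_t order[4], float* out) {
    size_t out_dims[4];
    for (int d = 0; d < 4; ++d) out_dims[d] = in_dims[order[d]];

    size_t idx[4]; /* output coordinate */
    for (idx[0] = 0; idx[0] < out_dims[0]; ++idx[0])
    for (idx[1] = 0; idx[1] < out_dims[1]; ++idx[1])
    for (idx[2] = 0; idx[2] < out_dims[2]; ++idx[2])
    for (idx[3] = 0; idx[3] < out_dims[3]; ++idx[3]) {
        size_t in_idx[4];
        for (int d = 0; d < 4; ++d) in_idx[order[d]] = idx[d];
        size_t src = ((in_idx[0] * in_dims[1] + in_idx[1]) * in_dims[2] + in_idx[2]) * in_dims[3] + in_idx[3];
        size_t dst = ((idx[0] * out_dims[1] + idx[1]) * out_dims[2] + idx[2]) * out_dims[3] + idx[3];
        out[dst] = in[src];
    }
}
```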
diff --git a/inference-engine/thirdparty/clDNN/api/C/pooling.h b/inference-engine/thirdparty/clDNN/api/C/pooling.h
deleted file mode 100644 (file)
index 19aaa57..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
-// Copyright (c) 2016-2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Select method for Pooling layer ( @CLDNN_PRIMITIVE_DESC{pooling} ).
-typedef enum /*:int32_t*/ {
-    /// @brief Maximum-pooling method.
-    cldnn_pooling_max,
-    /// @brief Average-pooling method.
-    cldnn_pooling_average,
-    /// @brief Average-pooling method without values which are outside of the input.
-    cldnn_pooling_average_no_padding,
-    /// @brief Maximum-pooling method with additional buffer to store argmax indices.
-    cldnn_pooling_max_with_argmax,
-    /// @brief Pooling with bilinear interpolation
-    cldnn_pooling_bilinear,
-    /// @brief Deformable pooling with bilinear interpolation
-    cldnn_pooling_deformable_bilinear
-} cldnn_pooling_mode;
-
-/// @brief Performs "pooling" operation which is a form of non-linear down-sampling.
-/// @details Pools the input image by taking the max, average, etc. within regions.
-CLDNN_BEGIN_PRIMITIVE_DESC(pooling)
-/// @brief Primitive id which contains indices of each max pooling region. Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
-cldnn_primitive_id argmax;
-/// @brief Pooling method. See #cldnn_pooling_mode.
-int32_t mode;
-/// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
-int8_t global_pooling;
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the pooling window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines shift in input buffer between adjacent calculations of output values.
-cldnn_tensor stride;
-/// @brief Pooling kernel size.
-cldnn_tensor size;
-/// @brief Indicates that the primitive has user-defined output size (non-zero value).
-uint32_t with_output_size;
-/// @brief User-defined output data size of the primitive (w/o padding).
-cldnn_tensor output_size;
-CLDNN_END_PRIMITIVE_DESC(pooling)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(pooling);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
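For reference, an illustrative 2-D max-pooling loop over one feature plane matching the description in the removed header; input offset, padding and the other pooling modes are omitted, and the names are assumptions.

```c
/* Illustrative 2-D max pooling on one feature plane. */
#include <stddef.h>
#include <float.h>

static void max_pool2d_ref(const float* in, size_t in_h, size_t in_w,
                           size_t size_y, size_t size_x,
                           size_t stride_y, size_t stride_x, float* out) {
    size_t out_h = (in_h - size_y) / stride_y + 1;
    size_t out_w = (in_w - size_x) / stride_x + 1;
    for (size_t oy = 0; oy < out_h; ++oy) {
        for (size_t ox = 0; ox < out_w; ++ox) {
            float best = -FLT_MAX;
            for (size_t ky = 0; ky < size_y; ++ky)
                for (size_t kx = 0; kx < size_x; ++kx) {
                    float v = in[(oy * stride_y + ky) * in_w + (ox * stride_x + kx)];
                    if (v > best) best = v;
                }
            out[oy * out_w + ox] = best;
        }
    }
}
```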
diff --git a/inference-engine/thirdparty/clDNN/api/C/prior_box.h b/inference-engine/thirdparty/clDNN/api/C/prior_box.h
deleted file mode 100644 (file)
index 552b0ec..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Generates a set of default bounding boxes with different sizes and aspect ratios.
-/// @details The prior-boxes are shared across all the images in a batch (since they have the same width and height).
-/// First feature stores the mean of each prior coordinate.
-/// Second feature stores the variance of each prior coordinate.
-CLDNN_BEGIN_PRIMITIVE_DESC(prior_box)
-/// @brief Image width and height.
-cldnn_tensor img_size;
-/// @brief Minimum box sizes in pixels.
-cldnn_float_arr min_sizes;
-/// @brief Maximum box sizes in pixels.
-cldnn_float_arr max_sizes;
-/// @brief Various aspect ratios. Duplicate ratios are ignored.
-cldnn_float_arr aspect_ratios;
-/// @brief If not 0, each aspect ratio is flipped. For example, if there is an aspect ratio "r", the aspect ratio "1.0/r" is generated as well.
-uint32_t flip;
-/// @brief If not 0, will clip the prior so that it is within [0, 1].
-uint32_t clip;
-/// @brief Variance for adjusting the prior boxes.
-cldnn_float_arr variance;
-/// @brief Step width.
-float step_width;
-/// @brief Step height.
-float step_height;
-/// @brief Offset to the top left corner of each cell.
-float offset;
-/// @brief If false, only the first min_size is scaled by aspect_ratios.
-uint32_t scale_all_sizes;
-CLDNN_END_PRIMITIVE_DESC(prior_box)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(prior_box);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/proposal.h b/inference-engine/thirdparty/clDNN/api/C/proposal.h
deleted file mode 100644 (file)
index 745027f..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
-// Copyright (c) 2017-2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define CLDNN_ROI_VECTOR_SIZE 5
-
-CLDNN_BEGIN_PRIMITIVE_DESC(proposal)
-int max_proposals;
-float iou_threshold;
-int base_bbox_size;
-int min_bbox_size;
-int feature_stride;
-int pre_nms_topn;
-int post_nms_topn;
-cldnn_float_arr ratios;
-cldnn_float_arr scales;
-float coordinates_offset;
-float box_coordinate_scale;
-float box_size_scale;
-uint32_t swap_xy;
-uint32_t initial_clip;
-uint32_t clip_before_nms;
-uint32_t clip_after_nms;
-uint32_t round_ratios;
-uint32_t shift_anchors;
-uint32_t normalize;
-uint32_t for_deformable;
-CLDNN_END_PRIMITIVE_DESC(proposal)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(proposal);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/reduce.h b/inference-engine/thirdparty/clDNN/api/C/reduce.h
deleted file mode 100644 (file)
index 13d3436..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
-    cldnn_reduce_along_b = 0,
-    cldnn_reduce_along_f = CLDNN_TENSOR_BATCH_DIM_MAX,
-    cldnn_reduce_along_x = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX,
-    cldnn_reduce_along_y = cldnn_reduce_along_x + 1,
-    cldnn_reduce_along_z = cldnn_reduce_along_y + 1,
-    cldnn_reduce_along_w = cldnn_reduce_along_z + 1
-} cldnn_reduce_axis;
-
-/// @brief Select mode for reduce layer ( @CLDNN_PRIMITIVE_DESC{reduce} ).
-typedef enum {
-    /// @brief Reduce max
-    cldnn_reduce_max,
-    /// @brief Reduce min
-    cldnn_reduce_min,
-    /// @brief Reduce mean
-    cldnn_reduce_mean,
-    /// @brief Reduce prod
-    cldnn_reduce_prod,
-    /// @brief Reduce sum
-    cldnn_reduce_sum,
-    /// @brief Reduce and
-    cldnn_reduce_and,
-    /// @brief Reduce or
-    cldnn_reduce_or,
-    /// @brief Reduce sum square
-    cldnn_reduce_sum_square,
-    /// @brief Reduce l1
-    cldnn_reduce_l1,
-    /// @brief Reduce l2
-    cldnn_reduce_l2,
-    /// @brief Reduce log sum
-    cldnn_reduce_log_sum,
-    /// @brief Reduce log sum exp
-    cldnn_reduce_log_sum_exp
-} cldnn_reduce_mode;
-
-CLDNN_BEGIN_PRIMITIVE_DESC(reduce)
-/// @brief Whether to keep the reduced dimension; 1 means the reduced dimension is kept.
-int32_t keep_dims;
-/// @brief Reduce operation type
-int32_t mode;
-/// @brief List of axes to reduce
-cldnn_uint16_t_arr axes;
-CLDNN_END_PRIMITIVE_DESC(reduce)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(reduce);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
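The reduce descriptor above combines a mode, a list of axes, and a keep_dims flag. A small standalone sketch of what cldnn_reduce_sum along one axis with keep_dims means for a 2x3 array follows; the values are arbitrary and this is not the clDNN kernel.

// Conceptual sketch of a sum reduction along one axis of a 2-D array,
// with keep_dims semantics (the reduced dimension is kept with size 1).
#include <cstdio>
#include <vector>

int main() {
    const int rows = 2, cols = 3;
    std::vector<float> data = {1, 2, 3, 4, 5, 6};   // row-major 2x3

    bool keep_dims = true;
    std::vector<float> reduced(rows, 0.f);          // reduce along the column axis
    for (int r = 0; r < rows; ++r)
        for (int c = 0; c < cols; ++c)
            reduced[r] += data[r * cols + c];

    // With keep_dims the logical shape is {rows, 1}; without it, {rows}.
    std::printf("shape: {%d%s}, values: %.0f %.0f\n",
                rows, keep_dims ? ", 1" : "", reduced[0], reduced[1]);
}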
diff --git a/inference-engine/thirdparty/clDNN/api/C/region_yolo.h b/inference-engine/thirdparty/clDNN/api/C/region_yolo.h
deleted file mode 100644 (file)
index 883f5da..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief region softmax specific for yolo2 topology
-/// @details
-/// @par Algorithm:
-///
-/// @par Where:
-///
-CLDNN_BEGIN_PRIMITIVE_DESC(region_yolo)
-/// @brief Parameter coords.
-uint32_t coords;
-/// @brief Parameter classes.
-uint32_t classes;
-/// @brief Number of anchors.
-uint32_t num;
-/// @brief Apply softmax after logistic.
-uint32_t do_softmax;
-/// @brief Number of anchors actually used.
-uint32_t mask_size;
-CLDNN_END_PRIMITIVE_DESC(region_yolo)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(region_yolo);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/reorder.h b/inference-engine/thirdparty/clDNN/api/C/reorder.h
deleted file mode 100644 (file)
index 63204fe..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Changes how data is ordered in memory. Value type is not changed & all information is preserved.
-/// @details Corresponding values are bitwise equal before/after reorder.
-/// It is also merged with a subtraction layer, which can subtract, multiply or divide values (based on mean_mode) while doing the reordering.
-/// NOTE THAT THIS WILL SUBTRACT THE SAME VALUES FROM EACH BATCH.
-CLDNN_BEGIN_PRIMITIVE_DESC(reorder)
-/// @brief Requested memory format.
-cldnn_format_type output_format;
-/// @brief Primitive id to get mean subtract values. Ignored if subtract_per_feature is set.
-cldnn_primitive_id mean_subtract;
-/// @brief Array of mean subtract values.
-cldnn_float_arr subtract_per_feature;
-/// @brief Mode of mean execution
-cldnn_reorder_mean_mode mean_mode;
-CLDNN_END_PRIMITIVE_DESC(reorder)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(reorder);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
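As a rough illustration of the reorder-plus-mean-subtraction behaviour described above, here is a standalone sketch that converts a small bfyx buffer to byxf while subtracting a per-feature mean. The sizes, mean values, and index math are illustrative only, not the clDNN code path.

// Conceptual sketch of a reorder from bfyx to byxf layout with per-feature
// mean subtraction applied on the fly (mirrors subtract_per_feature).
#include <cstdio>
#include <vector>

int main() {
    const int b = 1, f = 2, y = 2, x = 2;
    std::vector<float> src(b * f * y * x);
    for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<float>(i);
    std::vector<float> mean = {0.5f, 1.5f};          // one value per feature

    std::vector<float> dst(src.size());
    for (int bi = 0; bi < b; ++bi)
        for (int fi = 0; fi < f; ++fi)
            for (int yi = 0; yi < y; ++yi)
                for (int xi = 0; xi < x; ++xi) {
                    float v = src[((bi * f + fi) * y + yi) * x + xi] - mean[fi];
                    dst[((bi * y + yi) * x + xi) * f + fi] = v;   // byxf offset
                }
    std::printf("dst[0]=%.1f dst[1]=%.1f\n", dst[0], dst[1]);
}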
diff --git a/inference-engine/thirdparty/clDNN/api/C/reorg_yolo.h b/inference-engine/thirdparty/clDNN/api/C/reorg_yolo.h
deleted file mode 100644 (file)
index 0ec3f36..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief yolo2 topology specific data reorganization primitive
-/// @details
-/// @par Algorithm:
-///
-/// @par Where:
-///
-CLDNN_BEGIN_PRIMITIVE_DESC(reorg_yolo)
-/// @brief Parameter stride.
-uint32_t stride;
-
-CLDNN_END_PRIMITIVE_DESC(reorg_yolo)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(reorg_yolo);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/reshape.h b/inference-engine/thirdparty/clDNN/api/C/reshape.h
deleted file mode 100644 (file)
index 5218654..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-// Copyright (c) 2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Changes information about the input's layout, effectively creating new memory which shares the underlying buffer
-/// but is interpreted in a different way (different shape).
-/// @note The reshape primitive is only supposed to reinterpret the shape of the memory, so it is not possible to change
-/// either the data type or the format of the input buffer, and the total number of elements in input and output (excluding paddings) must match.
-/// Please note that there is no guarantee that the underlying data will be in the proper format if the primitive was explicitly added to the output list.
-CLDNN_BEGIN_PRIMITIVE_DESC(reshape)
-/// @brief Requested memory shape.
-cldnn_tensor output_shape;
-CLDNN_END_PRIMITIVE_DESC(reshape)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(reshape);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
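The note above boils down to a single constraint: reshape may only reinterpret the buffer, so the element counts of the input and the requested output shape must match. A tiny standalone check of that rule, using arbitrary example shapes:

// Conceptual sketch of the reshape constraint: the buffer is only reinterpreted,
// so the element counts of the input and requested output shapes must match.
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

static size_t element_count(const std::vector<size_t>& shape) {
    return std::accumulate(shape.begin(), shape.end(), size_t{1}, std::multiplies<size_t>());
}

int main() {
    std::vector<size_t> in_shape  = {2, 3, 4, 5};
    std::vector<size_t> out_shape = {2, 12, 1, 5};   // same 120 elements, new interpretation
    std::printf("reshape %s\n",
                element_count(in_shape) == element_count(out_shape) ? "allowed" : "rejected");
}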
diff --git a/inference-engine/thirdparty/clDNN/api/C/reverse_sequence.h b/inference-engine/thirdparty/clDNN/api/C/reverse_sequence.h
deleted file mode 100644 (file)
index bea2d21..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CLDNN_BEGIN_PRIMITIVE_DESC(reverse_sequence)
-/// @brief The axis which is partially reversed.
-int32_t seq_axis;
-/// @brief The axis along which reversal is performed.
-int32_t batch_axis;
-CLDNN_END_PRIMITIVE_DESC(reverse_sequence)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(reverse_sequence);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/roi_pooling.h b/inference-engine/thirdparty/clDNN/api/C/roi_pooling.h
deleted file mode 100644 (file)
index 6d6667f..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
-// Copyright (c) 2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include <stdbool.h>
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CLDNN_BEGIN_PRIMITIVE_DESC(roi_pooling)
-/// @brief Pooling method. See #cldnn_pooling_mode.
-int32_t mode;
-/// @brief True, if pooling is position sensitive (PSROIPooling).
-bool position_sensitive;
-/// @brief Output width.
-int pooled_width;
-/// @brief Output height.
-int pooled_height;
-/// @brief Count of sub bins in x spatial dimension.
-int spatial_bins_x;
-/// @brief Count of sub bins in y spatial dimension.
-int spatial_bins_y;
-/// @brief Output features count (applied for position sensitive case only).
-int output_dim;
-/// @brief Transformation parameter.
-float trans_std;
-/// @brief False, if pooling is deformable (DeformablePSROIPooling).
-bool no_trans;
-/// @brief Ratio of the coordinates used in RoIs to the width (and height) of the input data.
-float spatial_scale;
-/// @brief Size of pooled part.
-int part_size;
-/// @brief Size of pooled group.
-int group_size;
-CLDNN_END_PRIMITIVE_DESC(roi_pooling)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(roi_pooling);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/scale.h b/inference-engine/thirdparty/clDNN/api/C/scale.h
deleted file mode 100644 (file)
index 7cc65fd..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs elementwise product of input and scale_input.
-/// @details Each scale input dimension should be equal to the corresponding input dimension or be 1 if that dimension is not present.<br>
-/// Input size : 2x3x4x5(BFYX)<br>
-///     Possible scale input sizes :<br>
-///     2x3x4x5 - works the same as (axis == 0 == -4) in caffe<br>
-///     1x3x4x5 - works the same as (axis == 1 == -3) in caffe<br>
-///     1x1x4x5 - works the same as (axis == 2 == -2) in caffe<br>
-///     1x1x1x5 - works the same as (axis == 3 == -1) in caffe<br>
-///     1x1x1x1 - works the same as an empty shape (scalar) in caffe<br>
-/// When scale_input is the same as input, the behavior is the same as @CLDNN_PRIMITIVE_DESC{eltwise} with product operation.<br>
-/// Performs scale over feature when the scale feature size is equal to input feature size.<br>
-/// Performs scale over feature in batch when the scale feature and scale batch sizes are equal to input feature and input batch sizes.<br>
-/// Optionally it can also add provided biases by setting bias_term.<br>
-CLDNN_BEGIN_PRIMITIVE_DESC(scale)
-/// @brief Primitive id containing bias data.
-cldnn_primitive_id bias;
-CLDNN_END_PRIMITIVE_DESC(scale)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(scale);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
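As a standalone sketch of the broadcasting rules listed above (not the clDNN implementation), the following applies a 1x3x1x1 scale and bias to a 2x3x4x5 BFYX buffer, i.e. the per-feature case; sizes and values are illustrative only.

// Conceptual sketch of the scale primitive's broadcasting: a 1x3x1x1 scale input
// applied to a 2x3x4x5 (BFYX) tensor multiplies every value by its feature's scale,
// with an optional per-feature bias.
#include <cstdio>
#include <vector>

int main() {
    const int B = 2, F = 3, Y = 4, X = 5;
    std::vector<float> input(B * F * Y * X, 1.f);
    std::vector<float> scale = {1.f, 2.f, 3.f};      // one scale per feature
    std::vector<float> bias  = {0.f, 0.5f, 1.f};     // optional bias_term

    for (int b = 0; b < B; ++b)
        for (int f = 0; f < F; ++f)
            for (int yx = 0; yx < Y * X; ++yx) {
                size_t i = ((b * F + f) * Y * X) + yx;
                input[i] = input[i] * scale[f] + bias[f];
            }
    std::printf("feature 0: %.1f, feature 2: %.1f\n", input[0], input[2 * Y * X]);
}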
diff --git a/inference-engine/thirdparty/clDNN/api/C/scale_grad_input.h b/inference-engine/thirdparty/clDNN/api/C/scale_grad_input.h
deleted file mode 100644 (file)
index 694d2eb..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs scale primitive backward for input.
-CLDNN_BEGIN_PRIMITIVE_DESC(scale_grad_input)
-
-CLDNN_END_PRIMITIVE_DESC(scale_grad_input)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(scale_grad_input);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/scale_grad_weights.h b/inference-engine/thirdparty/clDNN/api/C/scale_grad_weights.h
deleted file mode 100644 (file)
index 060f095..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs scale layer backward for scale_input and biases.
-CLDNN_BEGIN_PRIMITIVE_DESC(scale_grad_weights)
-/// @brief Scale input primitive id.
-cldnn_primitive_id scale_input;
-/// @brief Primitive id containing bias data.
-cldnn_primitive_id bias;
-/// @brief Primitive id containing scale gradient data calculated in previous iteration.
-cldnn_primitive_id prev_scale_grad;
-/// @brief Primitive id containing bias gradient data calculated in previous iteration.
-cldnn_primitive_id prev_bias_grad;
-/// @brief Primitive id which uses weights and biases updated in this primitive.
-cldnn_primitive_id scale_grad;
-CLDNN_END_PRIMITIVE_DESC(scale_grad_weights)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(scale_grad_weights);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/select.h b/inference-engine/thirdparty/clDNN/api/C/select.h
deleted file mode 100644 (file)
index f50eacb..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs elementwise select operation on two input primitives with selector primitive (mask)
-/// @note
-/// - both inputs have to have equal sizes in all dimensions
-/// - the format of both inputs has to be the same
-/// - the mask primitive input has to have the same size in all dimensions as the inputs
-CLDNN_BEGIN_PRIMITIVE_DESC(select)
-
-CLDNN_END_PRIMITIVE_DESC(select)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(select);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
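A minimal standalone sketch of the elementwise select semantics described above, with the mask choosing between the two equally sized inputs; values here are arbitrary.

// Conceptual sketch of elementwise select: output[i] takes input1[i] or input2[i]
// depending on the mask value, with all three tensors of identical size.
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> a = {1, 2, 3, 4};
    std::vector<float> b = {10, 20, 30, 40};
    std::vector<int>   mask = {1, 0, 0, 1};

    std::vector<float> out(a.size());
    for (size_t i = 0; i < a.size(); ++i)
        out[i] = mask[i] ? a[i] : b[i];
    std::printf("%.0f %.0f %.0f %.0f\n", out[0], out[1], out[2], out[3]);
}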
diff --git a/inference-engine/thirdparty/clDNN/api/C/shuffle_channels.h b/inference-engine/thirdparty/clDNN/api/C/shuffle_channels.h
deleted file mode 100644 (file)
index bd0887b..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CLDNN_BEGIN_PRIMITIVE_DESC(shuffle_channels)
-/// @brief The number of groups to split the channel dimension. This number must evenly divide the channel dimension size.
-int32_t group;
-/// @brief The index of the channel dimension (default is 1).
-int32_t axis;
-CLDNN_END_PRIMITIVE_DESC(shuffle_channels)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(shuffle_channels);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
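For illustration, the usual reshape-transpose channel shuffle (as in ShuffleNet) views the C channels as a (group, C/group) matrix and transposes it, so channel c moves to position (c % (C/group)) * group + c / (C/group). The standalone sketch below shows that permutation on six channels; it is a conceptual sketch, not clDNN's kernel, and the exact indexing convention is assumed rather than taken from the source.

// Standalone sketch of the reshape-transpose channel shuffle:
// channels are viewed as a (group, C/group) matrix and transposed, so
// channel c moves to position (c % (C/group)) * group + c / (C/group).
#include <cstdio>
#include <vector>

int main() {
    const int C = 6, group = 2;                       // group must evenly divide C
    const int per_group = C / group;
    std::vector<int> channels = {0, 1, 2, 3, 4, 5};   // one value per channel

    std::vector<int> shuffled(C);
    for (int c = 0; c < C; ++c)
        shuffled[(c % per_group) * group + c / per_group] = channels[c];

    for (int c : shuffled) std::printf("%d ", c);     // prints: 0 3 1 4 2 5
    std::printf("\n");
}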
diff --git a/inference-engine/thirdparty/clDNN/api/C/softmax.h b/inference-engine/thirdparty/clDNN/api/C/softmax.h
deleted file mode 100644 (file)
index 042a876..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Enum type to specify softmax's normalization scope (see cldnn_softmax_desc::dimension).
-typedef enum {
-    cldnn_softmax_normalize_f,
-    cldnn_softmax_normalize_x,
-    cldnn_softmax_normalize_y,
-    cldnn_softmax_normalize_z,
-    cldnn_softmax_normalize_fyx,
-    cldnn_softmax_normalize_all,
-} cldnn_softmax_dimension;
-
-/// @brief Normalizes results so they sum to 1. The scope of normalization is defined by a member @p dimension.
-/// @details
-/// @par Algorithm:
-///   b = e^a/sum(N-1; j=0; e^j)
-/// @par Where:
-///   @li N : number of values to normalize
-///   @li b : value after normalization
-///   @li a : value before normalization
-CLDNN_BEGIN_PRIMITIVE_DESC(softmax)
-/// @brief Defines a scope of a single softmax normalization.
-/// @details
-/// Being given a 4-dimensional input, which consists of b,f,y,x dimensions, softmax normalizes data which are divided into multiple independent sets.
-/// Specific behavior is determined by this parameter, as follows:
-/// - when set to @link cldnn_softmax_dimension cldnn_softmax_normalize_x @endlink each input row is normalized independently,
-/// - when set to @link cldnn_softmax_dimension cldnn_softmax_normalize_y @endlink each input column is normalized independently,
-/// - when set to @link cldnn_softmax_dimension cldnn_softmax_normalize_f @endlink each in-depth vector of input is normalized independently,
-/// - when set to @link cldnn_softmax_dimension cldnn_softmax_normalize_fyx @endlink each 3d image within input is normalized independently,
-/// - when set to @link cldnn_softmax_dimension cldnn_softmax_normalize_all @endlink everything is normalized,
-cldnn_softmax_dimension dimension;
-CLDNN_END_PRIMITIVE_DESC(softmax)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(softmax);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
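The algorithm block above, b = e^a / sum(e^j), reads as: exponentiate, then divide by the sum over the chosen normalization scope. A standalone sketch of that formula applied independently per row (roughly the normalize_x scope), with arbitrary values:

// Conceptual sketch of the softmax formula b = e^a / sum(e^a) applied independently
// to each row of a 2-D input.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const int rows = 2, cols = 3;
    std::vector<float> a = {1, 2, 3, 1, 1, 1};

    for (int r = 0; r < rows; ++r) {
        float sum = 0.f;
        for (int c = 0; c < cols; ++c) sum += std::exp(a[r * cols + c]);
        for (int c = 0; c < cols; ++c) a[r * cols + c] = std::exp(a[r * cols + c]) / sum;
    }
    std::printf("row 0 sums to %.3f\n", a[0] + a[1] + a[2]);   // ~1.000
}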
diff --git a/inference-engine/thirdparty/clDNN/api/C/softmax_loss_grad.h b/inference-engine/thirdparty/clDNN/api/C/softmax_loss_grad.h
deleted file mode 100644 (file)
index b982c62..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Backward pass for Softmax log loss.
-/// @details The output values are the same as input_prob, except for the element corresponding to the correct label, which is decremented by 1.
-CLDNN_BEGIN_PRIMITIVE_DESC(softmax_loss_grad)
-
-CLDNN_END_PRIMITIVE_DESC(softmax_loss_grad)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(softmax_loss_grad);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
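A one-line way to state the backward rule above: the gradient equals the predicted probabilities with 1 subtracted at the correct label. A standalone sketch with made-up probabilities and label:

// Conceptual sketch of the softmax log-loss backward pass described above:
// the gradient equals the predicted probabilities, minus 1 at the correct label.
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> prob = {0.1f, 0.7f, 0.2f};   // softmax output for one sample
    const int label = 1;                            // index of the correct class

    std::vector<float> grad = prob;
    grad[label] -= 1.f;
    std::printf("grad: %.1f %.1f %.1f\n", grad[0], grad[1], grad[2]);   // 0.1 -0.3 0.2
}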
diff --git a/inference-engine/thirdparty/clDNN/api/C/split.h b/inference-engine/thirdparty/clDNN/api/C/split.h
deleted file mode 100644 (file)
index 144f8fd..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs split operation on input.
-/// @details Splits the input data into n parts; for each part the user provides a name and offsets.
-/// @n The user cannot use the split primitive directly.
-/// @n The output ids have to be referred to by the name "<split_prim_id>:<split_output_id>".
-/// @n
-/// @n\b Assumptions
-/// @n - offsets1 < offsets2 < offsets3 < ...
-/// @n - size[n] = offsets[n+1] - offsets[n];
-/// @n - last element: size[n] = split_input.size - offsets[n];
-/// @n - no buffer overlapping, as the output size is calculated using offset and input size
-/// @n - split primitive id cannot be used by any other primitive (user needs to use output_ids only)
-/// @n Breaking any of these conditions will cause an exception to be thrown.
-/// @n
-/// @n\b Example:
-/// @n Splitting output to 2 parts by the features:
-/// @n input_size = { 2, 4, 3, 5 };
-/// @n split_id = "split";
-/// @n output_ids_offsets[0] = { "out0", { 0,0,0,0 } };
-/// @n output_ids_offsets[1] = { "out1", { 0,2,0,0 } };
-/// @n After split there would be 2 primitives: "split:out0" and "split:out1" which contain 2 feature maps (lower and upper)
-
-CLDNN_BEGIN_PRIMITIVE_DESC(split)
-/// @brief List of output_ids.
-cldnn_primitive_id_arr output_ids;
-/// @brief Array of tensors with offsets.
-cldnn_tensor_arr output_offsets;
-CLDNN_END_PRIMITIVE_DESC(split)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(split);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
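The assumptions above reduce to size[n] = offsets[n+1] - offsets[n], with the last part running to the end of the input. A standalone sketch of that bookkeeping for the two-way feature split from the example (only the feature axis is modelled here):

// Conceptual sketch of how split part sizes follow from the offsets:
// size[n] = offsets[n+1] - offsets[n], and the last part runs to the end.
#include <cstdio>
#include <vector>

int main() {
    const int feature_size = 4;                 // input_size = { 2, 4, 3, 5 }, split by features
    std::vector<int> offsets = {0, 2};          // "out0" starts at feature 0, "out1" at feature 2

    for (size_t n = 0; n < offsets.size(); ++n) {
        int end = (n + 1 < offsets.size()) ? offsets[n + 1] : feature_size;
        std::printf("out%zu: features [%d, %d), size %d\n", n, offsets[n], end, end - offsets[n]);
    }
}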
diff --git a/inference-engine/thirdparty/clDNN/api/C/strided_slice.h b/inference-engine/thirdparty/clDNN/api/C/strided_slice.h
deleted file mode 100644 (file)
index 33218cf..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CLDNN_BEGIN_PRIMITIVE_DESC(strided_slice)
-/// @brief Array of bits; a set bit i replaces begin[i] with the maximum possible range in that dimension.
-cldnn_uint8_t_arr begin_mask;
-/// @brief Array of bits; a set bit i replaces end[i] with the maximum possible range in that dimension.
-cldnn_uint8_t_arr end_mask;
-/// @brief Array of bits; a set bit i adds a new dimension of length 1 at the i-th position of the output tensor.
-cldnn_uint8_t_arr new_axis_mask;
-/// @brief Array of bits; a set bit i shrinks the dimensionality by 1, taking only the value at index begin[i].
-cldnn_uint8_t_arr shrink_axis_mask;
-CLDNN_END_PRIMITIVE_DESC(strided_slice)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(strided_slice);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/tile.h b/inference-engine/thirdparty/clDNN/api/C/tile.h
deleted file mode 100644 (file)
index 21d3d0e..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
-    cldnn_tile_along_b = 0,
-    cldnn_tile_along_f = CLDNN_TENSOR_BATCH_DIM_MAX,
-    cldnn_tile_along_x = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX,
-    cldnn_tile_along_y = cldnn_tile_along_x + 1,
-    cldnn_tile_along_z = cldnn_tile_along_y + 1
-} cldnn_tile_axis;
-
-CLDNN_BEGIN_PRIMITIVE_DESC(tile)
-/// @brief Tiling axis
-cldnn_tile_axis axis;
-/// @brief Number of tiles across the axis.
-int tiles;
-CLDNN_END_PRIMITIVE_DESC(tile)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(tile);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
diff --git a/inference-engine/thirdparty/clDNN/api/C/upsampling.h b/inference-engine/thirdparty/clDNN/api/C/upsampling.h
deleted file mode 100644 (file)
index 87727ef..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Sample mode for upsampling layer ( @CLDNN_PRIMITIVE_DESC{upsampling} ).
-typedef enum /*:int32_t*/ {
-    /// @brief upsampling nearest neighbor.
-    cldnn_upsampling_nearest,
-    /// @brief upsampling bilinear.
-    cldnn_upsampling_bilinear,
-} cldnn_upsampling_sample_type;
-
-/// @brief Performs nearest neighbor/bilinear upsampling.
-/// Also supports built-in Relu @ref activation, available by setting it in the arguments.
-CLDNN_BEGIN_PRIMITIVE_DESC(upsampling)
-/// @param scale Upsampling scale.
-float scale;
-/// @param num_filter Input filter. Only used by bilinear sample_type.
-uint32_t num_filter;
-/// @param sample_type Upsampling method (nearest neighbor/bilinear).
-int32_t sample_type; /*cldnn_sample_type*/
-/// @brief Enables Relu activation.
-uint32_t with_activation;
-/// @brief Relu activation slope.
-float activation_negative_slope;
-CLDNN_END_PRIMITIVE_DESC(upsampling)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(upsampling);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
-
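A standalone sketch of the cldnn_upsampling_nearest mode on a single-channel 2x2 map with an integer scale of 2; bilinear is omitted, the values are arbitrary, and this is not the clDNN kernel.

// Standalone sketch of nearest-neighbor upsampling by an integer scale factor
// (cldnn_upsampling_nearest-style) on a single-channel 2-D map.
#include <cstdio>
#include <vector>

int main() {
    const int h = 2, w = 2, scale = 2;
    std::vector<float> src = {1, 2, 3, 4};

    std::vector<float> dst(h * scale * w * scale);
    for (int y = 0; y < h * scale; ++y)
        for (int x = 0; x < w * scale; ++x)
            dst[y * w * scale + x] = src[(y / scale) * w + (x / scale)];

    for (int y = 0; y < h * scale; ++y, std::printf("\n"))
        for (int x = 0; x < w * scale; ++x)
            std::printf("%.0f ", dst[y * w * scale + x]);
}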
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/compounds.h b/inference-engine/thirdparty/clDNN/api/CPP/compounds.h
deleted file mode 100644 (file)
index bc05b8d..0000000
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-#pragma once
-
-#include <vector>
-#include <cassert>
-#include <iterator>
-#include <cstring>
-#include <string>
-#include <stdexcept>
-
-#include "meta_utils.hpp"
-
-namespace cldnn {
-
-/// @addtogroup cpp_api C++ API
-/// @{
-
-/// @cond CPP_HELPERS
-
-/// @defgroup cpp_helpers Helpers
-/// @{
-
-template <typename T>
-class mutable_array_ref {
-public:
-    typedef size_t size_type;
-
-    mutable_array_ref() : _data(nullptr), _size(0) {}
-    explicit mutable_array_ref(T& val) : _data(&val), _size(1) {}
-    mutable_array_ref(T* data, size_t size) : _data(data), _size(size) {}
-
-    template <size_t N>
-    explicit mutable_array_ref(T (&arr)[N]) : _data(arr), _size(N) {}
-
-    mutable_array_ref(const mutable_array_ref& other) : _data(other._data), _size(other._size) {}
-
-    mutable_array_ref& operator=(const mutable_array_ref& other) {
-        if (this == &other)
-            return *this;
-        _data = other._data;
-        _size = other._size;
-        return *this;
-    }
-
-    T* data() const { return _data; }
-    size_t size() const { return _size; }
-    bool empty() const { return _size == 0; }
-
-#if defined(_SECURE_SCL) && (_SECURE_SCL > 0)
-    typedef stdext::checked_array_iterator<T*> iterator;
-    typedef stdext::checked_array_iterator<const T*> const_iterator;
-    iterator begin() const { return stdext::make_checked_array_iterator(_data, _size); }
-    iterator end() const { return stdext::make_checked_array_iterator(_data, _size, _size); }
-    const_iterator cbegin() const { return stdext::make_checked_array_iterator(_data, _size); }
-    const_iterator cend() const { return stdext::make_checked_array_iterator(_data, _size, _size); }
-#else
-    typedef T* iterator;
-    typedef T* const_iterator;
-    iterator begin() const { return _data; }
-    iterator end() const { return _data + _size; }
-    const_iterator cbegin() const { return _data; }
-    const_iterator cend() const { return _data + _size; }
-#endif
-
-    T& operator[](size_t idx) const {
-        assert(idx < _size);
-        return _data[idx];
-    }
-
-    T& at(size_t idx) const {
-        if (idx >= _size) throw std::out_of_range("idx");
-        return _data[idx];
-    }
-
-    std::vector<T> vector() const { return std::vector<T>(_data, _data + _size); }
-
-private:
-    T* _data;
-    size_t _size;
-};
-
-template <typename T>
-class array_ref {
-public:
-    typedef size_t size_type;
-
-    array_ref() : _data(nullptr), _size(0) {}
-    explicit array_ref(const T& val) : _data(&val), _size(1) {}
-    array_ref(const T* data, size_t size) : _data(data), _size(size) {}
-
-    template <typename A>
-    explicit array_ref(const std::vector<T, A>& vec) : _data(vec.data()), _size(vec.size()) {}
-
-    template <size_t N>
-    explicit array_ref(const T (&arr)[N]) : _data(arr), _size(N) {}
-
-    explicit array_ref(const mutable_array_ref<T>& other) : _data(other.data()), _size(other.size()) {}
-
-    array_ref(const array_ref& other) : _data(other._data), _size(other._size) {}
-
-    array_ref& operator=(const array_ref& other) {
-        if (this == &other)
-            return *this;
-        _data = other._data;
-        _size = other._size;
-        return *this;
-    }
-
-    const T* data() const { return _data; }
-    size_t size() const { return _size; }
-    bool empty() const { return _size == 0; }
-
-#if defined(_SECURE_SCL) && (_SECURE_SCL > 0)
-    typedef stdext::checked_array_iterator<const T*> iterator;
-    typedef stdext::checked_array_iterator<const T*> const_iterator;
-    iterator begin() const { return stdext::make_checked_array_iterator(_data, _size); }
-    iterator end() const { return stdext::make_checked_array_iterator(_data, _size, _size); }
-    const_iterator cbegin() const { return stdext::make_checked_array_iterator(_data, _size); }
-    const_iterator cend() const { return stdext::make_checked_array_iterator(_data, _size, _size); }
-#else
-    typedef const T* iterator;
-    typedef const T* const_iterator;
-    iterator begin() const { return _data; }
-    iterator end() const { return _data + _size; }
-    const_iterator cbegin() const { return _data; }
-    const_iterator cend() const { return _data + _size; }
-#endif
-
-    const T& operator[](size_t idx) const {
-        assert(idx < _size);
-        return _data[idx];
-    }
-
-    const T& at(size_t idx) const {
-        if (idx >= _size) throw std::out_of_range("idx");
-        return _data[idx];
-    }
-
-    std::vector<T> vector() const { return std::vector<T>(_data, _data + _size); }
-
-private:
-    const T* _data;
-    size_t _size;
-};
-
-// NOTE: It seems that clang before version 3.9 has a bug that treats a non-member template function with a deleted function
-//       body as non-template or non-specializable (specializations are treated as redefinitions).
-// template<typename Char> size_t basic_strlen(const Char* str) = delete;
-template <typename Char>
-size_t basic_strlen(const Char*) {
-    static_assert(meta::always_false<Char>::value, "basic_strlen<Char> for selected Char type is deleted.");
-    return 0;
-}
-
-template <>
-inline size_t basic_strlen(const char* str) { return std::strlen(str); }
-
-template <>
-inline size_t basic_strlen(const wchar_t* str) { return std::wcslen(str); }
-
-template <typename Char>
-class basic_string_ref {
-public:
-    typedef const Char* iterator;
-    typedef const Char* const_iterator;
-    typedef size_t size_type;
-
-private:
-    const Char* _data;
-    size_t _size;
-
-public:
-    basic_string_ref() : _data(nullptr), _size(0) {}
-    explicit basic_string_ref(const Char* str) : _data(str), _size(basic_strlen(str)) {}
-
-    template <typename T, typename A>
-    explicit basic_string_ref(const std::basic_string<Char, T, A>& str) : _data(str.c_str()), _size(str.size()) {}
-
-    basic_string_ref(const basic_string_ref& other) : _data(other._data), _size(other._size) {}
-
-    basic_string_ref& operator=(const basic_string_ref& other) {
-        if (this == &other)
-            return *this;
-        _data = other._data;
-        _size = other._size;
-        return *this;
-    }
-
-    const Char* data() const { return _data; }
-    const Char* c_str() const { return _data; }
-    size_t size() const { return _size; }
-    size_t length() const { return _size; }
-    bool empty() const { return _size == 0; }
-
-    iterator begin() const { return _data; }
-    iterator end() const { return _data + _size; }
-    const_iterator cbegin() const { return begin(); }
-    const_iterator cend() const { return end(); }
-
-    const Char& operator[](size_t idx) {
-        assert(idx < _size);
-        return _data[idx];
-    }
-
-    std::basic_string<Char> str() const { return std::basic_string<Char>(_data, _size); }
-    operator std::basic_string<Char>() const { return str(); }
-};
-
-typedef basic_string_ref<char> string_ref;
-typedef basic_string_ref<wchar_t> wstring_ref;
-
-/// @}
-
-/// @endcond
-
-/// @}
-}  // namespace cldnn
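compounds.h above is essentially a set of non-owning views (array_ref, mutable_array_ref, basic_string_ref). A standalone sketch of that idea with a hypothetical array_view class, just to show that copying the view never copies the underlying data; it is not part of the clDNN API.

// Standalone sketch of the non-owning view idea behind array_ref/mutable_array_ref:
// the wrapper stores only a pointer and a size, so copying it never copies the data.
#include <cassert>
#include <cstdio>
#include <vector>

template <typename T>
class array_view {            // hypothetical name; mirrors the array_ref shown above
public:
    array_view(const T* data, size_t size) : _data(data), _size(size) {}
    const T& operator[](size_t i) const { assert(i < _size); return _data[i]; }
    size_t size() const { return _size; }
private:
    const T* _data;
    size_t _size;
};

int main() {
    std::vector<int> v = {1, 2, 3};
    array_view<int> view(v.data(), v.size());
    std::printf("view[1] = %d (of %zu elements)\n", view[1], view.size());
}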
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/event.hpp b/inference-engine/thirdparty/clDNN/api/CPP/event.hpp
deleted file mode 100644 (file)
index 81b863c..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn_defs.h"
-#include "engine.hpp"
-#include "profiling.hpp"
-#include <algorithm>
-#include <cassert>
-#include <vector>
-#include <memory>
-
-namespace cldnn {
-
-/// @addtogroup cpp_api C++ API
-/// @{
-
-/// @addtogroup cpp_event Events Support
-/// @{
-
-/// @brief Represents an clDNN Event object
-struct event {
-    /// @brief Create an event which can be set to 'completed' by user.
-    static event create_user_event(const engine& engine, uint16_t stream_id) {
-        event status = (event) check_status<cldnn_event>("create user event failed", [&](status_t* status) {
-            return cldnn_create_user_event(engine.get(), stream_id, status);
-        });
-        return status;
-    }
-
-    /// @brief Construct from C API handler @ref ::cldnn_event.
-    explicit event(cldnn_event impl) : _impl(impl) {
-        if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null");
-    }
-
-    event(const event& other) : _impl(other._impl) {
-        retain();
-    }
-
-    event& operator=(const event& other) {
-        if (_impl == other._impl) return *this;
-        release();
-        _impl = other._impl;
-        retain();
-        return *this;
-    }
-
-    ~event() {
-        release();
-    }
-
-    friend bool operator==(const event& lhs, const event& rhs) { return lhs._impl == rhs._impl; }
-    friend bool operator!=(const event& lhs, const event& rhs) { return !(lhs == rhs); }
-
-    /// @brief Wait for event completion.
-    void wait() const {
-        check_status<void>("wait event failed", [=](status_t* status) { cldnn_wait_for_event(_impl, status); });
-    }
-
-    /// @brief Set event status to 'completed'.
-    void set() const {
-        check_status<void>("set event failed", [=](status_t* status) { cldnn_set_event(_impl, status); });
-    }
-
-    /// @brief Register call back to be called on event completion.
-    void set_event_handler(cldnn_event_handler handler, void* param) const {
-        check_status<void>("set event handler failed", [=](status_t* status) { cldnn_add_event_handler(_impl, handler, param, status); });
-    }
-
-    /// @brief Get profiling info for the event associated with network output.
-    std::vector<instrumentation::profiling_interval> get_profiling_info() const {
-        using namespace instrumentation;
-        wait();
-        size_t size_ret = 0;
-        status_t err_invalid_arg = CLDNN_SUCCESS;
-        cldnn_get_event_profiling_info(_impl, nullptr, 0, &size_ret, &err_invalid_arg);
-
-        if (size_ret == 0) {
-            return {};
-        }
-
-        std::vector<cldnn_profiling_interval> profiling_info_ref(size_ret);
-
-        check_status<void>("get event profiling info failed", [&](status_t* status) {
-            cldnn_get_event_profiling_info(_impl, profiling_info_ref.data(), profiling_info_ref.size(), &size_ret, status);
-        });
-        assert(profiling_info_ref.size() == size_ret);
-
-        std::vector<profiling_interval> result(profiling_info_ref.size());
-        std::transform(
-            std::begin(profiling_info_ref),
-            std::end(profiling_info_ref),
-            std::begin(result),
-            [](const cldnn_profiling_interval& ref) -> profiling_interval {
-                return {
-                    ref.name,
-                    std::make_shared<profiling_period_basic>(std::chrono::nanoseconds(ref.nanoseconds))};
-            });
-        return result;
-    }
-
-    /// @brief Returns C API event handler.
-    cldnn_event get() const { return _impl; }
-
-private:
-    cldnn_event _impl;
-    void retain() {
-        check_status<void>("retain event failed", [=](status_t* status) { cldnn_retain_event(_impl, status); });
-    }
-    void release() {
-        check_status<void>("release event failed", [=](status_t* status) { cldnn_release_event(_impl, status); });
-    }
-};
-CLDNN_API_CLASS(event)
-
-/// @}
-/// @}
-}  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/network.hpp b/inference-engine/thirdparty/clDNN/api/CPP/network.hpp
deleted file mode 100644 (file)
index 492a98a..0000000
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
-// Copyright (c) 2016-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "cldnn_defs.h"
-#include "compounds.h"
-#include "memory.hpp"
-#include "program.hpp"
-#include "event.hpp"
-
-#include <cstdint>
-#include <algorithm>
-#include <map>
-#include <vector>
-#include <utility>
-#include <string>
-
-namespace cldnn {
-
-/// @addtogroup cpp_api C++ API
-/// @{
-
-/// @defgroup cpp_network Network Execution
-/// @{
-
-/// @brief Represents network output returned by @ref network::get_output().
-struct network_output {
-    /// @brief Returns @ref event associated with the output.
-    event get_event() const { return _event; }
-
-    /// @brief Returns @ref memory object of the output. Blocks until the associated @ref event is complete.
-    memory get_memory() const {
-        _event.wait();
-        return _result;
-    }
-
-private:
-    event _event;
-    memory _result;
-    network_output(event evt, memory mem) : _event(evt), _result(mem) {}
-    network_output(cldnn_event evt, cldnn_memory mem) : _event(evt), _result(mem) {}
-    friend struct network;
-};
-
-/// @brief Executable network allocated from @ref program.
-struct network {
-    /// @brief Allocate network
-    /// @param program The program object which contains compiled primitives this network should allocate memory for.
-    network(program const& program, uint16_t stream_id)
-        : _impl(check_status<cldnn_network>("network allocation failed", [&](status_t* status) {
-              return cldnn_allocate_network(program.get(), stream_id, status);
-          })) {}
-
-    /// @brief Constructs network object from implicitly created program object. This is a shorthand for network(program(engine, topology, options))
-    /// @param engine
-    /// @param topology
-    /// @param options
-    network(const engine& engine,
-            const topology& topology,
-            const build_options& options = build_options(),
-            uint16_t stream_id = 0)
-        : network(program(engine, topology, options), stream_id) {}
-
-    /// @brief Constructs network object from C API @ref cldnn_network.
-    explicit network(cldnn_network impl) : _impl(impl) {
-        if (_impl == nullptr)
-            throw std::invalid_argument("implementation pointer should not be null");
-    }
-
-    /// @brief Copy construction.
-    network(const network& other) : _impl(other._impl) { retain(); }
-
-    /// @brief Copy assignment.
-    network& operator=(const network& other) {
-        if (_impl == other._impl)
-            return *this;
-        release();
-        _impl = other._impl;
-        retain();
-        return *this;
-    }
-
-    /// @brief Releases wrapped C API @ref cldnn_network.
-    ~network() { release(); }
-
-    friend bool operator==(const network& lhs, const network& rhs) { return lhs._impl == rhs._impl; }
-    friend bool operator!=(const network& lhs, const network& rhs) { return !(lhs == rhs); }
-
-    /// @brief Returns @ref engine by which network was built.
-    engine get_engine() const {
-        engine status = (engine) check_status<cldnn_engine>("get network engine failed",
-                                          [&](status_t* status) { return cldnn_get_network_engine(_impl, status); });
-        return status;
-    }
-
-    /// @brief Returns network internal @ref program.
-    program get_program() const {
-        program status = (program) check_status<cldnn_program>("get network program failed",
-                                           [&](status_t* status) { return cldnn_get_network_program(_impl, status); });
-        return status;
-    }
-
-    /// @brief Provides @ref memory for @ref input_layout primitives defined by user in source @ref topology.
-    void set_input_data(const primitive_id& id, const memory& mem) const {
-        check_status<void>("set network input failed",
-                           [&](status_t* status) { cldnn_set_network_input(_impl, id.c_str(), mem.get(), status); });
-    }
-
-    /// @brief Sets learning rate for training primitives.
-    void set_learning_rate(const float lr) {
-        check_status<void>("set learning rate failed",
-                           [&](status_t* status) { cldnn_set_learning_rate(_impl, lr, status); });
-    }
-
-    /// @brief Return learning rate.
-    float get_learning_rate() {
-        return check_status<float>("get learning rate failed",
-                                   [&](status_t* status) { return cldnn_get_learning_rate(_impl, status); });
-    }
-
-    /// @brief Return stream id.
-    uint16_t get_stream_id() {
-        return check_status<uint16_t>("get stream id failed",
-                                      [&](status_t* status) { return cldnn_get_network_stream_id(_impl, status); });
-    }
-
-    std::string get_primitive_info(const primitive_id& id) const {
-        size_t size_ret = 0;
-        status_t err_invalid_arg = CLDNN_SUCCESS;
-
-        cldnn_get_primitive_info(_impl, id.c_str(), nullptr, 0, &size_ret, &err_invalid_arg);
-        assert(err_invalid_arg == CLDNN_INVALID_ARG);
-        assert(size_ret > 0);
-        std::vector<char> names_buf(size_ret);
-
-        check_status<void>("get primitive info failed", [&](status_t* status) {
-            cldnn_get_primitive_info(_impl, id.c_str(), names_buf.data(), names_buf.size(), &size_ret, status);
-        });
-        assert(names_buf.size() == size_ret);
-
-        std::string result(names_buf.begin(), names_buf.end());
-        return result;
-    }
-
-    /// @brief Returns description of final runtime graph
-    std::vector<primitive_info> get_primitives_info() {
-        size_t size_ret = 0;
-        status_t err_invalid_arg = CLDNN_SUCCESS;
-        cldnn_get_primitives_info(_impl, nullptr, 0, &size_ret, &err_invalid_arg);
-        assert(size_ret > 0);
-        std::vector<const cldnn_primitive_info*> buf(size_ret);
-
-        check_status<void>("get network primitives info extended", [&](status_t* status) {
-            cldnn_get_primitives_info(_impl, buf.data(), buf.size(), &size_ret, status);
-        });
-
-        std::vector<primitive_info> res;
-        for (auto& pi : buf) {
-            res.emplace_back(pi);
-        }
-        return res;
-    }
-
-    /// @brief Returns description of all optimization stages
-    std::vector<std::pair<std::string, std::vector<primitive_info>>> get_optimization_steps_info() {
-        size_t total_size_ret = 0;
-        size_t steps_count_ret = 0;
-        size_t step_names_size_ret = 0;
-        status_t err_invalid_arg = CLDNN_SUCCESS;
-        cldnn_get_optimizer_passes_info(_impl,
-                                        nullptr,
-                                        nullptr,
-                                        nullptr,
-                                        0,
-                                        &total_size_ret,
-                                        &steps_count_ret,
-                                        &step_names_size_ret,
-                                        &err_invalid_arg);
-        assert(total_size_ret > 0);
-        std::vector<const cldnn_primitive_info*> buf(total_size_ret);
-        std::vector<int> info_size(steps_count_ret);
-        std::vector<char> info_names(step_names_size_ret);
-
-        check_status<void>("get primitives info for each optimization step", [&](status_t* status) {
-            cldnn_get_optimizer_passes_info(_impl,
-                                            buf.data(),
-                                            info_size.data(),
-                                            info_names.data(),
-                                            buf.size(),
-                                            &total_size_ret,
-                                            &steps_count_ret,
-                                            &step_names_size_ret,
-                                            status);
-        });
-
-        std::vector<std::pair<std::string, std::vector<primitive_info>>> res;
-        std::vector<primitive_id> names;
-        for (auto buf_ptr = info_names.data(); *buf_ptr != 0; buf_ptr += names.back().size() + 1) {
-            names.emplace_back(buf_ptr);
-        }
-
-        assert(names.size() == steps_count_ret);
-
-        int j = 0;
-        for (size_t i = 0; i < steps_count_ret; i++) {
-            int sz = info_size[i];
-            std::vector<primitive_info> opt_step;
-            for (int k = 0; k < sz; k++) {
-                opt_step.emplace_back(buf[j]);
-                j++;
-            }
-            res.emplace_back(names[i], opt_step);
-        }
-        return res;
-    }
-
-    /// @brief Returns the list of executed primitives.
-    std::vector<primitive_id> get_executed_primitive_ids() const {
-        return get_prim_ids(cldnn_get_network_executed_primitive_names);
-    }
-
-    /// @brief Returns the list of all primitives ids in network.
-    std::vector<primitive_id> get_all_primitive_ids() const {
-        return get_prim_ids(cldnn_get_network_all_primitive_names);
-    }
-
-    /// @brief Returns the list of all primitives ids in network before graph optimization.
-    std::vector<primitive_id> get_all_primitive_org_ids() const {
-        return get_prim_ids(cldnn_get_network_all_primitive_org_names);
-    }
-
-    /// @brief Returns the list of available network outputs.
-    std::vector<primitive_id> get_output_ids() const { return get_prim_ids(cldnn_get_network_output_names); }
-
-    /// @brief Returns @ref network_output object for particular @p output. Can't be called before network execution
-    network_output get_output(const primitive_id& output_id) const {
-        cldnn_network_output output = check_status<cldnn_network_output>(
-            "get network output failed",
-            [&](status_t* status) { return cldnn_get_network_output(_impl, output_id.c_str(), status); });
-        return network_output(output.event, output.memory);
-    }
-
-    /// @brief Returns @ref memory object for particular @p output. Can be called before network execution
-    memory get_output_memory(const primitive_id& output_id) const {
-        memory output = (memory) check_status<cldnn_memory>("get output memory failed", [&](status_t* status) {
-            return cldnn_get_network_output_memory(_impl, output_id.c_str(), status);
-        });
-        return output;
-    }
-
-    /// @brief Returns @ref event object for particular @p primitive. Can't be called before network execution
-    event get_primitive_event(const primitive_id& output_id) const {
-        event output = (event) check_status<cldnn_event>("get output event failed", [&](status_t* status) {
-            return cldnn_get_network_output_event(_impl, output_id.c_str(), status);
-        });
-        return output;
-    }
-
-    /// @brief Returns the list of @ref event for the primitives that were executed in network.
-    std::map<primitive_id, event> get_executed_primitives() const {
-        auto primitive_ids = get_executed_primitive_ids();
-        auto all_primitive_ids = get_all_primitive_ids();
-        auto all_primitive_org_ids = get_all_primitive_org_ids();
-        // Get list of optimized primitives
-        std::vector<primitive_id> optimized_primitives;
-        for (decltype(all_primitive_org_ids.size()) i = 0; i < all_primitive_org_ids.size(); i++) {
-            if (all_primitive_ids[i] == "_optimized_")
-                optimized_primitives.push_back(all_primitive_org_ids[i]);
-        }
-        std::map<primitive_id, event> result;
-        for (auto& id : primitive_ids) {
-            if (std::find(optimized_primitives.begin(), optimized_primitives.end(), id) == optimized_primitives.end())
-                result.emplace(id, get_primitive_event(id));
-        }
-        return result;
-    }
-
-    /// @brief Returns the list of primitive ids before and after graph optimization.
-    /// @details If a primitive was not optimized, the old and actual ids will be the same.
-    /// @n If a primitive was optimized during graph optimization, the actual id will be "_optimized_".
-    std::map<primitive_id, primitive_id> get_all_primitives() const {
-        auto primitive_ids = get_all_primitive_ids();
-        auto primitive_org_ids = get_all_primitive_org_ids();
-        std::map<primitive_id, primitive_id> result;
-        for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) {
-            result.emplace(primitive_org_ids[i], primitive_ids[i]);
-        }
-        return result;
-    }
-
-    /// @brief Executes network and returns the list of @ref network_output.
-    /// @param dependencies List of @ref event objects to be waited before network execution.
-    /// @note User should call set_input_data() for every @ref input_layout defined in source @ref topology
-    /// before network execution.
-    std::map<primitive_id, network_output> execute(const std::vector<event>& dependencies = {}) const {
-        std::vector<cldnn_event> dep_refs(dependencies.size());
-        for (decltype(dependencies.size()) i = 0; i < dependencies.size(); i++) {
-            dep_refs[i] = dependencies[i].get();
-        }
-
-        check_status<void>("network execute failed", [&](status_t* status) {
-            return cldnn_execute_network(_impl, dep_refs.data(), dep_refs.size(), status);
-        });
-
-        auto output_ids = get_output_ids();
-        std::map<primitive_id, network_output> result;
-        for (auto& id : output_ids) {
-            result.emplace(id, get_output(id));
-        }
-        return result;
-    }
-
-    /// @brief Returns wrapped C API @ref cldnn_network handler.
-    cldnn_network get() const { return _impl; }
-
-private:
-    cldnn_network _impl;
-
-    typedef void (
-        *get_prim_ids_func_t)(cldnn_network network, char* names, size_t size, size_t* size_ret, cldnn_status* status);
-
-    void retain() {
-        check_status<void>("retain topology failed", [=](status_t* status) { cldnn_retain_network(_impl, status); });
-    }
-    void release() {
-        check_status<void>("retain topology failed", [=](status_t* status) { cldnn_release_network(_impl, status); });
-    }
-
-    std::vector<primitive_id> get_prim_ids(get_prim_ids_func_t func) const {
-        size_t size_ret = 0;
-        status_t err_invalid_arg = CLDNN_SUCCESS;
-        func(_impl, nullptr, 0, &size_ret, &err_invalid_arg);
-        assert(err_invalid_arg == CLDNN_INVALID_ARG);
-        assert(size_ret > 0);
-        std::vector<char> names_buf(size_ret);
-
-        check_status<void>("get network output ids failed", [&](status_t* status) {
-            func(_impl, names_buf.data(), names_buf.size(), &size_ret, status);
-        });
-        assert(names_buf.size() == size_ret);
-
-        std::vector<primitive_id> result;
-        for (auto buf_ptr = names_buf.data(); *buf_ptr != 0; buf_ptr += result.back().size() + 1) {
-            result.emplace_back(buf_ptr);
-        }
-        return result;
-    }
-};
-CLDNN_API_CLASS(network)
-/// @}
-/// @}
-}  // namespace cldnn
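For orientation while reading the removed cldnn::network wrapper above, here is a minimal, hedged usage sketch of its execution flow. It relies only on members visible in the deleted header plus set_input_data(), which the execute() doc comment mentions; the `net` and `input_mem` objects, the "input" id, and the include path are assumptions for illustration.

    // Hedged sketch of the removed C++ network execution flow (old api/CPP layout).
    #include "api/CPP/network.hpp"   // assumed include path of the deleted header
    void run_once(cldnn::network& net, const cldnn::memory& input_mem) {
        net.set_input_data("input", input_mem);             // required for every input_layout, per the execute() doc above
        auto outputs = net.execute();                       // std::map<primitive_id, network_output>
        for (auto& kv : outputs) {
            auto out_mem = net.get_output_memory(kv.first); // per-output memory handle
            (void)out_mem;
        }
    }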
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp b/inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp
deleted file mode 100644 (file)
index 22d2b21..0000000
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-#include "cldnn_defs.h"
-#include "compounds.h"
-#include "layout.hpp"
-
-#include <algorithm>
-#include <string>
-#include <vector>
-#include <iostream>
-#include <memory>
-#include <utility>
-
-namespace cldnn {
-/// @addtogroup cpp_api C++ API
-/// @{
-
-/// @addtogroup cpp_topology Network Topology
-/// @{
-
-/// @brief Globally unique primitive type id.
-using primitive_type_id = cldnn_primitive_type_id;
-/// @brief C API compatible unique @p id of a primitive within a topology.
-using primitive_id_ref = cldnn_primitive_id;
-/// @brief Unique @p id of a primitive within a topology.
-using primitive_id = std::string;
-
-/// @brief Dynamic cast to specified primitive description type.
-template <class PType>
-typename PType::dto* as_dto(CLDNN_PRIMITIVE_DESC(primitive) * dto) {
-    if (dto->type != PType::type_id())
-        throw std::invalid_argument("type");
-    return reinterpret_cast<typename PType::dto*>(dto);
-}
-
-/// @brief Dynamic cast to specified primitive description type.
-template <class PType>
-const typename PType::dto* as_dto(const CLDNN_PRIMITIVE_DESC(primitive) * dto) {
-    if (dto->type != PType::type_id())
-        throw std::invalid_argument("type");
-    return reinterpret_cast<const typename PType::dto*>(dto);
-}
-
-struct primitive_info;
-
-/// @brief Base class of network primitive description.
-struct primitive {
-    /// @brief Initialize fields common for all primitives.
-    struct fixed_size_vector_ref {
-    private:
-        std::vector<primitive_id>& vref;
-
-    public:
-        explicit fixed_size_vector_ref(std::vector<primitive_id>& ref) : vref(ref) {}
-
-        auto size() const -> decltype(vref.size()) { return vref.size(); }
-        auto empty() const -> decltype(vref.empty()) { return vref.empty(); }
-        auto begin() const -> decltype(vref.begin()) { return vref.begin(); }
-        auto end() const -> decltype(vref.end()) { return vref.end(); }
-        auto cbegin() const -> decltype(vref.cbegin()) { return vref.cbegin(); }
-        auto cend() const -> decltype(vref.cend()) { return vref.cend(); }
-
-        primitive_id& operator[](size_t idx) { return vref[idx]; }
-        primitive_id const& operator[](size_t idx) const { return vref[idx]; }
-
-        primitive_id& at(size_t idx) { return vref.at(idx); }
-        primitive_id const& at(size_t idx) const { return vref.at(idx); }
-
-        primitive_id* data() { return vref.data(); }
-        const primitive_id* data() const { return vref.data(); }
-
-        const std::vector<primitive_id>& ref() const { return vref; }
-    };
-
-public:
-    primitive(const primitive_type_id& type,
-              const primitive_id& id,
-              const std::vector<primitive_id>& input,
-              const padding& output_padding = padding(),
-              const optional_data_type output_data_type = optional_data_type())
-        : type(type),
-          id(id),
-          input(_input.cpp_ids),
-          output_padding(output_padding),
-          output_data_type(output_data_type),
-          _input(input) {}
-
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{primitive}
-    explicit primitive(const CLDNN_PRIMITIVE_DESC(primitive) * dto)
-        : type(dto->type),
-          id(dto->id),
-          input(_input.cpp_ids),
-          output_padding(dto->output_padding),
-          output_data_type(dto->output_data_type.enabled
-                               ? optional_data_type{static_cast<data_types>(dto->output_data_type.data_type)}
-                               : optional_data_type{}),
-          _input(dto->input) {}
-
-    virtual ~primitive() = default;
-
-    /// @brief Returns pointer to a C API primitive descriptor casted to @CLDNN_PRIMITIVE_DESC{primitive}.
-    virtual const CLDNN_PRIMITIVE_DESC(primitive) * get_dto() const = 0;
-
-    /// @brief Returns references to all primitive ids on which this primitive depends - inputs, weights, biases, etc.
-    std::vector<std::reference_wrapper<primitive_id>> dependencies() {
-        std::vector<std::reference_wrapper<primitive_id>> result;
-        auto&& deps = get_dependencies();
-
-        result.reserve(_input.size() + deps.size());
-        for (auto& pid : _input.cpp_ids) result.push_back(std::ref(pid));
-        for (auto& pid : deps) result.push_back(std::ref(const_cast<primitive_id&>(pid.get())));
-
-        return result;
-    }
-
-    /// @brief Returns copy of all primitive ids on which this primitive depends - inputs, weights, biases, etc.
-    std::vector<primitive_id> dependencies() const {
-        auto result = input.ref();
-        auto deps = get_dependencies();
-        result.insert(result.end(), deps.begin(), deps.end());
-        return result;
-    }
-
-    virtual primitive_id type_string() const = 0;
-
-    /// @brief Implicit conversion to primitive id.
-    operator primitive_id() const { return id; }
-
-    /// @brief Primitive's type id.
-    const primitive_type_id type;
-
-    /// @brief Primitive's id.
-    const primitive_id id;
-
-    /// @brief List of ids of input primitives.
-    fixed_size_vector_ref input;
-
-    /// @brief Requested output padding.
-    padding output_padding;
-
-    /// @brief Requested output precision, if any.
-    optional_data_type output_data_type;
-
-protected:
-    struct primitive_id_arr {
-        explicit primitive_id_arr(std::vector<primitive_id> const& vec) : cpp_ids(vec) {}
-
-        explicit primitive_id_arr(std::vector<primitive_id>&& vec) : cpp_ids(std::move(vec)) {}
-
-        // create from C API id array
-        explicit primitive_id_arr(cldnn_primitive_id_arr c_id_arr) {
-            cpp_ids.resize(c_id_arr.size);
-            for (size_t i = 0; i < c_id_arr.size; ++i) cpp_ids[i] = c_id_arr.data[i];
-        }
-
-        std::vector<primitive_id> cpp_ids;
-        mutable std::vector<cldnn_primitive_id> c_ids;
-        // get C API id array
-        auto ref() const -> decltype(cldnn_primitive_id_arr{c_ids.data(), c_ids.size()}) {
-            c_ids.resize(cpp_ids.size());
-            for (size_t i = 0; i < cpp_ids.size(); ++i) c_ids[i] = cpp_ids[i].c_str();
-
-            return cldnn_primitive_id_arr{c_ids.data(), c_ids.size()};
-        }
-
-        size_t size() const { return cpp_ids.size(); }
-    };
-
-    primitive_id_arr _input;
-
-    virtual std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const { return {}; }
-
-    friend primitive_info;
-};
-
-/// @brief Base class for all primitive implementations.
-template <class PType, class DTO>
-class primitive_base : public primitive {
-public:
-    /// @brief Returns pointer to a C API primitive descriptor casted to @CLDNN_PRIMITIVE_DESC{primitive}.
-    const CLDNN_PRIMITIVE_DESC(primitive) * get_dto() const override {
-        // update common dto fields
-        _dto.id = id.c_str();
-        _dto.type = type;
-        _dto.input = _input.ref();
-        _dto.output_padding = output_padding;
-        _dto.output_data_type.enabled = static_cast<bool>(output_data_type);
-        _dto.output_data_type.data_type = static_cast<cldnn_data_type>(*output_data_type);
-
-        // call abstract method to update primitive-specific fields
-        update_dto(_dto);
-        return reinterpret_cast<const CLDNN_PRIMITIVE_DESC(primitive)*>(&_dto);
-    }
-
-protected:
-    explicit primitive_base(const primitive_id& id,
-                            const std::vector<primitive_id>& input,
-                            const padding& output_padding = padding(),
-                            optional_data_type output_data_type = optional_data_type())
-        : primitive(PType::type_id(), id, input, output_padding, output_data_type) {}
-
-    explicit primitive_base(const DTO* dto) : primitive(reinterpret_cast<const CLDNN_PRIMITIVE_DESC(primitive) *>(dto)) {
-        if (dto->type != PType::type_id())
-            throw std::invalid_argument("DTO type mismatch");
-    }
-
-private:
-    mutable DTO _dto;
-
-    virtual void update_dto(DTO& dto) const = 0;
-};
-
-struct primitive_info {
-    primitive_info(const primitive_id& original_id,
-                   const std::string& type_id,
-                   const std::vector<primitive_id>& dependencies,
-                   const std::vector<primitive_id>& users,
-                   const std::vector<primitive_id>& fused_ids,
-                   const layout& output_layout,
-                   const std::string& layout_str,
-                   const std::string& kernel_id,
-                   bool is_cpu,
-                   int exec_id)
-        : original_id(original_id),
-          type_id(type_id),
-          c_dependencies(dependencies),
-          c_users(users),
-          c_fused_ids(fused_ids),
-          output_layout(output_layout),
-          layout_str(layout_str),
-          kernel_id(kernel_id),
-          is_cpu(is_cpu),
-          exec_id(exec_id) {}
-
-    explicit primitive_info(const cldnn_primitive_info* c_info)
-        : original_id(c_info->original_id),
-          type_id(c_info->type_id),
-          c_dependencies(c_info->dependencies),
-          c_users(c_info->users),
-          c_fused_ids(c_info->fused_ids),
-          output_layout(c_info->output_layout),
-          layout_str(c_info->layout_str),
-          kernel_id(c_info->kernel_id),
-          is_cpu(c_info->is_cpu != 0),
-          exec_id(c_info->exec_id) {}
-
-    primitive_id original_id;
-    std::string type_id;
-    primitive::primitive_id_arr c_dependencies;
-    primitive::primitive_id_arr c_users;
-    primitive::primitive_id_arr c_fused_ids;
-    layout output_layout;
-    std::string layout_str;
-    std::string kernel_id;
-    bool is_cpu;
-    int exec_id;
-
-    const cldnn_primitive_info* get_dto() const {
-        dto.original_id = original_id.c_str();
-        dto.type_id = type_id.c_str();
-        dto.dependencies = c_dependencies.ref();
-        dto.users = c_users.ref();
-        dto.fused_ids = c_fused_ids.ref();
-        dto.output_layout = output_layout;
-        dto.layout_str = layout_str.c_str();
-        dto.kernel_id = kernel_id.c_str();
-        dto.is_cpu = is_cpu;
-        dto.exec_id = exec_id;
-
-        return &dto;
-    }
-
-    mutable cldnn_primitive_info dto;
-};
-
-#define CLDNN_DEFINE_TYPE_ID(PType)                                                                               \
-    static primitive_type_id type_id() {                                                                          \
-        return check_status<primitive_type_id>(#PType " type id failed",                                          \
-                                               [](status_t* status) { return cldnn_##PType##_type_id(status); }); \
-    }
-
-#define CLDNN_DEFINE_TYPE_STRING(PType)                 \
-    primitive_id type_string() const override {         \
-        static constexpr const char* type_str = #PType; \
-        return std::string(type_str);                   \
-    }
-
-#define CLDNN_DECLARE_PRIMITIVE(PType)       \
-    typedef CLDNN_PRIMITIVE_DESC(PType) dto; \
-    CLDNN_DEFINE_TYPE_ID(PType)              \
-    CLDNN_DEFINE_TYPE_STRING(PType)
-
-/// @}
-/// @}
-}  // namespace cldnn
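As a hedged illustration of the DTO bridging pattern that this deleted primitive.hpp implemented, a hypothetical primitive written against the old API would have looked roughly like the sketch below. `my_op`, its `alpha` field, and CLDNN_PRIMITIVE_DESC(my_op) are invented for illustration only and never existed in clDNN, so this will not compile against the real library; it mirrors the before-state of real primitives such as activation later in this patch.

    // Hypothetical primitive against the removed DTO-based API (illustration only).
    struct my_op : public primitive_base<my_op, CLDNN_PRIMITIVE_DESC(my_op)> {
        CLDNN_DECLARE_PRIMITIVE(my_op)

        my_op(const primitive_id& id, const primitive_id& input, float alpha,
              const padding& output_padding = padding())
            : primitive_base(id, {input}, output_padding), alpha(alpha) {}

        // Mirror constructor from the C API descriptor, as every old primitive had.
        my_op(const dto* dto) : primitive_base(dto), alpha(dto->alpha) {}

        float alpha;

    protected:
        void update_dto(dto& dto) const override { dto.alpha = alpha; }
    };

The rest of this patch removes exactly these two pieces, the dto copy constructor and update_dto(), from every concrete primitive.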
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/activation.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -28,6 +27,60 @@ namespace cldnn {
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
+/// @brief activation functions
+enum class activation_func {
+    none,                 // val
+    logistic,             // 1/(1 + exp(-val))
+    hyperbolic_tan,       // tanh(val)
+    relu,                 // max(0, val)
+    relu_negative_slope,  // max(0, val) + a * min(0, val)    (a is additional param)
+    clamp,                // max(a, min(b, val))              (a,b are additional params)
+    softrelu,             // log(1 + exp(val))
+    abs,                  // abs(val)
+    linear,               // a*val + b                        (a,b are additional params)
+    square,               // val*val
+    sqrt,                 // sqrt(val)
+    elu,                  // max(0, val) + a * (exp(min(0, val)) - 1) (a is additional param)
+    sin,                  // sin(val)
+    asin,                 // asin(val)
+    sinh,                 // sinh(val)
+    asinh,                // asinh(val)
+    cos,                  // cos(val)
+    acos,                 // acos(val)
+    cosh,                 // cosh(val)
+    acosh,                // acosh(val)
+    log,                  // log(val)
+    log2,                 // log2(val)
+    exp,                  // exp(val)
+    tan,                  // tan(val)
+    atan,                 // atan(val)
+    atanh,                // atanh(val)
+    floor,                // floor(val)
+    ceil,                 // ceil(val)
+    negative,             // -val
+    negation,             // !val
+    pow,                  // pow(val, a)
+    reciprocal,           // (1/val)
+    erf,                  // Gauss error function
+    hard_sigmoid,         // max(0, min(1, a * val + b))       (a,b are additional params)
+    selu,                 // for val <= 0: b * (a * e^val - a); for val > 0: b * val (a,b are additional params)
+    sign,                 // val > 0: 1; val < 0: -1; val == 0: 0
+    softplus,             // ln(exp(val) + 1)
+    softsign              // (val/(1+|val|))
+};
+
+/// @brief activation gradient functions
+enum class activation_grad_func {
+    none,                 // val
+    relu,                 // val * (input > 0)
+    relu_negative_slope,  // val * ((input > 0) + a * (input <= 0))   (a is additional param)
+};
+
+/// @brief activation additional params
+struct activation_additional_params {
+    float a, b;
+};
+
 /// @brief Activation using rectified linear unit or parameterized rectified linear unit.
 /// @details Can get one negative slope or negative slope per channel.
 /// @par Algorithm:
@@ -36,7 +89,7 @@ namespace cldnn {
 ///   @li out(i,x,y) : value at x, y from i-th feature map after activation.
 ///   @li in(i,x,y) : value at x, y from i-th feature map before activation.
 ///   @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
-struct activation : public primitive_base<activation, CLDNN_PRIMITIVE_DESC(activation)> {
+struct activation : public primitive_base<activation> {
     CLDNN_DECLARE_PRIMITIVE(activation)
 
     /// @brief Constructs Relu primitive.
@@ -46,11 +99,11 @@ struct activation : public primitive_base<activation, CLDNN_PRIMITIVE_DESC(activ
     /// @param additional_params additional params (slope/max_val/linear a,b).
     activation(const primitive_id& id,
                const primitive_id& input,
-               cldnn_activation_func activation_func,
-               cldnn_activation_additional_params additional_params = {0.f, 0.f},
+               activation_func activation_function,
+               activation_additional_params additional_params = {0.f, 0.f},
                const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          activation_func(activation_func),
+          activation_function(activation_function),
           additional_params(additional_params),
           additional_params_input("") {}
 
@@ -63,25 +116,18 @@ struct activation : public primitive_base<activation, CLDNN_PRIMITIVE_DESC(activ
     activation(const primitive_id& id,
                const primitive_id& input,
                const primitive_id& additional_params_input,
-               cldnn_activation_func activation_func,
+               activation_func activation_function,
                const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          activation_func(activation_func),
+          activation_function(activation_function),
           additional_params({0, 0}),
           additional_params_input(additional_params_input) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{activation}
-    activation(const dto* dto)
-        : primitive_base(dto),
-          activation_func(dto->activation_func),
-          additional_params(dto->additional_params),
-          additional_params_input(dto->additional_params_input) {}
-
     /// @brief activation function.
-    cldnn_activation_func activation_func;
+    activation_func activation_function;
 
     /// @brief activation additional params.
-    cldnn_activation_additional_params additional_params;
+    activation_additional_params additional_params;
 
     /// @brief PRelu activation slope input primitive id.
     /// Input x dimension should be equal to input feature size (one slope per channel).
@@ -94,14 +140,8 @@ protected:
             return {};
         return {additional_params_input};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.activation_func = activation_func;
-        dto.additional_params = additional_params;
-        dto.additional_params_input = additional_params_input.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
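A short, hedged sketch of constructing the reworked activation primitive with the new strongly typed enum; "conv1" is a hypothetical upstream primitive id and the include path is an assumption for the new api/ layout. Compared with the removed overload, only the enum and params types changed; the id and input arguments are the same.

    #include "api/activation.hpp"   // assumed new header location
    cldnn::activation prelu("prelu1", "conv1",
                            cldnn::activation_func::relu_negative_slope,
                            {0.1f, 0.0f});   // activation_additional_params {a, b}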
@@ -16,8 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/activation_grad.h"
 #include "primitive.hpp"
+#include "activation.hpp"
 #include <vector>
 
 namespace cldnn {
@@ -35,7 +35,7 @@ namespace cldnn {
 ///   @li out(i,x,y) : value at x, y from i-th feature map after activation.
 ///   @li in(i,x,y) : value at x, y from i-th feature map before activation.
 ///   @li slope(i) : the slope value of the i-th feature map (can be shared across channels or one slope per channel).
-struct activation_grad : public primitive_base<activation_grad, CLDNN_PRIMITIVE_DESC(activation_grad)> {
+struct activation_grad : public primitive_base<activation_grad> {
     CLDNN_DECLARE_PRIMITIVE(activation_grad)
 
     /// @brief Constructs Relu grad primitive.
@@ -47,11 +47,11 @@ struct activation_grad : public primitive_base<activation_grad, CLDNN_PRIMITIVE_
     activation_grad(const primitive_id& id,
                     const primitive_id& input_grad,
                     const primitive_id& input,
-                    cldnn_activation_grad_func activation_grad_func,
-                    cldnn_activation_additional_params additional_params = {0.f, 0.f},
+                    activation_grad_func activation_grad_function,
+                    activation_additional_params additional_params = {0.f, 0.f},
                     const padding& output_padding = padding())
         : primitive_base(id, {input_grad, input}, output_padding),
-          activation_grad_func(activation_grad_func),
+          activation_grad_function(activation_grad_function),
           additional_params(additional_params),
           additional_params_input("") {}
 
@@ -65,25 +65,18 @@ struct activation_grad : public primitive_base<activation_grad, CLDNN_PRIMITIVE_
                     const primitive_id& input_grad,
                     const primitive_id& input,
                     const primitive_id& additional_params_input,
-                    cldnn_activation_grad_func activation_grad_func,
+                    activation_grad_func activation_grad_function,
                     const padding& output_padding = padding())
         : primitive_base(id, {input_grad, input}, output_padding),
-          activation_grad_func(activation_grad_func),
+          activation_grad_function(activation_grad_function),
           additional_params({0, 0}),
           additional_params_input(additional_params_input) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{activation_grad}
-    activation_grad(const dto* dto)
-        : primitive_base(dto),
-          activation_grad_func(dto->activation_grad_func),
-          additional_params(dto->additional_params),
-          additional_params_input(dto->additional_params_input) {}
-
     /// @brief activation_grad function.
-    cldnn_activation_grad_func activation_grad_func;
+    activation_grad_func activation_grad_function;
 
     /// @brief activation_grad additional params.
-    cldnn_activation_additional_params additional_params;
+    activation_additional_params additional_params;
 
     /// @brief PRelu activation slope input primitive id.
     /// Input x dimension should be equal to input feature size (one slope per channel).
@@ -96,14 +89,8 @@ protected:
             return {};
         return {additional_params_input};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.activation_grad_func = activation_grad_func;
-        dto.additional_params = additional_params;
-        dto.additional_params_input = additional_params_input.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/apply_adam.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -40,7 +39,7 @@ namespace cldnn {
 /// @n float v[t] = beta2 * v[t-1] + (1 - beta2) * grad[t] * grad[t];
 /// @n float result = result - lr[t] * m[t] / (sqrt(v[t]) + epsilon);
 
-struct apply_adam : public primitive_base<apply_adam, CLDNN_PRIMITIVE_DESC(apply_adam)> {
+struct apply_adam : public primitive_base<apply_adam> {
     CLDNN_DECLARE_PRIMITIVE(apply_adam)
 
     /// @brief Constructs apply Adam primitive.
@@ -78,19 +77,6 @@ struct apply_adam : public primitive_base<apply_adam, CLDNN_PRIMITIVE_DESC(apply
           epsilon(epsilon),
           dependency_id(dependency_id) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{apply_adam}
-    apply_adam(const dto* dto)
-        : primitive_base(dto),
-          m(dto->m),
-          v(dto->v),
-          beta1_power(dto->beta1_power),
-          beta2_power(dto->beta2_power),
-          lr(dto->lr),
-          beta1(dto->beta1),
-          beta2(dto->beta2),
-          epsilon(dto->epsilon),
-          dependency_id(dto->dependency_id) {}
-
     /// @brief Primitive id containing m data.
     primitive_id m;
     /// @brief Primitive id containing v data.
@@ -118,18 +104,6 @@ protected:
             ret.push_back(dependency_id);
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.m = m.c_str();
-        dto.v = v.c_str();
-        dto.beta1_power = beta1_power.c_str();
-        dto.beta2_power = beta2_power.c_str();
-        dto.lr = lr;
-        dto.beta1 = beta1;
-        dto.beta2 = beta2;
-        dto.epsilon = epsilon;
-        dto.dependency_id = dependency_id.c_str();
-    }
 };
 /// @}
 /// @}
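The Adam update quoted in the apply_adam comments above reduces, per element, to the scalar sketch below; lr_t stands for the per-step learning rate lr[t] from the quoted formulas and is assumed to be computed by the caller.

    #include <cmath>
    // Scalar form of the update described by apply_adam; state m and v are updated in place.
    inline float adam_step(float w, float grad, float& m, float& v,
                           float lr_t, float beta1, float beta2, float epsilon) {
        m = beta1 * m + (1.0f - beta1) * grad;
        v = beta2 * v + (1.0f - beta2) * grad * grad;
        return w - lr_t * m / (std::sqrt(v) + epsilon);
    }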
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/arg_max_min.h"
 #include "primitive.hpp"
 #include <algorithm>
 #include <vector>
@@ -34,7 +33,7 @@ namespace cldnn {
 /// We use f32, as larger indices cannot fit in smaller data types.
 /// If you want to use the output as indices outside of the network (inside the network, just use the lookup table primitive),
 /// you will need to first cast it to int (see the tests for an example).
-struct arg_max_min : public primitive_base<arg_max_min, CLDNN_PRIMITIVE_DESC(arg_max_min)> {
+struct arg_max_min : public primitive_base<arg_max_min> {
     CLDNN_DECLARE_PRIMITIVE(arg_max_min)
 
     /// @brief Enum type to specify axis to return values from.
@@ -72,16 +71,6 @@ struct arg_max_min : public primitive_base<arg_max_min, CLDNN_PRIMITIVE_DESC(arg
           with_axis(axis == axis_name::xyf ? false : true),
           values_first(values_first) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{arg_max_min}
-    arg_max_min(const dto* dto)
-        : primitive_base(dto),
-          top_k(dto->top_k),
-          output_type(static_cast<out_type>(dto->output_type)),
-          axis(static_cast<axis_name>(dto->axis)),
-          sort(static_cast<sort_type>(dto->sort)),
-          with_axis(dto->with_axis != 0),
-          values_first(dto->values_first != 0) {}
-
     /// @brief Number of indices to output.
     uint32_t top_k;
     /// @brief Type of output - max or min.
@@ -94,18 +83,8 @@ struct arg_max_min : public primitive_base<arg_max_min, CLDNN_PRIMITIVE_DESC(arg
     bool with_axis;
     /// @brief Sets output order: if True, then the first output contains values and the second (optional) contains indices.
     bool values_first;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.top_k = top_k;
-        dto.output_type = static_cast<cldnn_arg_max_min_out>(output_type);
-        dto.with_axis = with_axis;
-        dto.axis = static_cast<cldnn_arg_max_min_axis>(axis);
-        dto.sort = static_cast<cldnn_arg_max_min_axis>(sort);
-        dto.values_first = values_first;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/average_unpooling.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -30,7 +29,7 @@ namespace cldnn {
 /// @brief Performs "average_unpooling" operation.
 /// @details Reverse operation of average pooling.
 /// Each element in every pooling window is filled with output / window size value. In case of window overlap the elements are added.
-struct average_unpooling : public primitive_base<average_unpooling, CLDNN_PRIMITIVE_DESC(average_unpooling)> {
+struct average_unpooling : public primitive_base<average_unpooling> {
     CLDNN_DECLARE_PRIMITIVE(average_unpooling)
 
     /// @brief Constructs average_unpooling primitive.
@@ -48,25 +47,14 @@ struct average_unpooling : public primitive_base<average_unpooling, CLDNN_PRIMIT
         const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), stride(stride), size(size), output_size(output_size) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{average_unpooling}
-    average_unpooling(const dto* dto)
-        : primitive_base(dto), stride(dto->stride), size(dto->size), output_size(dto->output_size) {}
-
     /// @brief Defines shift in output buffer.
     tensor stride;
     /// @brief Pooling kernel size.
     tensor size;
     /// @brief Output size of this primitive.
     tensor output_size;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.stride = stride;
-        dto.size = size;
-        dto.output_size = output_size;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/batch_norm.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -37,7 +36,7 @@ namespace cldnn {
 /// @n global stats can be computed as:
 /// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
 
-struct batch_norm : public primitive_base<batch_norm, CLDNN_PRIMITIVE_DESC(batch_norm)> {
+struct batch_norm : public primitive_base<batch_norm> {
     CLDNN_DECLARE_PRIMITIVE(batch_norm)
 
     /// @brief Constructs batch normalization primitive.
@@ -146,16 +145,6 @@ struct batch_norm : public primitive_base<batch_norm, CLDNN_PRIMITIVE_DESC(batch
           inv_variance(inv_variance),
           epsilon(epsilon) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{batch_norm}
-    batch_norm(const dto* dto)
-        : primitive_base(dto),
-          mean(dto->mean),
-          variance(dto->variance),
-          scale(dto->scale),
-          shift(dto->shift),
-          inv_variance(dto->inv_variance),
-          epsilon(dto->epsilon) {}
-
     /// @brief Primitive id containing mean data.
     primitive_id mean;
     /// @brief Primitive id containing variance.
@@ -188,15 +177,6 @@ protected:
 
         return deps;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.mean = mean.c_str();
-        dto.variance = variance.c_str();
-        dto.inv_variance = inv_variance.c_str();
-        dto.scale = scale.c_str();
-        dto.shift = shift.c_str();
-        dto.epsilon = epsilon;
-    }
 };
 /// @}
 /// @}
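For reference, the global-stats formula quoted in the batch_norm description above is, per element of feature map b, simply:

    #include <cmath>
    // out[i] = ((in[i] - mean[b]) / sqrt(variance[b] + epsilon)) * scale[b] + shift[b]
    inline float batch_norm_elem(float in, float mean, float variance,
                                 float scale, float shift, float epsilon) {
        return ((in - mean) / std::sqrt(variance + epsilon)) * scale + shift;
    }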
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/batch_norm_grad.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -31,7 +30,7 @@ namespace cldnn {
 /// @brief Performs backward batch normalization layer.
 /// @details Calculates mean gradient and gradient * input for every feature in data,
 /// then output is calculated as inv_variance * (input_grad - mean_grad_input * input - mean_grad)
-struct batch_norm_grad : public primitive_base<batch_norm_grad, CLDNN_PRIMITIVE_DESC(batch_norm_grad)> {
+struct batch_norm_grad : public primitive_base<batch_norm_grad> {
     CLDNN_DECLARE_PRIMITIVE(batch_norm_grad)
 
     /// @brief Constructs batch normalization backward layer.
@@ -48,11 +47,6 @@ struct batch_norm_grad : public primitive_base<batch_norm_grad, CLDNN_PRIMITIVE_
         : primitive_base(id, {input_grad, input}, output_padding), inv_variance(inv_variance) {
     }
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{batch_norm_grad}
-    batch_norm_grad(const dto* dto)
-        : primitive_base(dto), inv_variance(dto->inv_variance) {
-    }
-
     /// @brief Primitive id containing inverted variance from forward pass.
     primitive_id inv_variance;
 
@@ -60,12 +54,8 @@ protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         return {inv_variance};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.inv_variance = inv_variance.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/binary_convolution.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -29,7 +28,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Performs forward spatial binary_convolution with weight sharing.
-struct binary_convolution : public primitive_base<binary_convolution, CLDNN_PRIMITIVE_DESC(binary_convolution)> {
+struct binary_convolution : public primitive_base<binary_convolution> {
     CLDNN_DECLARE_PRIMITIVE(binary_convolution)
 
     /// @brief Constructs binary_convolution primitive.
@@ -59,29 +58,14 @@ struct binary_convolution : public primitive_base<binary_convolution, CLDNN_PRIM
                        data_types calc_precision = data_types::f32,
                        const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding, optional_data_type {calc_precision}),
-          weights(_weights.cpp_ids),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
           output_size(output_size),
           groups(groups),
           pad_value(pad_value),
-          _weights(weights) {}
+          weights(weights) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{binary_convolution}
-    binary_convolution(const dto* dto)
-        : primitive_base(dto),
-          weights(_weights.cpp_ids),
-          input_offset(dto->input_offset),
-          stride(dto->stride),
-          dilation(dto->dilation),
-          output_size(dto->output_size),
-          groups(dto->groups),
-          pad_value(dto->pad_value),
-          _weights(dto->weights) {}
-
-    /// @brief List of primitive ids containing weights data.
-    fixed_size_vector_ref weights;
     /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the binary_convolution window should start calculations.
     tensor input_offset;
     /// @brief Defines shift in input buffer between adjacent calculations of output values.
@@ -96,29 +80,17 @@ struct binary_convolution : public primitive_base<binary_convolution, CLDNN_PRIM
     int groups;
     /// @brief Logical value of padding. Can be one of 3 values: 1 -> pad bits equal to 1; -1 -> pad bits equal to 0; 0 -> pad is not counted.
     float pad_value;
+    /// @brief List of primitive ids containing weights data.
+    const primitive_id_arr weights;
 
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
 
-protected:
-    primitive_id_arr _weights;
-
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(weights.size());
-        for (auto& w : weights) ret.push_back(w);
+        for (auto& w : weights) ret.push_back(std::ref(w));
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = _weights.ref();
-        dto.input_offset = input_offset;
-        dto.stride = stride;
-        dto.split = split();
-        dto.dilation = dilation;
-        dto.output_size = output_size;
-        dto.groups = groups;
-        dto.pad_value = pad_value;
-    }
 };
 /// @}
 /// @}
@@ -14,8 +14,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-
-#include "../C/border.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -29,20 +27,20 @@ namespace cldnn {
 /// @brief Type of border that will be added to the input by border layer / primitive.
 enum class border_type : std::int32_t {
     /// @brief All points in the border are set to constant value.
-    constant = cldnn_border_constant,
-    zero = cldnn_border_zero,
+    constant,
+    zero,
     /// @brief Border is constructed as a mirror of the image (the edge is also mirrored).
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension.
-    mirror = cldnn_border_mirror,
+    mirror,
     /// @brief Border is constructed as a mirror of the image (the edge is NOT mirrored).
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension decreased by @c 1.
-    mirror_101 = cldnn_border_mirror_101,
+    mirror_101,
     /// @brief Border is constructed as a replication of the edge.
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension.
-    edge = cldnn_border_edge
+    edge
 };
 
 /// @brief Adds border around input.
@@ -58,7 +56,7 @@ enum class border_type : std::int32_t {
 /// @n - For @c border_type equal to @c cldnn_border_mirror_101, @c left_top_sizes and @c right_bottom_sizes
 ///      must be lower than the size of the input on the corresponding dimension (for all dimensions).
 /// @n Breaking any of these conditions will cause an exception to be thrown.
-struct border : public primitive_base<border, CLDNN_PRIMITIVE_DESC(border)> {
+struct border : public primitive_base<border> {
     CLDNN_DECLARE_PRIMITIVE(border)
 
     /// @brief Constructs border primitive / layer.
@@ -104,14 +102,6 @@ struct border : public primitive_base<border, CLDNN_PRIMITIVE_DESC(border)> {
            const padding& output_padding = padding())
         : border(id, input, x_y_sizes, x_y_sizes, type, 0.0f, output_padding) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{border}
-    border(const dto* dto)
-        : primitive_base(dto),
-          left_top_sizes(dto->left_top_sizes),
-          right_bottom_sizes(dto->right_bottom_sizes),
-          type(static_cast<border_type>(dto->border_type)),
-          border_value(dto->border_value) {}
-
     /// @brief Sizes of border that needs to be added from left (in X dimension) and from top (in Y dimension).
     tensor left_top_sizes;
     /// @brief Sizes of border that needs to be added from right (in X dimension) and from bottom (in Y dimension).
@@ -120,14 +110,6 @@ struct border : public primitive_base<border, CLDNN_PRIMITIVE_DESC(border)> {
     border_type type;
     /// @brief Border value that is used in constant mode.
     float border_value;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.left_top_sizes = left_top_sizes;
-        dto.right_bottom_sizes = right_bottom_sizes;
-        dto.border_type = static_cast<cldnn_border_type>(type);
-        dto.border_value = border_value;
-    }
 };
 /// @}
 /// @}
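A hedged construction sketch for the border primitive using the enum above; "input0" is a hypothetical id, and the (batch, feature, x, y) argument order of the tensor constructor is an assumption, not something shown in this hunk.

    // Add a one-element mirrored (mirror_101) border on X and Y around "input0".
    cldnn::border pad("pad0", "input0",
                      cldnn::tensor(0, 0, 1, 1),       // assumed (batch, feature, x, y) sizes
                      cldnn::border_type::mirror_101);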
@@ -15,7 +15,6 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "../C/broadcast.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -67,7 +66,7 @@ namespace cldnn {
 /// @n - @p output_shape must be greater than or equal to (and divisible by) the reinterpreted
 ///      input on all dimensions.
 /// @n Breaking any of these conditions will raise an exception.
-struct broadcast : public primitive_base<broadcast, CLDNN_PRIMITIVE_DESC(broadcast)> {
+struct broadcast : public primitive_base<broadcast> {
     CLDNN_DECLARE_PRIMITIVE(broadcast)
 
     /// @brief Constructs broadcast primitive / layer.
@@ -91,25 +90,11 @@ struct broadcast : public primitive_base<broadcast, CLDNN_PRIMITIVE_DESC(broadca
           broadcast_sizes(broadcast_sizes),
           broadcast_axes(broadcast_axes) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{broadcast}
-    broadcast(const dto* dto)
-        : primitive_base(dto),
-          broadcast_sizes(dto->broadcast_sizes),
-          broadcast_axes(uint16_t_arr_to_vector(dto->broadcast_axes))
-
-    {}
-
     /// @brief Expected sizes of output from broadcast primitive.
     tensor broadcast_sizes;
     /// @brief Array of axes positions from output shape (0-based, from left to right)
     ///        along which broadcast should happen.
     std::vector<uint16_t> broadcast_axes;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.broadcast_sizes = broadcast_sizes;
-        dto.broadcast_axes = uint16_t_vector_to_arr(broadcast_axes);
-    }
 };
 /// @}
 /// @}
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <functional>
+#include <stdint.h>
+#include <stddef.h>
+#include <memory>
 #include <string>
 #include <type_traits>
-#include <utility>
-#include <vector>
-#include <stdexcept>
-
-#include "../C/cldnn.h"
 
 namespace cldnn {
+
+/// @addtogroup cpp_api C++ API
+/// @{
+
+/// @defgroup cpp_version Version Information
+/// @{
+
+/// @brief Represents version information of API.
+struct version_t {
+    int32_t major;     ///< Major version component (major version of clDNN API interface).
+    int32_t minor;     ///< Minor version component (minor version of API interface - correlated with IE API version).
+    int32_t build;     ///< Build version component (version/revision of official Open Source drop of clDNN library).
+    int32_t revision;  ///< Revision version component (incremental identifier of current build/compilation).
+};
+
+/// @brief Get information about version of clDNN.
+version_t get_version();
+
+/// @}
+
+float half_to_float(uint16_t value);
+uint16_t float_to_half(float value);
+
 // There is no portable half precision floating point support.
 // Using wrapped integral type with the same size and alignment restrictions.
 class half_impl {
@@ -150,99 +170,30 @@ public:
 
     operator uint16_t() const { return _data; }
     operator float() const {
-        cldnn_status status = CLDNN_SUCCESS;
-        auto value = cldnn_half_to_float(_data, &status);
-        if (status != CLDNN_SUCCESS)
-            throw std::runtime_error("Conversion from half failed");
-        return value;
-    }
-    explicit half_impl(float value) {
-        cldnn_status status = CLDNN_SUCCESS;
-        _data = cldnn_float_to_half(value, &status);
-        if (status != CLDNN_SUCCESS)
-            throw std::runtime_error("Conversion to half failed");
+        return half_to_float(_data);
     }
 
+    explicit half_impl(float value)
+        : _data(float_to_half(value))
+    {}
+
 private:
     uint16_t _data;
 };
-}  // namespace cldnn
+
 // Use complete implementation if necessary.
 #if defined HALF_HALF_HPP
-typedef half half_t;
+using half_t = half;
 #else
-typedef cldnn::half_impl half_t;
+using half_t = half_impl;
 #endif
 
-namespace cldnn {
-/// @addtogroup cpp_api C++ API
-/// @{
-
-/// @defgroup cpp_error Error Handling
-/// @{
-
-using status_t = ::cldnn_status;
-
-/// @brief clDNN specific exception type.
-class error : public std::runtime_error {
-public:
-    explicit error(const std::string& _Message, status_t status = CLDNN_ERROR)
-        : runtime_error(_Message), _status(status) {
-    }
-
-    explicit error(const char* _Message, status_t status = CLDNN_ERROR)
-        : runtime_error(_Message), _status(status) {
-    }
-
-    /// @brief Returns clDNN status code.
-    const status_t& status() const { return _status; }
-
-private:
-    status_t _status;
-};
-
-#define CLDNN_THROW(msg, status) throw cldnn::error(msg, status);
-
-template <class T>
-T check_status(std::string err_msg, std::function<T(status_t*)> func) {
-    status_t status = CLDNN_SUCCESS;
-    auto result = func(&status);
-    if (status != CLDNN_SUCCESS)
-        CLDNN_THROW(err_msg.append(": ").append(cldnn_get_last_error_message()), status);
-    return result;
-}
-
-template <>
-inline void check_status<void>(std::string err_msg, std::function<void(status_t*)> func) {
-    status_t status = CLDNN_SUCCESS;
-    func(&status);
-    if (status != CLDNN_SUCCESS)
-        CLDNN_THROW(err_msg.append(": ").append(cldnn_get_last_error_message()), status);
-}
-
-/// @}
-
-/// @defgroup cpp_version Version Information
-/// @{
-
-using version_t = ::cldnn_version;
-
-/// @brief Get information about version of clDNN.
-inline version_t get_version() {
-    return check_status<version_t>("get_version: fetching version information failed",
-                                   [](status_t* status) {
-                                       return ::cldnn_get_version(status);
-                                   });
-}
-
-/// @}
-
 /// @cond CPP_HELPERS
 
 /// @defgroup cpp_helpers Helpers
 /// @{
 
-#define CLDNN_API_CLASS(the_class) static_assert(std::is_standard_layout<the_class>::value, #the_class " has to be 'standart layout' class");
+#define CLDNN_API_CLASS(the_class) static_assert(std::is_standard_layout<the_class>::value, #the_class " has to be 'standard layout' class");
 
 template <typename T>
 typename std::enable_if<std::is_integral<T>::value, T>::type align_to(T size, size_t align) {
@@ -275,8 +226,8 @@ typename std::enable_if<std::is_integral<T>::value, bool>::type is_aligned_to(T
 ///           division, except each operand is converted to unsigned type if necessary.
 template <typename T1, typename T2>
 constexpr auto ceil_div(T1 val, T2 divider)
-    -> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
-                               decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type {
+-> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
+    decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type {
     typedef typename std::make_unsigned<T1>::type UT1;
     typedef typename std::make_unsigned<T2>::type UT2;
     typedef decltype(std::declval<UT1>() / std::declval<UT2>()) RetT;
@@ -299,8 +250,8 @@ constexpr auto ceil_div(T1 val, T2 divider)
 ///           division, except each operand is converted to unsigned type if necessary.
 template <typename T1, typename T2>
 constexpr auto round_up_to(T1 val, T2 rounding)
-    -> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
-                               decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type {
+-> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
+    decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type {
     typedef typename std::make_unsigned<T1>::type UT1;
     typedef typename std::make_unsigned<T2>::type UT2;
     typedef decltype(std::declval<UT1>() / std::declval<UT2>()) RetT;
@@ -308,81 +259,7 @@ constexpr auto round_up_to(T1 val, T2 rounding)
     return static_cast<RetT>(ceil_div(val, rounding) * static_cast<UT2>(rounding));
 }
 
-///
-/// \brief Converts C API float array to std::vector<float>
-///
-inline std::vector<float> float_arr_to_vector(const cldnn_float_arr& arr) {
-    std::vector<float> result(arr.size);
-    for (size_t i = 0; i < arr.size; i++) {
-        result[i] = arr.data[i];
-    }
-    return result;
-}
-
-///
-/// \brief Converts C API float array to std::vector<uint16_t>
-///
-inline std::vector<uint16_t> uint16_t_arr_to_vector(const cldnn_uint16_t_arr& arr) {
-    std::vector<uint16_t> result(arr.size);
-    for (size_t i = 0; i < arr.size; i++) {
-        result[i] = arr.data[i];
-    }
-    return result;
-}
-
-///
-/// \brief Converts C API uint8_t array to std::vector<uint8_t>
-///
-inline std::vector<uint8_t> uint8_t_arr_to_vector(const cldnn_uint8_t_arr& arr) {
-    std::vector<uint8_t> result(arr.size);
-    for (size_t i = 0; i < arr.size; i++) {
-        result[i] = arr.data[i];
-    }
-    return result;
-}
-
-///
-/// \brief Converts std::vector<float> to C API float_array
-///
-inline cldnn_float_arr float_vector_to_arr(const std::vector<float>& stor) {
-    return {stor.data(), stor.size()};
-}
-
-///
-/// \brief Converts std::vector<uint16_t> to C API uint16_t array
-///
-inline cldnn_uint16_t_arr uint16_t_vector_to_arr(const std::vector<uint16_t>& stor) {
-    return {stor.data(), stor.size()};
-}
-
-///
-/// \brief Converts std::vector<uint8_t> to C API uint8_t array
-///
-inline cldnn_uint8_t_arr uint8_t_vector_to_arr(const std::vector<uint8_t>& stor) {
-    return {stor.data(), stor.size()};
-}
-
-///
-/// \brief Converts std::vector<tensor> to C API tensor_array
-///
-inline cldnn_tensor_arr tensor_vector_to_arr(const std::vector<cldnn_tensor>& stor) {
-    return cldnn_tensor_arr{stor.data(), stor.size()};
-}
-
-///
-/// \brief Converts C API tensor_array to std::vector of C API tensor
-///
-inline std::vector<cldnn_tensor> tensor_arr_to_cldnn_vector(const cldnn_tensor_arr& arr) {
-    std::vector<cldnn_tensor> result(arr.size);
-    for (size_t i = 0; i < arr.size; i++)
-        result[i] = arr.data[i];
-
-    return result;
-}
-
 /// @}
-
 /// @endcond
-
 /// @}
 }  // namespace cldnn
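A small, hedged sketch exercising the pieces kept in this header after the C API removal: the version query, the fp16 round trip, and the constexpr rounding helpers. The include path is an assumption for the new api/ layout.

    #include "api/cldnn_defs.h"   // assumed header location after the api/CPP -> api/ move
    void defs_demo() {
        cldnn::version_t v = cldnn::get_version();     // now a plain struct instead of ::cldnn_version
        (void)v;
        cldnn::half_t h(1.5f);                         // float -> fp16 via float_to_half()
        float back = static_cast<float>(h);            // fp16 -> float via half_to_float()
        (void)back;
        static_assert(cldnn::ceil_div(10, 4) == 3, "rounded-up division");
        static_assert(cldnn::round_up_to(10, 4) == 12, "rounded up to a multiple of 4");
    }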
diff --git a/inference-engine/thirdparty/clDNN/api/compounds.h b/inference-engine/thirdparty/clDNN/api/compounds.h
new file mode 100644 (file)
index 0000000..ba6966f
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+#pragma once
+
+#include <vector>
+#include <cassert>
+#include <iterator>
+#include <cstring>
+#include <string>
+#include <stdexcept>
+
+#include "meta_utils.hpp"
+
+namespace cldnn {
+
+/// @addtogroup cpp_api C++ API
+/// @{
+
+/// @cond CPP_HELPERS
+
+/// @defgroup cpp_helpers Helpers
+/// @{
+
+template <typename T>
+class mutable_array_ref {
+public:
+    typedef size_t size_type;
+
+    mutable_array_ref() : _data(nullptr), _size(0) {}
+    explicit mutable_array_ref(T& val) : _data(&val), _size(1) {}
+    mutable_array_ref(T* data, size_t size) : _data(data), _size(size) {}
+
+    template <size_t N>
+    explicit mutable_array_ref(T (&arr)[N]) : _data(arr), _size(N) {}
+
+    mutable_array_ref(const mutable_array_ref& other) : _data(other._data), _size(other._size) {}
+
+    mutable_array_ref& operator=(const mutable_array_ref& other) {
+        if (this == &other)
+            return *this;
+        _data = other._data;
+        _size = other._size;
+        return *this;
+    }
+
+    T* data() const { return _data; }
+    size_t size() const { return _size; }
+    bool empty() const { return _size == 0; }
+
+#if defined(_SECURE_SCL) && (_SECURE_SCL > 0)
+    typedef stdext::checked_array_iterator<T*> iterator;
+    typedef stdext::checked_array_iterator<const T*> const_iterator;
+    iterator begin() const { return stdext::make_checked_array_iterator(_data, _size); }
+    iterator end() const { return stdext::make_checked_array_iterator(_data, _size, _size); }
+    const_iterator cbegin() const { return stdext::make_checked_array_iterator(_data, _size); }
+    const_iterator cend() const { return stdext::make_checked_array_iterator(_data, _size, _size); }
+#else
+    typedef T* iterator;
+    typedef T* const_iterator;
+    iterator begin() const { return _data; }
+    iterator end() const { return _data + _size; }
+    const_iterator cbegin() const { return _data; }
+    const_iterator cend() const { return _data + _size; }
+#endif
+
+    T& operator[](size_t idx) const {
+        assert(idx < _size);
+        return _data[idx];
+    }
+
+    T& at(size_t idx) const {
+        if (idx >= _size) throw std::out_of_range("idx");
+        return _data[idx];
+    }
+
+    std::vector<T> vector() const { return std::vector<T>(_data, _data + _size); }
+
+private:
+    T* _data;
+    size_t _size;
+};
+
+/// @}
+
+/// @endcond
+
+/// @}
+}  // namespace cldnn
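
For context, a minimal usage sketch of the new mutable_array_ref helper added above; it is a non-owning view, so the wrapped buffer must outlive it (the include path and variable names here are illustrative, not part of the change):

    #include <iostream>
    #include <vector>
    #include "compounds.h"   // assumed include path for the header added above

    int main() {
        int raw[4] = {1, 2, 3, 4};

        // Wrap a fixed-size array; the size N is deduced from the array type.
        cldnn::mutable_array_ref<int> view(raw);

        view[0] = 10;                           // elements are writable through the view
        std::vector<int> copy = view.vector();  // copy out into an owning container

        for (int v : copy)
            std::cout << v << ' ';              // prints: 10 2 3 4
        std::cout << '\n';
        return 0;
    }
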
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/concatenation.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -50,16 +49,16 @@ namespace cldnn {
 ///   @li output : data structure holding output data for this primitive
 ///   @li i.features : number of features in currently processed input
 ///   @li outputIdx : index of destination feature
-struct concatenation : public primitive_base<concatenation, CLDNN_PRIMITIVE_DESC(concatenation)> {
+struct concatenation : public primitive_base<concatenation> {
     CLDNN_DECLARE_PRIMITIVE(concatenation)
 
     enum concatenation_axis {
-        along_b = cldnn_concatenation_along_b,
-        along_f = cldnn_concatenation_along_f,
-        along_x = cldnn_concatenation_along_x,
-        along_y = cldnn_concatenation_along_y,
-        along_z = cldnn_concatenation_along_z,
-        along_w = cldnn_concatenation_along_w
+        along_b,
+        along_f,
+        along_x,
+        along_y,
+        along_z,
+        along_w
     };
 
     /// @li Constructs concatenation primitive.
@@ -73,17 +72,8 @@ struct concatenation : public primitive_base<concatenation, CLDNN_PRIMITIVE_DESC
         const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), axis(axis) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC(depth_concatenate)
-    concatenation(const dto* dto)
-        : primitive_base(dto), axis(static_cast<concatenation_axis>(dto->axis)) {}
-
     /// @brief Dimension along which concatenation should take place
     concatenation_axis axis;
-
-private:
-    void update_dto(dto& dto) const override {
-        dto.axis = static_cast<cldnn_concatenation_axis>(axis);
-    }
 };
 /// @}
 /// @}
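
As a rough illustration of the simplified C++-only API above, a concatenation primitive is now described purely by its id, its inputs, and the axis enum (argument order assumed from the constructor's initializer list; all ids below are hypothetical):

    // Join two feature maps along the feature axis (sketch, not verbatim API usage).
    cldnn::concatenation concat_prim(
        "concat",                        // this primitive's id
        { "conv_a", "conv_b" },          // input primitive ids to concatenate
        cldnn::concatenation::along_f);  // axis: batch/feature/x/y/z/w
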
@@ -14,8 +14,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-
-#include "../C/condition.h"
 #include "primitive.hpp"
 #include "topology.hpp"
 #include <vector>
@@ -36,7 +34,7 @@ enum cond_functions : int32_t { EQUAL, GREATER, LESS };
 /// @n   Applies comparison between 2 inputs.
 /// @n   Compare data - sizes of that input specify the range of the comparison.
 /// @n   Offset - offset in memory used when comparing values.
-struct condition : public primitive_base<condition, CLDNN_PRIMITIVE_DESC(condition)> {
+struct condition : public primitive_base<condition> {
     CLDNN_DECLARE_PRIMITIVE(condition)
 
     /// @brief Constructs condition primitive / layer.
@@ -67,15 +65,6 @@ struct condition : public primitive_base<condition, CLDNN_PRIMITIVE_DESC(conditi
           function(func),
           offset(offset) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{condition}
-    condition(const dto* dto)
-        : primitive_base(dto),
-          topology_true(dto->topology_true),
-          topology_false(dto->topology_false),
-          compare_data(dto->compare_data),
-          function(static_cast<cond_functions>(dto->function)),
-          offset(dto->offset) {}
-
     /// @brief An identifier of topology, which will be executed when comparison returns true.
     topology topology_true;
     /// @brief An identifier of topology, which will be executed when comparison returns false.
@@ -88,14 +77,6 @@ struct condition : public primitive_base<condition, CLDNN_PRIMITIVE_DESC(conditi
     tensor offset;
 
 protected:
-    void update_dto(dto& dto) const override {
-        dto.compare_data = compare_data.c_str();
-        dto.function = static_cast<cldnn_cond_functions>(function);
-        dto.offset = offset;
-        dto.topology_true = topology_true.get();
-        dto.topology_false = topology_false.get();
-    }
-
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {compare_data}; }
 };
 }  // namespace cldnn
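
A hedged sketch of how the condition primitive above might be built after the dto removal; the argument order is inferred from the member initializers and may differ from the actual header, and every name below is hypothetical:

    // Execute branch_true when "input" compares GREATER against "threshold_data".
    cldnn::topology branch_true;    // built elsewhere
    cldnn::topology branch_false;   // built elsewhere
    cldnn::condition cond_prim(
        "cond",                           // this primitive's id
        "input",                          // input to compare
        branch_true,                      // topology run when the comparison holds
        branch_false,                     // topology run otherwise
        "threshold_data",                 // primitive holding the compare data
        cldnn::cond_functions::GREATER);  // comparison function (EQUAL/GREATER/LESS)
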
@@ -14,8 +14,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-
-#include "../C/contract.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -30,15 +28,15 @@ namespace cldnn {
 /// @brief Select mode for the @ref contract layer.
 enum class contract_mode : int32_t {
     /// @brief Sum reduction.
-    sum = cldnn_contract_sum,
+    sum,
     /// @brief Product reduction.
-    prod = cldnn_contract_product,
+    prod,
     /// @brief All reduction.
-    all = cldnn_contract_all,
+    all,
     /// @brief Any reduction.
-    any = cldnn_contract_any,
+    any,
     /// @brief Max reduction.
-    max = cldnn_contract_max
+    max
 };
 
 /// @brief Reduces input with an operation defined by @p mode along defined
@@ -63,7 +61,7 @@ enum class contract_mode : int32_t {
 /// @n - @p reduction_axes mustn't have duplicate values.
 /// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
 /// @n Breaking any of these conditions will raise an exception.
-struct contract : public primitive_base<contract, CLDNN_PRIMITIVE_DESC(contract)> {
+struct contract : public primitive_base<contract> {
     CLDNN_DECLARE_PRIMITIVE(contract)
 
     /// @brief Constructs contract primitive / layer.
@@ -85,25 +83,11 @@ struct contract : public primitive_base<contract, CLDNN_PRIMITIVE_DESC(contract)
           mode(mode),
           reduction_axes(reduction_axes) {
     }
-
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{contract}
-    contract(const dto* dto)
-        : primitive_base(dto),
-          mode(static_cast<contract_mode>(dto->mode)),
-          reduction_axes(uint16_t_arr_to_vector(dto->reduction_axes)) {
-    }
-
     /// @param mode Contract mode.
     contract_mode mode;
     /// @brief Array of axes positions from input shape (0-based, from left to right)
     ///        along which reduction should happen.
     std::vector<uint16_t> reduction_axes;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.mode = static_cast<cldnn_contract_mode>(mode);
-        dto.reduction_axes = uint16_t_vector_to_arr(reduction_axes);
-    }
 };
 /// @}
 /// @}
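
To make the reduction semantics above concrete, a minimal sketch of a sum reduction over two axes (constructor arguments assumed from the initializer list; ids are illustrative):

    // Sum-reduce "feature_map" along 0-based axes 2 and 3.
    cldnn::contract reduce_prim(
        "reduce_hw",                  // this primitive's id
        "feature_map",                // input primitive id
        cldnn::contract_mode::sum,    // reduction operation
        { 2, 3 });                    // reduction_axes, each within 0..3
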
@@ -16,9 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/convolution.h"
-#include "../C/deformable_interp.h"
-#include "../C/deformable_conv.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -33,7 +30,7 @@ namespace cldnn {
 /// @brief Performs forward spatial convolution with weight sharing.
 /// Also supports built-in Relu @CLDNN_PRIMITIVE_DESC{activation} available by setting it in arguments.
 /// @details Parameters are defined in context of "direct" convolution, but actual algorithm is not implied.
-struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(convolution)> {
+struct convolution : public primitive_base<convolution> {
     CLDNN_DECLARE_PRIMITIVE(convolution)
 
     /// @brief Constructs convolution primitive.
@@ -57,31 +54,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           deformable_groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
@@ -110,31 +99,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor dilation,
                 tensor padding_above,
                 tensor padding_below,
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           deformable_groups(1),
           padding_above(padding_above),
           padding_below(padding_below),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
@@ -165,31 +146,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor dilation,
                 tensor padding_above,
                 tensor padding_below,
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(groups),
           deformable_groups(1),
           padding_above(padding_above),
           padding_below(padding_below),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
@@ -217,22 +190,14 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride,
                 tensor input_offset,
                 tensor dilation,
-                bool with_activation,
-                float activation_slp,
                 tensor output_size,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(true),
           output_size(output_size),
           groups(groups),
@@ -240,10 +205,10 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
@@ -272,31 +237,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(groups),
           deformable_groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
         if ((groups > 1) && ((weights.size() != 1) || ((bias.size() != 0) && (bias.size() != 1))))
@@ -331,31 +288,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(i_quantization_factor),
           output_quantization_factor(o_quantization_factor),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           deformable_groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(w_quantization_factor),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(w_quantization_factor),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
         if ((weights.size() != 0) && (weights.size() != weights_quantization_factors.size()))
@@ -389,30 +338,22 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding, optional_data_type {output_data_type}),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(quantization_factors),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(quantization_factors),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
         validate_quantized();
@@ -446,31 +387,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(i_quantization_factor),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           deformable_groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(w_quantization_factor),
-          _output_calibration_factors(output_calibration_factors) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(w_quantization_factor),
+          output_calibration_factors(output_calibration_factors) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
         if ((weights.size() != 0) && (weights.size() != weights_quantization_factors.size()))
@@ -497,31 +430,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           deformable_groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution primitive (w/o bias).
     /// @param id This primitive id.
@@ -547,31 +472,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor dilation,
                 tensor padding_above,
                 tensor padding_below,
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
           deformable_groups(1),
           padding_above(padding_above),
           padding_below(padding_below),
           deformable_mode(false),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution primitive (w/o bias).
     /// @param id This primitive id.
@@ -599,31 +516,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor dilation,
                 tensor padding_above,
                 tensor padding_below,
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(groups),
           deformable_groups(1),
           padding_above(padding_above),
           padding_below(padding_below),
           deformable_mode(false),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution primitive (w/o bias).
     /// @param id This primitive id.
@@ -647,31 +556,23 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride = {1, 1, 1, 1},
                 tensor input_offset = tensor(0),
                 tensor dilation = {1, 1, 1, 1},
-                bool with_activation = false,
-                float activation_slp = 0.0f,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(groups),
           deformable_groups(1),
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution primitive (computes input paddings to match output size).
     /// @param id This primitive id.
@@ -696,22 +597,14 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride,
                 tensor input_offset,
                 tensor dilation,
-                bool with_activation,
-                float activation_slp,
                 tensor output_size,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(true),
           output_size(output_size),
           groups(1),
@@ -719,10 +612,10 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
@@ -748,22 +641,14 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor stride,
                 tensor input_offset,
                 tensor dilation,
-                bool with_activation,
-                float activation_slp,
                 tensor output_size,
                 const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(true),
           output_size(output_size),
           groups(1),
@@ -771,10 +656,10 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(false),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /*
     /// @brief Constructs convolution primitive.
@@ -807,17 +692,11 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                 tensor output_size,
                 const padding& output_padding = padding())
         : primitive_base(id, {input, trans}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
           input_quantization_factor(1.0f),
           output_quantization_factor(1.0f),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(false),
-          activation_negative_slope(0.0f),
           with_output_size(true),
           output_size(output_size),
           groups(groups),
@@ -825,45 +704,16 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
           padding_above(tensor(0)),
           padding_below(tensor(0)),
           deformable_mode(true),
-          _weights(weights),
-          _bias(bias),
-          _weights_quantization_factors(std::vector<primitive_id>(0)),
-          _output_calibration_factors(std::vector<primitive_id>(0)) {
+          weights(weights),
+          bias(bias),
+          weights_quantization_factors(std::vector<primitive_id>(0)),
+          output_calibration_factors(std::vector<primitive_id>(0)) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
         if ((groups > 1) && ((weights.size() != 1) || ((bias.size() != 0) && (bias.size() != 1))))
             throw std::runtime_error("grouped convolution's weights/bias count must be 1");
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{convolution}
-    convolution(const dto* dto)
-        : primitive_base(dto),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          weights_quantization_factors(_weights_quantization_factors.cpp_ids),
-          output_calibration_factors(_output_calibration_factors.cpp_ids),
-          input_quantization_factor(dto->input_quantization_factor),
-          output_quantization_factor(dto->output_quantization_factor),
-          input_offset(dto->input_offset),
-          stride(dto->stride),
-          dilation(dto->dilation),
-          with_activation(dto->with_activation != 0),
-          activation_negative_slope(dto->activation_negative_slope),
-          with_output_size(dto->with_output_size != 0),
-          output_size(dto->output_size),
-          groups(dto->groups),
-          deformable_groups(dto->deformable_groups),
-          padding_above(dto->padding_above),
-          padding_below(dto->padding_below),
-          deformable_mode(dto->deformable_mode != 0),
-          _weights(dto->weights),
-          _bias(dto->bias),
-          _weights_quantization_factors(dto->weights_quantization_factors),
-          _output_calibration_factors(dto->output_calibration_factors) {
-        if (!dto->split || (weights.size() != bias.size() && bias.size() != 0) || dto->split != weights.size())
-            throw std::invalid_argument("Invalid convolution dto: bad split value");
-    }
-
     /// @brief Constructs convolution primitive (computes input paddings to match output size).
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -889,8 +739,6 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                                                tensor stride = {1, 1, 1, 1},
                                                tensor input_offset = tensor(0),
                                                tensor dilation = {1, 1, 1, 1},
-                                               bool with_activation = false,
-                                               float activation_slp = 0.0f,
                                                const padding& output_padding = padding()) {
         return convolution(id,
                            input,
@@ -899,8 +747,6 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                            stride,
                            input_offset,
                            dilation,
-                           with_activation,
-                           activation_slp,
                            output_size,
                            output_padding);
     }
@@ -928,8 +774,6 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                                                tensor stride = {1, 1, 1, 1},
                                                tensor input_offset = tensor(0),
                                                tensor dilation = {1, 1, 1, 1},
-                                               bool with_activation = false,
-                                               float activation_slp = 0.0f,
                                                const padding& output_padding = padding()) {
         return convolution(id,
                            input,
@@ -937,20 +781,10 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
                            stride,
                            input_offset,
                            dilation,
-                           with_activation,
-                           activation_slp,
                            output_size,
                            output_padding);
     }
 
-    /// @brief List of primitive ids containing weights data.
-    fixed_size_vector_ref weights;
-    /// @brief List of primitive ids containing bias data.
-    fixed_size_vector_ref bias;
-    /// @brief List of primitive ids containing weights quanitization factors per output feature map.
-    fixed_size_vector_ref weights_quantization_factors;
-    /// @brief List of primitive ids containing output quanitization factors per output feature map.
-    fixed_size_vector_ref output_calibration_factors;
     /// @brief Input quantization factor
     float input_quantization_factor;
     /// @brief Output quantization factor
@@ -963,10 +797,6 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
     /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
     /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
     tensor dilation;
-    /// @brief Enable Relu activation.
-    bool with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
     /// @brief Indicates that the primitive has user-defined output size (non-zero value).
     bool with_output_size;
     /// @brief User-defined output data size of the primitive (w/o padding).
@@ -982,49 +812,29 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
     tensor padding_below;
     /// @brief Indicates that deformable mode is used.
     bool deformable_mode;
+    /// @brief List of primitive ids containing weights data.
+    const primitive_id_arr weights;
+    /// @brief List of primitive ids containing bias data.
+    const primitive_id_arr bias;
+    /// @brief List of primitive ids containing weights quantization factors per output feature map.
+    const primitive_id_arr weights_quantization_factors;
+    /// @brief List of primitive ids containing output quantization factors per output feature map.
+    const primitive_id_arr output_calibration_factors;
 
     /// @brief On how many cards split the computation to.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
 
-protected:
-    primitive_id_arr _weights;
-    primitive_id_arr _bias;
-    primitive_id_arr _weights_quantization_factors;
-    primitive_id_arr _output_calibration_factors;
-
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(weights.size() + bias.size() + weights_quantization_factors.size() +
                     output_calibration_factors.size());
-        for (auto& w : weights) ret.push_back(w);
-        for (auto& b : bias) ret.push_back(b);
-        for (auto& q : weights_quantization_factors) ret.push_back(q);
-        for (auto& q : output_calibration_factors) ret.push_back(q);
+        for (auto& w : weights) ret.push_back(std::ref(w));
+        for (auto& b : bias) ret.push_back(std::ref(b));
+        for (auto& q : weights_quantization_factors) ret.push_back(std::ref(q));
+        for (auto& q : output_calibration_factors) ret.push_back(std::ref(q));
         return ret;
     }
 
-    void update_dto(dto& dto) const override {
-        dto.weights = _weights.ref();
-        dto.bias = _bias.ref();
-        dto.weights_quantization_factors = _weights_quantization_factors.ref();
-        dto.output_calibration_factors = _output_calibration_factors.ref();
-        dto.input_quantization_factor = input_quantization_factor;
-        dto.output_quantization_factor = output_quantization_factor;
-        dto.input_offset = input_offset;
-        dto.stride = stride;
-        dto.split = split();
-        dto.with_activation = with_activation;
-        dto.activation_negative_slope = activation_negative_slope;
-        dto.dilation = dilation;
-        dto.with_output_size = with_output_size;
-        dto.output_size = output_size;
-        dto.groups = groups;
-        dto.deformable_groups = deformable_groups;
-        dto.padding_above = padding_above;
-        dto.padding_below = padding_below;
-        dto.deformable_mode = deformable_mode ? 1 : 0;
-    }
-
 private:
     // TODO: validate_quantized -> validate ?
     void validate_quantized() {
@@ -1033,18 +843,13 @@ private:
             throw std::runtime_error(
                 "convolution's weights count does not "
                 "match quantization factors count");
-        if (with_activation && output_data_type) {
+        if (output_data_type) {
             // Use explicit switch to get compiler warning if new data_types would become supported.
             switch (*output_data_type) {
-                case data_types::u8:
-                    if (activation_negative_slope != 0.0f)
-                        throw std::runtime_error(
-                            "Negative slope in activation is meaningless for the "
-                            "unsigned type!");
-                    break;
                 case data_types::bin:
                     throw std::runtime_error("Binary convolution is a separate primitive.");
                     break;
+                case data_types::u8:
                 case data_types::i8:
                 case data_types::i32:
                 case data_types::i64:
@@ -1057,7 +862,7 @@ private:
     }
 };
 
-struct deformable_interp : public primitive_base<deformable_interp, CLDNN_PRIMITIVE_DESC(deformable_interp)> {
+struct deformable_interp : public primitive_base<deformable_interp> {
     CLDNN_DECLARE_PRIMITIVE(deformable_interp)
 
     deformable_interp(const primitive_id& id,
@@ -1082,31 +887,6 @@ struct deformable_interp : public primitive_base<deformable_interp, CLDNN_PRIMIT
               padding_above(tensor(0)),
               padding_below(tensor(0)) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{deformable_interp}
-    deformable_interp(const dto* dto)
-            : primitive_base(dto),
-              input_offset(dto->input_offset),
-              stride(dto->stride),
-              dilation(dto->dilation),
-              output_size(dto->output_size),
-              kernel_size(dto->kernel_size),
-              groups(dto->groups),
-              deformable_groups(dto->deformable_groups),
-              padding_above(dto->padding_above),
-              padding_below(dto->padding_below) { }
-
-    void update_dto(dto& dto) const override {
-        dto.input_offset = input_offset;
-        dto.stride = stride;
-        dto.dilation = dilation;
-        dto.groups = groups;
-        dto.output_size = output_size;
-        dto.kernel_size = kernel_size;
-        dto.deformable_groups = deformable_groups;
-        dto.padding_above = padding_above;
-        dto.padding_below = padding_below;
-    }
-
     /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
     tensor input_offset;
     /// @brief Defines shift in input buffer between adjacent calculations of output values.
@@ -1130,7 +910,7 @@ struct deformable_interp : public primitive_base<deformable_interp, CLDNN_PRIMIT
     tensor padding_below;
 };
 
-struct deformable_conv : public primitive_base<deformable_conv, CLDNN_PRIMITIVE_DESC(deformable_conv)> {
+struct deformable_conv : public primitive_base<deformable_conv> {
     CLDNN_DECLARE_PRIMITIVE(deformable_conv)
 
     deformable_conv(const primitive_id& id,
@@ -1141,21 +921,19 @@ struct deformable_conv : public primitive_base<deformable_conv, CLDNN_PRIMITIVE_
                     tensor output_size,
                     const padding& output_padding = padding())
             : primitive_base(id, {input}, output_padding),
-              weights(_weights.cpp_ids),
-              bias(_bias.cpp_ids),
               output_size(output_size),
               groups(groups),
-              _weights(weights),
-              _bias(biases) {}
+              weights(weights),
+              bias(biases) {}
 
-    /// @brief List of primitive ids containing weights data.
-    fixed_size_vector_ref weights;
-    /// @brief List of primitive ids containing bias data.
-    fixed_size_vector_ref bias;
     /// @brief User-defined output data size of the primitive (w/o padding).
     tensor output_size;
     /// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
     uint32_t groups;
+    /// @brief List of primitive ids containing weights data.
+    const primitive_id_arr weights;
+    /// @brief List of primitive ids containing bias data.
+    const primitive_id_arr bias;
 
     /// @brief On how many cards split the computation to.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
@@ -1163,31 +941,10 @@ struct deformable_conv : public primitive_base<deformable_conv, CLDNN_PRIMITIVE_
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(weights.size() + bias.size());
-        for (auto& w : weights) ret.push_back(w);
-        for (auto& b : bias) ret.push_back(b);
+        for (auto& w : weights) ret.push_back(std::ref(w));
+        for (auto& b : bias) ret.push_back(std::ref(b));
         return ret;
     }
-
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{convolution}
-    deformable_conv(const dto* dto)
-            : primitive_base(dto),
-              weights(_weights.cpp_ids),
-              bias(_bias.cpp_ids),
-              output_size(dto->output_size),
-              groups(dto->groups),
-              _weights(dto->weights),
-              _bias(dto->bias) {
-    }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = _weights.ref();
-        dto.bias = _bias.ref();
-        dto.output_size = output_size;
-        dto.groups = groups;
-    }
-protected:
-    primitive_id_arr _weights;
-    primitive_id_arr _bias;
 };
 
 /// @}
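
Since the fused with_activation/activation_negative_slope arguments are dropped throughout this header, a convolution is now built without any activation flags. A hedged sketch (argument order assumed from the first constructor; all ids are hypothetical), with any ReLU applied afterwards as a standalone primitive:

    // Plain convolution; no built-in ReLU anymore.
    cldnn::convolution conv_prim(
        "conv1",                 // this primitive's id
        "input",                 // input primitive id
        { "conv1_weights" },     // weights primitive ids (one per split)
        { "conv1_bias" },        // bias primitive ids
        { 1, 1, 1, 1 },          // stride
        cldnn::tensor(0),        // input_offset
        { 1, 1, 1, 1 });         // dilation
    // If ReLU is required, chain a separate activation primitive on "conv1" instead.
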
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/deconvolution.h"
 #include "deconvolution.hpp"
 #include "primitive.hpp"
 #include <vector>
@@ -48,7 +47,7 @@ struct convolution_grad_input : public deconvolution {
                            tensor stride = {1, 1, 1, 1},
                            tensor input_offset = {0, 0, 0, 0},
                            const padding& output_padding = padding())
-        : deconvolution(id, input, {weights}, stride, input_offset, false, 0.0f, output_padding, true) {}
+        : deconvolution(id, input, {weights}, stride, input_offset, output_padding, true) {}
 
     /// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
     /// @param id This primitive id.
@@ -67,10 +66,7 @@ struct convolution_grad_input : public deconvolution {
                            tensor input_offset,
                            tensor output_size,
                            const padding& output_padding = padding())
-        : deconvolution(id, input, {weights}, stride, input_offset, false, 0.0f, output_size, output_padding, true) {}
-
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{convolution_grad_input}
-    explicit convolution_grad_input(const dto* dto) : deconvolution(dto) {}
+        : deconvolution(id, input, {weights}, stride, input_offset, output_size, output_padding, true) {}
 
     /// @brief Constructs convolution_grad_input primitive (computes input paddings to match output size).
     /// @param id This primitive id.
@@ -96,4 +92,4 @@ struct convolution_grad_input : public deconvolution {
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
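
convolution_grad_input remains a thin wrapper over deconvolution and, after this change, forwards without the dropped activation arguments. A hedged construction sketch (the public parameter list is not visible in this hunk, so the order below is assumed and all ids are illustrative):

    cldnn::convolution_grad_input grad_input_prim(
        "conv1_grad_input",        // this primitive's id
        "conv1_output_grad",       // gradient input primitive id
        { "conv1_weights" },       // weights primitive ids
        { 1, 1, 1, 1 },            // stride
        { 0, 0, 0, 0 });           // input_offset
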
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/convolution_grad_weights.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -32,7 +31,7 @@ namespace cldnn {
 /// @details convolution_grad_weights updates weights and bias mutable data for training purposes.
 /// @details Please note that this primitive was not heavily tested and currently only batch=1 is enabled for this primitive.
 struct convolution_grad_weights
-    : public primitive_base<convolution_grad_weights, CLDNN_PRIMITIVE_DESC(convolution_grad_weights)> {
+    : public primitive_base<convolution_grad_weights> {
     CLDNN_DECLARE_PRIMITIVE(convolution_grad_weights)
 
     /// @brief Constructs convolution_grad_weights primitive.
@@ -58,19 +57,15 @@ struct convolution_grad_weights
                              const primitive_id& conv_grad = "",
                              const padding& output_padding = padding())
         : primitive_base(id, {input_grad, input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          prev_weights_grad(_prev_weights_grad.cpp_ids),
-          prev_bias_grad(_prev_bias_grad.cpp_ids),
           conv_grad(conv_grad),
           stride(stride),
           input_offset(input_offset),
           dilation(dilation),
           output_grad_w(false),
-          _weights(weights),
-          _bias(bias),
-          _prev_weights_grad(std::vector<primitive_id>(0)),
-          _prev_bias_grad(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(bias),
+          prev_weights_grad(std::vector<primitive_id>(0)),
+          prev_bias_grad(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution_grad_weights primitive (w/o bias).
     /// @param id This primitive id.
@@ -95,19 +90,15 @@ struct convolution_grad_weights
                              const primitive_id& conv_grad = "",
                              const padding& output_padding = padding())
         : primitive_base(id, {input_grad, input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          prev_weights_grad(_prev_weights_grad.cpp_ids),
-          prev_bias_grad(_prev_bias_grad.cpp_ids),
           conv_grad(conv_grad),
           stride(stride),
           input_offset(input_offset),
           dilation(dilation),
           output_grad_w(output_grad_w),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _prev_weights_grad(std::vector<primitive_id>(0)),
-          _prev_bias_grad(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          prev_weights_grad(std::vector<primitive_id>(0)),
+          prev_bias_grad(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution_grad_weights primitive (w/o bias).
     /// @param id This primitive id.
@@ -130,19 +121,15 @@ struct convolution_grad_weights
                              const primitive_id& conv_grad = "",
                              const padding& output_padding = padding())
         : primitive_base(id, {input_grad, input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          prev_weights_grad(_prev_weights_grad.cpp_ids),
-          prev_bias_grad(_prev_bias_grad.cpp_ids),
           conv_grad(conv_grad),
           stride(stride),
           input_offset(input_offset),
           dilation(dilation),
           output_grad_w(false),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
-          _prev_weights_grad(std::vector<primitive_id>(0)),
-          _prev_bias_grad(std::vector<primitive_id>(0)) {}
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
+          prev_weights_grad(std::vector<primitive_id>(0)),
+          prev_bias_grad(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs convolution_grad_weights primitive with momentum optimizer.
     /// @param id This primitive id.
@@ -171,50 +158,16 @@ struct convolution_grad_weights
                              const primitive_id& conv_grad = "",
                              const padding& output_padding = padding())
         : primitive_base(id, {input_grad, input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          prev_weights_grad(_prev_weights_grad.cpp_ids),
-          prev_bias_grad(_prev_bias_grad.cpp_ids),
           conv_grad(conv_grad),
           stride(stride),
           input_offset(input_offset),
           dilation(dilation),
           output_grad_w(false),
-          _weights(weights),
-          _bias(bias),
-          _prev_weights_grad(prev_weights_grad),
-          _prev_bias_grad(prev_bias_grad) {}
+          weights(weights),
+          bias(bias),
+          prev_weights_grad(prev_weights_grad),
+          prev_bias_grad(prev_bias_grad) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{convolution_grad_weights}
-    convolution_grad_weights(const dto* dto)
-        : primitive_base(dto),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          prev_weights_grad(_prev_weights_grad.cpp_ids),
-          prev_bias_grad(_prev_bias_grad.cpp_ids),
-          conv_grad(dto->conv_grad),
-          stride(dto->stride),
-          input_offset(dto->input_offset),
-          dilation(dto->dilation),
-          output_grad_w(dto->output_grad_w),
-          _weights(dto->weights),
-          _bias(dto->bias),
-          _prev_weights_grad(dto->prev_weights_grad),
-          _prev_bias_grad(dto->prev_bias_grad) {
-        if (!dto->split || (weights.size() != bias.size() && bias.size() != 0) || dto->split != weights.size())
-            throw std::invalid_argument("Invalid convolution_grad_weights dto: bad split value");
-    }
-
-    /// @brief List of primitive ids containing weights data.
-    fixed_size_vector_ref weights;
-    /// @brief List of primitive ids containing bias data.
-    fixed_size_vector_ref bias;
-    /// @brief Array of primitive ids containing weights gradient data calculated in previous iteration.
-    /// Amount of primitives and their memory sizes should be same as weights.
-    fixed_size_vector_ref prev_weights_grad;
-    /// @brief Array of primitive ids containing bias gradient data calculated in previous iteration.
-    /// Amount of primitives and their memory sizes should be same as biases.
-    fixed_size_vector_ref prev_bias_grad;
     /// @brief Primitive id containing convolution gradient data.
     primitive_id conv_grad;
     /// @brief Defines shift in input buffer between adjacent calculations of output values.
@@ -228,45 +181,37 @@ struct convolution_grad_weights
     tensor dilation;
     /// @brief Indicates whether the primitive should output the weights gradient (delta).
     bool output_grad_w;
+    /// @brief List of primitive ids containing weights data.
+    const primitive_id_arr weights;
+    /// @brief List of primitive ids containing bias data.
+    const primitive_id_arr bias;
+    /// @brief Array of primitive ids containing weights gradient data calculated in previous iteration.
+    /// The number of primitives and their memory sizes should be the same as for weights.
+    const primitive_id_arr prev_weights_grad;
+    /// @brief Array of primitive ids containing bias gradient data calculated in previous iteration.
+    /// The number of primitives and their memory sizes should be the same as for biases.
+    const primitive_id_arr prev_bias_grad;
 
     /// @brief Number of cards the computation is split across.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
 
 protected:
-    primitive_id_arr _weights;
-    primitive_id_arr _bias;
-    primitive_id_arr _prev_weights_grad;
-    primitive_id_arr _prev_bias_grad;
-
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(weights.size() + bias.size() + !conv_grad.empty() + prev_weights_grad.size() +
                     prev_bias_grad.size());
-        for (auto& w : weights) ret.push_back(w);
-        for (auto& b : bias) ret.push_back(b);
+        for (auto& w : weights) ret.push_back(std::ref(w));
+        for (auto& b : bias) ret.push_back(std::ref(b));
 
-        for (auto& g : prev_weights_grad) ret.push_back(g);
-        for (auto& g : prev_bias_grad) ret.push_back(g);
+        for (auto& g : prev_weights_grad) ret.push_back(std::ref(g));
+        for (auto& g : prev_bias_grad) ret.push_back(std::ref(g));
         if (!conv_grad.empty())
             ret.push_back(conv_grad);
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = _weights.ref();
-        dto.bias = _bias.ref();
-        dto.input_offset = input_offset;
-        dto.dilation = dilation;
-        dto.split = split();
-        dto.stride = stride;
-        dto.output_grad_w = output_grad_w;
-        dto.conv_grad = conv_grad.c_str();
-        dto.prev_bias_grad = _prev_bias_grad.ref();
-        dto.prev_weights_grad = _prev_weights_grad.ref();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/crop.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -59,7 +58,7 @@ constexpr auto crop_borders = crop_borders_t{};
 /// @n - Sum of sizes of opposite borders must be lower than input size (on all non-ignored dimensions).
 /// @n
 /// @n Breaking any of these conditions will cause an exception to be thrown.
-struct crop : public primitive_base<crop, CLDNN_PRIMITIVE_DESC(crop)> {
+struct crop : public primitive_base<crop> {
     CLDNN_DECLARE_PRIMITIVE(crop)
 
     /// @brief Constructs crop primitive.
@@ -111,19 +110,10 @@ struct crop : public primitive_base<crop, CLDNN_PRIMITIVE_DESC(crop)> {
          const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), reference_input(xy_borders.negate()), offsets(xy_borders) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{crop}
-    crop(const dto* dto) : primitive_base(dto), reference_input(dto->reference_input), offsets(dto->offsets) {}
-
     /// @brief Reference input tensor with the required dimensions.
     tensor reference_input;
     /// @brief Input offsets.
     tensor offsets;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.reference_input = reference_input;
-        dto.offsets = offsets;
-    }
 };
 /// @}
 /// @}
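
A minimal usage sketch of the trimmed crop primitive, assuming the (id, input, reference_input, offsets) overload declared earlier in this header, the usual topology/input_layout helpers, a (batch, feature, x, y) tensor argument order, and <api/...> include paths; all ids and sizes are illustrative.

    #include <api/crop.hpp>
    #include <api/input_layout.hpp>
    #include <api/topology.hpp>

    // Crop the central 2x2 window out of a 1x2x4x4 (bfyx) input.
    static cldnn::topology make_crop_topology() {
        cldnn::topology topo;
        topo.add(cldnn::input_layout("input",
            cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 2, 4, 4))));
        topo.add(cldnn::crop("crop1", "input",
            cldnn::tensor(1, 2, 2, 2),    // reference_input: required output dimensions
            cldnn::tensor(0, 0, 1, 1)));  // offsets: where the crop window starts
        return topo;
    }

This satisfies the conditions listed above: the offsets are non-negative and reference_input plus offsets does not exceed the input size on any dimension.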
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/custom_gpu_primitive.h"
 #include "primitive.hpp"
 #include "memory.hpp"
 #include <vector>
@@ -33,9 +32,24 @@ namespace cldnn {
 /// @brief This primitive executes a custom kernel provided by the application
 /// @details The application is required to provide all relevant details for executing the custom kernel
 /// such as: sources, entry point, work sizes and parameter bindings.
-struct custom_gpu_primitive : public primitive_base<custom_gpu_primitive, CLDNN_PRIMITIVE_DESC(custom_gpu_primitive)> {
+struct custom_gpu_primitive : public primitive_base<custom_gpu_primitive> {
     CLDNN_DECLARE_PRIMITIVE(custom_gpu_primitive)
 
+    /// @brief Custom primitive kernel argument type
+    enum arg_type {
+        arg_input,
+        arg_output,
+    };
+    //
+    /// @brief Custom primitive kernel argument index
+    using arg_index = uint32_t;
+    //
+    /// @brief Custom primitive kernel argument description
+    struct arg_desc {
+        arg_type type;
+        arg_index index;
+    };
+
     /// @brief Constructs custom_gpu_primitive primitive
     /// @param id This primitive id.
     /// @param input Input primitive ids.
@@ -50,39 +64,24 @@ struct custom_gpu_primitive : public primitive_base<custom_gpu_primitive, CLDNN_
                          const std::vector<primitive_id>& input,
                          const std::vector<std::string>& kernels_code,
                          const std::string& kernel_entry_point,
-                         const std::vector<cldnn_arg>& kernel_arguments,
+                         const std::vector<arg_desc>& kernel_arguments,
                          const std::string& build_options,
                          const layout& output_layout,
                          const std::vector<size_t>& gws = {},
                          const std::vector<size_t>& lws = {})
         : primitive_base(id, {input}, output_layout.data_padding),
-          kernels_code(_kernels_code.cpp_ids),
           kernel_entry_point(kernel_entry_point),
           kernel_arguments(kernel_arguments),
           build_options(build_options),
           output_layout(output_layout),
           gws(gws.size() ? gws : std::vector<size_t>{output_layout.count()}),
           lws(lws),
-          _kernels_code(kernels_code) {}
-
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{custom_gpu_primitive}
-    custom_gpu_primitive(const dto* dto)
-        : primitive_base(dto),
-          kernels_code(_kernels_code.cpp_ids),
-          kernel_entry_point(dto->kernel_entry_point),
-          kernel_arguments(dto->kernel_arguments, dto->kernel_arguments + dto->kernel_arguments_num),
-          build_options(dto->build_options),
-          output_layout(dto->output_layout),
-          gws(dto->gws, dto->gws + dto->gws_num),
-          lws(dto->lws, dto->lws + dto->lws_num),
-          _kernels_code(dto->kernels_code) {}
+          kernels_code(kernels_code) {}
 
-    /// @brief Source code for the kernel
-    fixed_size_vector_ref kernels_code;
     /// @brief The name of the entry point function in the kernel
     const std::string kernel_entry_point;
     /// @brief Argument bindings for the entry point function
-    const std::vector<cldnn_arg> kernel_arguments;
+    const std::vector<arg_desc> kernel_arguments;
     /// @brief The kernel's build options
     const std::string build_options;
     /// @brief The output layout declared by the primitive
@@ -91,22 +90,8 @@ struct custom_gpu_primitive : public primitive_base<custom_gpu_primitive, CLDNN_
     const std::vector<size_t> gws;
     /// @brief The local working sizes
     const std::vector<size_t> lws;
-
-protected:
-    primitive_id_arr _kernels_code;
-
-    void update_dto(dto& dto) const override {
-        dto.kernels_code = _kernels_code.ref();
-        dto.kernel_entry_point = kernel_entry_point.c_str();
-        dto.kernel_arguments = kernel_arguments.data();
-        dto.kernel_arguments_num = static_cast<int>(kernel_arguments.size());
-        dto.build_options = build_options.c_str();
-        dto.output_layout = (cldnn_layout)output_layout;
-        dto.gws = gws.data();
-        dto.gws_num = static_cast<int>(gws.size());
-        dto.lws = lws.data();
-        dto.lws_num = static_cast<int>(lws.size());
-    }
+    /// @brief Source code for the kernel
+    const primitive_id_arr kernels_code;
 };
 /// @}
 /// @}
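
A sketch of how the new arg_desc bindings replace the former cldnn_arg array: a one-to-one OpenCL copy kernel whose first argument is bound to input 0 and whose second argument is bound to output buffer 0. The kernel source, ids, layout and <api/...> include path are illustrative assumptions; the constructor itself is the one declared above.

    #include <api/custom_gpu_primitive.hpp>
    #include <string>
    #include <vector>

    static cldnn::custom_gpu_primitive make_copy_primitive() {
        // OpenCL C source compiled by the engine when the network is built.
        const std::string kernel_src = R"CL(
            __kernel void copy_kernel(const __global float* in, __global float* out) {
                const uint i = get_global_id(0);
                out[i] = in[i];
            })CL";

        const cldnn::layout out_layout(
            cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, 4, 4));

        // Kernel argument 0 <- input primitive 0, kernel argument 1 <- output buffer 0.
        const std::vector<cldnn::custom_gpu_primitive::arg_desc> args = {
            {cldnn::custom_gpu_primitive::arg_input, 0},
            {cldnn::custom_gpu_primitive::arg_output, 0}};

        return cldnn::custom_gpu_primitive(
            "copy", {"input"}, {kernel_src}, "copy_kernel", args,
            /*build_options*/ "", out_layout,
            /*gws*/ {out_layout.count()});
    }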
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/data.h"
 #include "primitive.hpp"
 #include "memory.hpp"
 
@@ -32,7 +31,7 @@ namespace cldnn {
 /// @details This primitive allows passing data that is known at topology creation time.
 /// For example, weights and biases for scoring networks.
 /// @note Passing data at topology creation may improve network performance if data optimization is enabled.
-struct data : public primitive_base<data, CLDNN_PRIMITIVE_DESC(data)> {
+struct data : public primitive_base<data> {
     CLDNN_DECLARE_PRIMITIVE(data)
 
     /// @brief Constructs data primitive.
@@ -42,20 +41,9 @@ struct data : public primitive_base<data, CLDNN_PRIMITIVE_DESC(data)> {
     data(const primitive_id& id, const memory& mem)
         : primitive_base(id, {}, padding()), mem(mem) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{data}
-    explicit data(const dto* dto)
-        : primitive_base(dto), mem(dto->mem) {
-        mem.retain();
-    }
-
     /// @brief @ref memory object which contains data.
     /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build.
     memory mem;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.mem = mem.get();
-    }
 };
 /// @}
 /// @}
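
A sketch of feeding constant weights through the trimmed data primitive. The memory::allocate and pointer<> helpers, the engine argument and the <api/...> include paths are assumed from the rest of the clDNN C++ API; ids and sizes are illustrative.

    #include <api/data.hpp>
    #include <api/engine.hpp>
    #include <api/memory.hpp>
    #include <api/topology.hpp>

    static cldnn::topology make_weights_topology(const cldnn::engine& eng) {
        // Allocate a small weights buffer and fill it on the host.
        const cldnn::layout w_layout(
            cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, 3, 3));
        cldnn::memory w_mem = cldnn::memory::allocate(eng, w_layout);
        {
            auto ptr = w_mem.pointer<float>();  // locks the buffer for host access
            for (size_t i = 0; i < w_mem.get_layout().count(); ++i)
                ptr[i] = 0.1f;
        }
        // The memory object is captured by the data primitive; per the note above,
        // an attached (rather than allocated) buffer must stay valid until network build.
        cldnn::topology topo;
        topo.add(cldnn::data("weights", w_mem));
        return topo;
    }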
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/deconvolution.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -32,9 +31,8 @@ namespace cldnn {
 /// Also supports built-in Relu @ref activation available by setting it in arguments.
 /// @details Deconvolution is similar to a convolution layer with the weights flipped on the axis
 /// and the stride and input padding parameters used in the opposite sense to convolution.
-struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC(deconvolution)> {
+struct deconvolution : public primitive_base<deconvolution> {
     CLDNN_DECLARE_PRIMITIVE(deconvolution)
-
     /// @brief Constructs deconvolution primitive.
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -51,20 +49,14 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   const std::vector<primitive_id>& bias,
                   tensor stride = {1, 1, 1, 1},
                   tensor input_offset = {0, 0, 0, 0},
-                  bool with_activation = false,
-                  float activation_slp = 0.0f,
                   const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
-          _weights(weights),
-          _bias(bias),
+          weights(weights),
+          bias(bias),
           _gradient(false) {}
     /// @brief Constructs deconvolution primitive.
     /// @param id This primitive id.
@@ -84,20 +76,14 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   uint32_t groups,
                   tensor stride = {1, 1, 1, 1},
                   tensor input_offset = {0, 0, 0, 0},
-                  bool with_activation = false,
-                  float activation_slp = 0.0f,
                   const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(groups),
-          _weights(weights),
-          _bias(bias),
+          weights(weights),
+          bias(bias),
           _gradient(false) {}
 
     /// @brief Constructs deconvolution primitive (w/o bias).
@@ -114,21 +100,15 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   const std::vector<primitive_id>& weights,
                   tensor stride = {1, 1, 1, 1},
                   tensor input_offset = {0, 0, 0, 0},
-                  bool with_activation = false,
-                  float activation_slp = 0.0f,
                   const padding& output_padding = padding(),
                   bool gradient = false)
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(1),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
           _gradient(gradient) {}
 
     /// @brief Constructs deconvolution primitive (w/o bias).
@@ -147,21 +127,15 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   uint32_t groups,
                   tensor stride = {1, 1, 1, 1},
                   tensor input_offset = {0, 0, 0, 0},
-                  bool with_activation = false,
-                  float activation_slp = 0.0f,
                   const padding& output_padding = padding(),
                   bool gradient = false)
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           groups(groups),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
           _gradient(gradient) {}
 
     /// @brief Constructs deconvolution primitive (computes input paddings to match output size).
@@ -181,22 +155,16 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   const std::vector<primitive_id>& bias,
                   tensor stride,
                   tensor input_offset,
-                  bool with_activation,
-                  float activation_slp,
                   tensor output_size,
                   const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(true),
           output_size(output_size),
           groups(1),
-          _weights(weights),
-          _bias(bias),
+          weights(weights),
+          bias(bias),
           _gradient(false) {}
 
     /// @brief Constructs deconvolution primitive (computes input paddings to match output size).
@@ -218,22 +186,16 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   uint32_t groups,
                   tensor stride,
                   tensor input_offset,
-                  bool with_activation,
-                  float activation_slp,
                   tensor output_size,
                   const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(true),
           output_size(output_size),
           groups(groups),
-          _weights(weights),
-          _bias(bias),
+          weights(weights),
+          bias(bias),
           _gradient(false) {}
 
     /// @brief Constructs deconvolution primitive (w/o bias, computes input paddings to match output size).
@@ -251,44 +213,19 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                   const std::vector<primitive_id>& weights,
                   tensor stride,
                   tensor input_offset,
-                  bool with_activation,
-                  float activation_slp,
                   tensor output_size,
                   const padding& output_padding = padding(),
                   bool gradient = false)
         : primitive_base(id, {input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(true),
           output_size(output_size),
           groups(1),
-          _weights(weights),
-          _bias(std::vector<primitive_id>(0)),
+          weights(weights),
+          bias(std::vector<primitive_id>(0)),
           _gradient(gradient) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{deconvolution}
-    deconvolution(const dto* dto)
-        : primitive_base(dto),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          input_offset(dto->input_offset),
-          stride(dto->stride),
-          with_activation(dto->with_activation != 0),
-          activation_negative_slope(dto->activation_negative_slope),
-          with_output_size(dto->with_output_size != 0),
-          output_size(dto->output_size),
-          groups(dto->groups),
-          _weights(dto->weights),
-          _bias(dto->bias),
-          _gradient(dto->gradient != 0) {
-        if (!dto->split || (weights.size() != bias.size() && bias.size() != 0) || dto->split != weights.size())
-            throw std::invalid_argument("Invalid deconvolution dto: bad split value");
-    }
-
     /// @brief Constructs deconvolution primitive (computes input paddings to match output size).
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -308,8 +245,6 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                                                  tensor output_size,
                                                  tensor stride = {1, 1, 1, 1},
                                                  tensor input_offset = {0, 0, 0, 0},
-                                                 bool with_activation = false,
-                                                 float activation_slp = 0.0f,
                                                  const padding& output_padding = padding()) {
         return deconvolution(id,
                              input,
@@ -317,8 +252,6 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                              bias,
                              stride,
                              input_offset,
-                             with_activation,
-                             activation_slp,
                              output_size,
                              output_padding);
     }
@@ -340,38 +273,30 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
                                                  tensor output_size,
                                                  tensor stride = {1, 1, 1, 1},
                                                  tensor input_offset = {0, 0, 0, 0},
-                                                 bool with_activation = false,
-                                                 float activation_slp = 0.0f,
                                                  const padding& output_padding = padding()) {
         return deconvolution(id,
                              input,
                              weights,
                              stride,
                              input_offset,
-                             with_activation,
-                             activation_slp,
                              output_size,
                              output_padding);
     }
 
-    /// @brief List of primitive ids containing weights data.
-    fixed_size_vector_ref weights;
-    /// @brief List of primitive ids containing bias data.
-    fixed_size_vector_ref bias;
     /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the deconvolution window should start calculations.
     tensor input_offset;
     /// @brief Defines shift in input buffer between adjacent calculations of output values.
     tensor stride;
-    /// @brief Enables Relu activation.
-    bool with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
     /// @brief Indicates that the primitive has user-defined output size (non-zero value).
     bool with_output_size;
     /// @brief User-defined output data size of the primitive (w/o padding).
     tensor output_size;
     /// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
     uint32_t groups;
+    /// @brief List of primitive ids containing weights data.
+    const primitive_id_arr weights;
+    /// @brief List of primitive ids containing bias data.
+    const primitive_id_arr bias;
 
     /// @brief Number of cards the computation is split across.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
@@ -379,32 +304,16 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
     bool gradient() const { return _gradient; }
 
 protected:
-    primitive_id_arr _weights;
-    primitive_id_arr _bias;
     bool _gradient;
 
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(weights.size() + bias.size());
-        for (auto& w : weights) ret.push_back(w);
-        for (auto& b : bias) ret.push_back(b);
+        for (auto& w : weights) ret.push_back(std::ref(w));
+        for (auto& b : bias) ret.push_back(std::ref(b));
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = _weights.ref();
-        dto.bias = _bias.ref();
-        dto.input_offset = input_offset;
-        dto.split = split();
-        dto.stride = stride;
-        dto.with_activation = with_activation;
-        dto.activation_negative_slope = activation_negative_slope;
-        dto.with_output_size = with_output_size;
-        dto.output_size = output_size;
-        dto.gradient = _gradient;
-        dto.groups = groups;
-    }
 };
 /// @}
 /// @}
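
With the with_activation / activation_slp parameters removed, a deconvolution is now declared as sketched below; any ReLU is assumed to be expressed separately (for example via the standalone activation primitive) rather than fused through constructor arguments. Ids, stride and offset values, and the <api/...> include path are illustrative.

    #include <api/deconvolution.hpp>

    // 2x upsampling deconvolution with external weights/bias primitives.
    static cldnn::deconvolution make_deconv() {
        return cldnn::deconvolution(
            "deconv1", "input",
            /*weights*/ {"deconv1_w"},
            /*bias*/ {"deconv1_b"},
            /*stride*/ cldnn::tensor(1, 1, 2, 2),
            /*input_offset*/ cldnn::tensor(0, 0, -1, -1));
    }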
@@ -16,8 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-
-#include "../C/depth_to_space.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -30,7 +28,7 @@ namespace cldnn {
 
 /// @brief Rearranges data from the depth (feature) dimension into blocks of spatial data.
 /// @details Output feature count is divided by block_size*block_size, while each spatial dimension is multiplied by block_size.
-struct depth_to_space : public primitive_base<depth_to_space, CLDNN_PRIMITIVE_DESC(depth_to_space)> {
+struct depth_to_space : public primitive_base<depth_to_space> {
     CLDNN_DECLARE_PRIMITIVE(depth_to_space)
 
     /// @brief Constructs depth_to_space primitive.
@@ -43,14 +41,8 @@ struct depth_to_space : public primitive_base<depth_to_space, CLDNN_PRIMITIVE_DE
                    const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), block_size(block_size) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{depth_to_space}
-    depth_to_space(const dto* dto) : primitive_base(dto), block_size(dto->block_size) {}
-
     /// @brief Block size.
     size_t block_size;
-
-protected:
-    void update_dto(dto& dto) const override { dto.block_size = block_size; }
 };
 /// @}
 /// @}
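
A one-line usage sketch of the constructor above, assuming the usual DepthToSpace semantics (feature count divided by block_size^2, each spatial dimension multiplied by block_size); the id, input and include path are illustrative.

    #include <api/depth_to_space.hpp>

    // Rearranges a (1, 16, 2, 2) bfyx input into (1, 4, 4, 4) with block_size = 2.
    static cldnn::depth_to_space make_d2s() {
        return cldnn::depth_to_space("d2s", "input", /*block_size*/ 2);
    }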
@@ -17,8 +17,6 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 #include <limits>
-#include "../C/detection_output.h"
-#include "../C/detection_output_sort.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -31,15 +29,15 @@ namespace cldnn {
 
 /// @brief Select method for coding the prior-boxes in the @ref detection output layer.
 enum class prior_box_code_type : int32_t {
-    corner = cldnn_code_type_corner,
-    center_size = cldnn_code_type_center_size,
-    corner_size = cldnn_code_type_corner_size
+    corner,
+    center_size,
+    corner_size
 };
 
 /// @brief Generates a list of detections based on location and confidence predictions by doing non-maximum suppression.
 /// @details Each row is a 7-element vector which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
 /// If the number of detections per image is lower than keep_top_k, dummy results with image_id=-1 are written at the end.
-struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIVE_DESC(detection_output)> {
+struct detection_output : public primitive_base<detection_output> {
     CLDNN_DECLARE_PRIMITIVE(detection_output)
 
     /// @brief Constructs detection output primitive.
@@ -104,32 +102,6 @@ struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIV
                 "Cannot use decrease_label_id and background_label_id parameter simultaneously.");
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{detection_output}
-    detection_output(const dto* dto)
-        : primitive_base(dto),
-          num_classes(dto->num_classes),
-          keep_top_k(dto->keep_top_k),
-          share_location(dto->share_location != 0),
-          background_label_id(dto->background_label_id),
-          nms_threshold(dto->nms_threshold),
-          top_k(dto->top_k),
-          eta(dto->eta),
-          code_type(static_cast<prior_box_code_type>(dto->code_type)),
-          variance_encoded_in_target(dto->variance_encoded_in_target != 0),
-          confidence_threshold(dto->confidence_threshold),
-          prior_info_size(dto->prior_info_size),
-          prior_coordinates_offset(dto->prior_coordinates_offset),
-          prior_is_normalized(dto->prior_is_normalized != 0),
-          input_width(dto->input_width),
-          input_height(dto->input_height),
-          decrease_label_id(dto->decrease_label_id != 0),
-          clip_before_nms(dto->clip_before_nms != 0),
-          clip_after_nms(dto->clip_after_nms != 0) {
-        if (decrease_label_id && background_label_id != 0)
-            throw std::invalid_argument(
-                "Cannot use decrease_label_id and background_label_id parameter simultaneously.");
-    }
-
     /// @brief Number of classes to be predicted.
     const uint32_t num_classes;
     /// @brief Number of total bounding boxes to be kept per image after NMS step.
@@ -168,33 +140,13 @@ struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIV
     const bool clip_after_nms;
 
 protected:
-    void update_dto(dto& dto) const override {
-        dto.num_classes = num_classes;
-        dto.share_location = share_location;
-        dto.background_label_id = background_label_id;
-        dto.nms_threshold = nms_threshold;
-        dto.top_k = top_k;
-        dto.eta = eta;
-        dto.code_type = static_cast<int32_t>(code_type);
-        dto.variance_encoded_in_target = variance_encoded_in_target;
-        dto.keep_top_k = keep_top_k;
-        dto.confidence_threshold = confidence_threshold;
-        dto.prior_info_size = prior_info_size;
-        dto.prior_coordinates_offset = prior_coordinates_offset;
-        dto.prior_is_normalized = prior_is_normalized;
-        dto.input_width = input_width;
-        dto.input_height = input_height;
-        dto.decrease_label_id = decrease_label_id;
-        dto.clip_before_nms = clip_before_nms;
-        dto.clip_after_nms = clip_after_nms;
-    }
 };
 
 /// @brief Generates a list of detections based on location and confidence predictions by doing non-maximum suppression.
 /// @details Each row is a 7-element vector which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
 /// If the number of detections per image is lower than keep_top_k, dummy results with image_id=-1 are written at the end.
 struct detection_output_sort
-    : public primitive_base<detection_output_sort, CLDNN_PRIMITIVE_DESC(detection_output_sort)> {
+    : public primitive_base<detection_output_sort> {
     CLDNN_DECLARE_PRIMITIVE(detection_output_sort)
 
     /// @brief Constructs detection output sort primitive.
@@ -223,16 +175,6 @@ struct detection_output_sort
           top_k(top_k),
           background_label_id(background_label_id) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{detection_output}
-    detection_output_sort(const dto* dto)
-        : primitive_base(dto),
-          num_images(dto->num_images),
-          num_classes(dto->num_classes),
-          keep_top_k(dto->keep_top_k),
-          share_location(dto->share_location != 0),
-          top_k(dto->top_k),
-          background_label_id(dto->background_label_id) {}
-
     /// @brief Number of images.
     const uint32_t num_images;
     /// @brief Number of classes to be predicted.
@@ -245,16 +187,6 @@ struct detection_output_sort
     const int top_k;
     /// @brief Background label id (-1 if there is no background class).
     const int background_label_id;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.num_classes = num_classes;
-        dto.num_images = num_images;
-        dto.keep_top_k = keep_top_k;
-        dto.share_location = share_location;
-        dto.top_k = top_k;
-        dto.background_label_id = background_label_id;
-    }
 };
 /// @}
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/eltwise.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -31,43 +30,43 @@ namespace cldnn {
 /// @brief Select mode for the @ref eltwise layer.
 enum class eltwise_mode : int32_t {
     /// @brief Eltwise sum.
-    sum = cldnn_eltwise_sum,
+    sum,
     /// @brief Eltwise subtract.
-    sub = cldnn_eltwise_sub,
+    sub,
     /// @brief Eltwise max.
-    max = cldnn_eltwise_max,
+    max,
     /// @brief Eltwise product (Hadamard).
-    prod = cldnn_eltwise_prod,
+    prod,
     /// @brief Eltwise div.
-    div = cldnn_eltwise_div,
+    div,
     /// @brief Eltwise min.
-    min = cldnn_eltwise_min,
+    min,
     /// @brief Eltwise pow.
-    pow = cldnn_eltwise_pow,
+    pow,
     /// @brief Eltwise squared diff.
-    squared_diff = cldnn_eltwise_squared_diff,
+    squared_diff,
     /// @brief Eltwise mod.
-    mod = cldnn_eltwise_mod,
+    mod,
     /// @brief Eltwise equal.
-    eq = cldnn_eltwise_eq,
+    eq,
     /// @brief Eltwise not equal.
-    ne = cldnn_eltwise_ne,
+    ne,
     /// @brief Eltwise less.
-    lt = cldnn_eltwise_lt,
+    lt,
     /// @brief Eltwise less or equal.
-    le = cldnn_eltwise_le,
+    le,
     /// @brief Eltwise greater.
-    gt = cldnn_eltwise_gt,
+    gt,
     /// @brief Eltwise greater or equal.
-    ge = cldnn_eltwise_ge,
+    ge,
     /// @brief Eltwise and.
-    logic_and = cldnn_eltwise_and,
+    logic_and,
     /// @brief Eltwise or.
-    logic_or = cldnn_eltwise_or,
+    logic_or,
     /// @brief Eltwise XOR.
-    logic_xor = cldnn_eltwise_xor,
+    logic_xor,
     /// @brief Eltwise floormod.
-    floor_mod = cldnn_eltwise_floor_mod
+    floor_mod
 };
 
 /// @brief Performs elementwise operations (sum, subtract, max or product) on two input primitives
@@ -77,7 +76,7 @@ enum class eltwise_mode : int32_t {
 ///   to the same shape in which the size of each dimension is the max. of input sizes on this dimension)
 /// - format of both inputs has to be the same
 /// - when using integer types, only the following eltwise modes are supported: sum, sub, prod, div
-struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
+struct eltwise : public primitive_base<eltwise> {
     CLDNN_DECLARE_PRIMITIVE(eltwise)
 
     /// @brief Constructs eltwise primitive.
@@ -91,21 +90,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const primitive_id& input,
             const primitive_id& input2,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, {input, input2}, output_padding),
           output_calibration_factors(""),
           output_quantization_factor(1.0f),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -120,21 +113,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const primitive_id& input2,
             std::vector<tensor> stride,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, {input, input2}, output_padding),
           output_calibration_factors(""),
           output_quantization_factor(1.0f),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(stride),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -145,21 +132,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
     eltwise(const primitive_id& id,
             const std::vector<primitive_id>& inputs,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, inputs, output_padding),
           output_calibration_factors(""),
           output_quantization_factor(1.0f),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -174,21 +155,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const primitive_id& input2,
             const primitive_id& output_calibration_factors,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, {input, input2}, output_padding),
           output_calibration_factors(output_calibration_factors),
           output_quantization_factor(1.0f),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -201,21 +176,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const std::vector<primitive_id>& inputs,
             const primitive_id& output_calibration_factors,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, inputs, output_padding),
           output_calibration_factors(output_calibration_factors),
           output_quantization_factor(1.0f),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -230,21 +199,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const primitive_id& input2,
             const float o_quantization_factor,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, {input, input2}, output_padding),
           output_calibration_factors(""),
           output_quantization_factor(o_quantization_factor),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -257,21 +220,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const std::vector<primitive_id>& inputs,
             const float o_quantization_factor,
             eltwise_mode mode,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, inputs, output_padding),
           output_calibration_factors(""),
           output_quantization_factor(o_quantization_factor),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(std::vector<float>(0)),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {}
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {}
 
     /// @brief Constructs eltwise primitive.
     /// @param id This primitive id.
@@ -284,21 +241,15 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
             const std::vector<primitive_id>& inputs,
             eltwise_mode mode,
             const std::vector<float>& coefficients,
-            bool with_activation = false,
-            float activation_slp = 0.0f,
             const padding& output_padding = padding())
         : primitive_base(id, inputs, output_padding),
           output_calibration_factors(""),
           output_quantization_factor(1.0f),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
           input_quantization_factors(0),
           mode(mode),
           coefficients(coefficients),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           stride(std::vector<tensor>(0)),
-          _inputs_calibration_factors(std::vector<primitive_id>(0)),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {
+          inputs_calibration_factors(std::vector<primitive_id>(0)) {
         if (mode == eltwise_mode::sum && !coefficients.empty() && coefficients.size() != inputs.size()) {
             throw std::invalid_argument("Invalid eltwise sum coefficients count (should be equal to 0 or input.size)");
         }
@@ -307,70 +258,31 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)> {
         }
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{eltwise}
-    eltwise(const dto* dto)
-        : primitive_base(dto),
-          output_calibration_factors(dto->output_calibration_factors),
-          output_quantization_factor(dto->output_quantization_factor),
-          inputs_calibration_factors(_inputs_calibration_factors.cpp_ids),
-          input_quantization_factors(float_arr_to_vector(dto->input_quantization_factors)),
-          mode(static_cast<eltwise_mode>(dto->mode)),
-          coefficients(float_arr_to_vector(dto->coefficients)),
-          with_activation(dto->with_activation != 0),
-          activation_negative_slope(dto->activation_negative_slope),
-          stride(tensor_arr_to_vector(dto->stride)),
-          _inputs_calibration_factors(dto->input_calibration_factors),
-          _stride(tensor_vector_to_cldnn_vector(stride)) {
-        if (dto->input.size < 2)
-            throw std::invalid_argument("eltiwise dto should containt at least two inputs");
-        if (dto->coefficients.size != 0 && dto->coefficients.size != dto->input.size)
-            throw std::invalid_argument(
-                "Invalid eltwise coefficients count in dto (should be equal to 0 or input.size)");
-    }
-
     /// @brief Primitive id containing output quantization factors per output feature map.
     primitive_id output_calibration_factors;
     /// @brief Output quantization factor
     float output_quantization_factor;
-    /// @brief List of primitive ids containing input quantization factors per feature map, one primitive id for each input.
-    fixed_size_vector_ref inputs_calibration_factors;
     /// @brief List of quantization factors per input.
     std::vector<float> input_quantization_factors;
     /// @param mode Eltwise mode.
     eltwise_mode mode;
     /// @param coefficients Blob-wise coefficient for SUM operation.
     std::vector<float> coefficients;
-    /// @brief Enables Relu activation.
-    bool with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
     /// @brief Defines shift in input buffers between adjacent calculations of output values.
     std::vector<tensor> stride;
+    /// @brief List of primitive ids containing input quantization factors per feature map, one primitive id for each input.
+    const primitive_id_arr inputs_calibration_factors;
 
 protected:
-    primitive_id_arr _inputs_calibration_factors;
-    std::vector<cldnn_tensor> _stride;
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         if (!output_calibration_factors.empty())
             ret.push_back(output_calibration_factors);
 
-        for (auto& icf : inputs_calibration_factors) ret.push_back(icf);
+        for (auto& icf : inputs_calibration_factors) ret.push_back(std::ref(icf));
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.output_calibration_factors = output_calibration_factors.c_str();
-        dto.output_quantization_factor = output_quantization_factor;
-        dto.input_calibration_factors = _inputs_calibration_factors.ref();
-        dto.input_quantization_factors = float_vector_to_arr(input_quantization_factors);
-        dto.mode = static_cast<cldnn_eltwise_mode>(mode);
-        dto.coefficients = float_vector_to_arr(coefficients);
-        dto.with_activation = with_activation;
-        dto.activation_negative_slope = activation_negative_slope;
-        dto.stride = tensor_vector_to_arr(_stride);
-    }
 };
 /// @}
 /// @}
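
A sketch of the trimmed eltwise constructor, without the removed fused-activation arguments; the two input ids and the include path are illustrative, and broadcasting follows the rules described above.

    #include <api/eltwise.hpp>

    // Element-wise sum of two earlier primitives (e.g. a residual connection).
    static cldnn::eltwise make_residual_sum() {
        return cldnn::eltwise("sum", "conv1", "conv2", cldnn::eltwise_mode::sum);
    }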
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/embed.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -36,7 +35,7 @@ namespace cldnn {
 /// @n output_size = { 8, 75, 15, 1 };
 /// @par Algorithm:
 /// @par Where:
-struct embed : public primitive_base<embed, CLDNN_PRIMITIVE_DESC(embed)> {
+struct embed : public primitive_base<embed> {
     CLDNN_DECLARE_PRIMITIVE(embed)
 
     /// @brief Constructs embed primitive.
@@ -60,11 +59,6 @@ struct embed : public primitive_base<embed, CLDNN_PRIMITIVE_DESC(embed)> {
         const primitive_id& weights)
         : primitive_base(id, {input}), weights(weights), bias("") {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{embed}
-    embed(const dto* dto)
-        : primitive_base(dto), weights(dto->weights), bias(dto->bias) {
-    }
-
     /// @brief Primitive id containing weights data.
     primitive_id weights;
     /// @brief Primitive id containing bias data.
@@ -77,14 +71,9 @@ protected:
         else
             return {weights, bias};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = weights.c_str();
-        dto.bias = bias.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
 }  // namespace cldnn
-#pragma once
\ No newline at end of file
+#pragma once
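
A sketch of the three-argument embed form shown above, where the bias id defaults to an empty string; the weights would typically come from a data primitive, and all ids and the include path are illustrative.

    #include <api/embed.hpp>

    // Embedding lookup without a bias primitive (bias id stays "").
    static cldnn::embed make_embedding() {
        return cldnn::embed("embed1", "input", "embedding_weights");
    }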
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "cldnn_defs.h"
+#include "cldnn.hpp"
 #include <string>
 
 namespace cldnn {
@@ -29,23 +29,23 @@ namespace cldnn {
 
 /// @brief Defines available engine types
 enum class engine_types : int32_t {
-    ocl = cldnn_engine_ocl
+    ocl
 };
 
 /// @brief Defines available priority mode types
 enum class priority_mode_types : int16_t {
-    disabled = cldnn_priority_disabled,
-    low = cldnn_priority_low,
-    med = cldnn_priority_med,
-    high = cldnn_priority_high
+    disabled,
+    low,
+    med,
+    high
 };
 
 /// @brief Defines available throttle mode types
 enum class throttle_mode_types : int16_t {
-    disabled = cldnn_throttle_disabled,
-    low = cldnn_throttle_low,
-    med = cldnn_throttle_med,
-    high = cldnn_throttle_high
+    disabled,
+    low,
+    med,
+    high
 };
 
 /// @brief Configuration parameters for created engine.
@@ -110,44 +110,35 @@ struct engine_configuration {
             throw std::invalid_argument("Invalid streams count set in engine config");
         }
     }
-
-    explicit engine_configuration(const cldnn_engine_configuration& c_conf):
-        enable_profiling(c_conf.enable_profiling != 0),
-        meaningful_kernels_names(c_conf.meaningful_kernels_names != 0),
-        dump_custom_program(c_conf.dump_custom_program != 0),
-        compiler_options(c_conf.compiler_options),
-        single_kernel_name(c_conf.single_kernel_name),
-        enable_parallelisation(c_conf.enable_parallelisation != 0),
-        engine_log(c_conf.engine_log), sources_dumps_dir(c_conf.sources_dumps_dir),
-        priority_mode(static_cast<priority_mode_types>(c_conf.priority_mode)),
-        throttle_mode(static_cast<throttle_mode_types>(c_conf.throttle_mode)),
-        enable_memory_pool(c_conf.enable_memory_pool != 0),
-        n_streams(c_conf.n_streams), context(c_conf.context),
-        tuning_cache_path(c_conf.tuning_cache_path) {}
-
-    /// @brief Implicit conversion to C API @ref ::cldnn_engine_configuration
-    operator ::cldnn_engine_configuration() const {
-        return {
-            enable_profiling,
-            meaningful_kernels_names,
-            dump_custom_program,
-            compiler_options.c_str(),
-            single_kernel_name.c_str(),
-            enable_parallelisation,
-            engine_log.c_str(),
-            sources_dumps_dir.c_str(),
-            static_cast<int16_t>(priority_mode),
-            static_cast<int16_t>(throttle_mode),
-            enable_memory_pool,
-            n_streams,
-            context,
-            tuning_cache_path.c_str()};
-    }
 };
 
 /// @brief Information about the engine properties and capabilities.
-/// @details Look into @ref ::cldnn_engine_info for details.
-using engine_info = ::cldnn_engine_info;
+struct engine_info {
+    uint32_t cores_count;     ///< Number of available HW cores.
+    uint32_t core_frequency;  ///< Clock frequency in MHz.
+
+    uint64_t max_work_group_size;  ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model.
+    uint64_t max_local_mem_size;   ///< Maximum size of local memory arena in bytes.
+    uint64_t max_global_mem_size;  ///< Maximum size of global device memory in bytes.
+    uint64_t max_alloc_mem_size;   ///< Maximum size of memory object allocation in bytes.
+
+    uint64_t max_image2d_width;   ///< Maximum image 2d width supported by the device.
+    uint64_t max_image2d_height;  ///< Maximum image 2d height supported by the device.
+
+    // Flags (for layout compatibility fixed size types are used).
+    uint8_t supports_fp16;             ///< Does engine support FP16.
+    uint8_t supports_fp16_denorms;     ///< Does engine support denormalized FP16.
+    uint8_t supports_subgroups_short;  ///< Does engine support cl_intel_subgroups_short.
+    uint8_t supports_image;            ///< Does engine support images (CL_DEVICE_IMAGE_SUPPORT cap).
+
+    uint8_t supports_imad;   ///< Does engine support int8 mad.
+    uint8_t supports_immad;  ///< Does engine support int8 multi mad.
+
+    std::string dev_name;     ///< Device ID string
+    std::string driver_version;  ///< Version of OpenCL driver
+};
+
+struct engine_impl;
 
 /// @brief Represents clDNN engine object.
 struct engine {
@@ -158,12 +149,8 @@ struct engine {
     /// @brief Construct engine of the specified @p type, @p engine_num, and @p configuration options.
     /// @param[in] type Engine type @ref engine_types. Only the OCL engine is supported.
     /// @param[in] engine_num Engine index. Should be 0.
-    /// @param[in] configuration Pointer to engine configuration options.
-    engine(engine_types type, uint32_t engine_num, const engine_configuration& configuration = engine_configuration())
-        : _impl(check_status<::cldnn_engine>("failed to create engine", [&](status_t* status) {
-              cldnn_engine_configuration conf = configuration;
-              return cldnn_create_engine(static_cast<int32_t>(type), engine_num, &conf, status);
-          })) {}
+    /// @param[in] configuration Engine configuration options.
+    engine(engine_types type, uint32_t engine_num, const engine_configuration& configuration = engine_configuration());
 
     // TODO add move construction/assignment
     engine(const engine& other) : _impl(other._impl) {
@@ -186,65 +173,35 @@ struct engine {
     friend bool operator!=(const engine& lhs, const engine& rhs) { return !(lhs == rhs); }
 
     /// @brief Returns number of available engines of the particular @p type.
-    static uint32_t engine_count(engine_types type) {
-        return check_status<uint32_t>("engine_count failed", [=](status_t* status) {
-            return cldnn_get_engine_count(static_cast<int32_t>(type), status);
-        });
-    }
+    static uint32_t engine_count(engine_types type);
 
     /// @brief Release pending memory allocated in OpenCL context.
-    void release_pending_memory(uint16_t stream_id) const {
-        check_status<void>("flush_memory failed", [=](status_t* status) {
-            return cldnn_release_pending_memory(_impl, stream_id, status);
-        });
-    }
+    void release_pending_memory(uint16_t stream_id) const;
 
     /// @brief Returns information about properties and capabilities for the engine.
-    engine_info get_info() const {
-        return check_status<engine_info>("engine_count failed", [=](status_t* status) {
-            return cldnn_get_engine_info(_impl, status);
-        });
-    }
+    engine_info get_info() const;
 
     /// @brief Returns total size of all resources allocated using the given engine
-    uint64_t get_max_used_device_memory_size() const {
-        return check_status<uint64_t>("get total device memory failed", [=](status_t* status) {
-            return cldnn_get_max_used_device_memory_size(_impl, status);
-        });
-    }
+    uint64_t get_max_used_device_memory_size() const;
 
     /// @brief Returns total size of resources currently allocated using the given engine
-    uint64_t get_temp_used_device_memory_size() const {
-        return check_status<uint64_t>("get device memory failed", [=](status_t* status) {
-            return cldnn_get_temp_used_device_memory_size(_impl, status);
-        });
-    }
+    uint64_t get_temp_used_device_memory_size() const;
 
     /// @brief Returns type of the engine.
-    engine_types get_type() const {
-        return check_status<engine_types>("engine_count failed", [=](status_t* status) {
-            return static_cast<engine_types>(cldnn_get_engine_type(_impl, status));
-        });
-    }
+    engine_types get_type() const;
 
     /// @brief Returns the underlying engine implementation handler.
-    ::cldnn_engine get() const { return _impl; }
+    engine_impl* get() const { return _impl; }
 
 private:
     friend struct network;
     friend struct memory;
     friend struct event;
-    explicit engine(::cldnn_engine impl) : _impl(impl) {
-        if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null");
-    }
-    ::cldnn_engine _impl;
 
-    void retain() {
-        check_status<void>("retain engine failed", [=](status_t* status) { cldnn_retain_engine(_impl, status); });
-    }
-    void release() {
-        check_status<void>("release engine failed", [=](status_t* status) { cldnn_release_engine(_impl, status); });
-    }
+    engine_impl* _impl;
+
+    void retain();
+    void release();
 };
 CLDNN_API_CLASS(engine)
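
A sketch of creating an OpenCL engine with the constructor declared above and reading the new plain-struct engine_info fields that replace ::cldnn_engine_info. The reported values depend on the device; the include path is an assumption.

    #include <api/engine.hpp>
    #include <iostream>

    static void print_engine_info() {
        cldnn::engine eng(cldnn::engine_types::ocl, /*engine_num*/ 0);
        const cldnn::engine_info info = eng.get_info();
        std::cout << "device: " << info.dev_name
                  << ", driver: " << info.driver_version
                  << ", fp16: " << (info.supports_fp16 != 0)
                  << ", max alloc: " << info.max_alloc_mem_size << " bytes\n";
    }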
 
diff --git a/inference-engine/thirdparty/clDNN/api/event.hpp b/inference-engine/thirdparty/clDNN/api/event.hpp
new file mode 100644 (file)
index 0000000..5db6700
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+// Copyright (c) 2016-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "cldnn.hpp"
+#include "engine.hpp"
+#include "profiling.hpp"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+#include <memory>
+#include <functional>
+
+namespace cldnn {
+
+/// @addtogroup cpp_api C++ API
+/// @{
+
+/// @addtogroup cpp_event Events Support
+/// @{
+
+struct event_impl;
+
+/// @brief User-defined event handler callback.
+using event_handler = std::function<void(void*)>;
+
+/// @brief Represents a clDNN Event object
+struct event {
+    /// @brief Create an event which can be set to 'completed' by user.
+    static event create_user_event(const engine& engine, uint16_t stream_id);
+
+    /// @brief Constructs the event from an implementation pointer.
+    explicit event(event_impl* impl) : _impl(impl) {
+        if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null");
+    }
+
+    event(const event& other) : _impl(other._impl) {
+        retain();
+    }
+
+    event& operator=(const event& other) {
+        if (_impl == other._impl) return *this;
+        release();
+        _impl = other._impl;
+        retain();
+        return *this;
+    }
+
+    ~event() {
+        release();
+    }
+
+    friend bool operator==(const event& lhs, const event& rhs) { return lhs._impl == rhs._impl; }
+    friend bool operator!=(const event& lhs, const event& rhs) { return !(lhs == rhs); }
+
+    /// @brief Wait for event completion.
+    void wait() const;
+
+    /// @brief Set event status to 'completed'.
+    void set() const;
+
+    /// @brief Registers a callback to be called on event completion.
+    void set_event_handler(event_handler handler, void* param) const;
+
+    /// @brief Get profiling info for the event associated with network output.
+    std::vector<instrumentation::profiling_interval> get_profiling_info() const;
+
+    /// @brief Returns the underlying implementation handle.
+    event_impl* get() const { return _impl; }
+
+private:
+    event_impl* _impl;
+    void retain();
+    void release();
+};
+CLDNN_API_CLASS(event)
+
+/// @}
+/// @}
+}  // namespace cldnn
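
A short sketch of the user-event path declared above (create_user_event / set_event_handler / set / wait), reusing an engine eng built as in the previous sketch:

    // A user event starts out 'not completed'; set() flips it and unblocks wait().
    cldnn::event ev = cldnn::event::create_user_event(eng, /*stream_id*/ 0);

    // The handler receives the user pointer passed as the second argument.
    ev.set_event_handler([](void* user_data) {
        (void)user_data;  // hypothetical payload, unused here
    }, nullptr);

    ev.set();   // mark completed; registered handlers are expected to fire
    ev.wait();  // returns immediately once the event is completed
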
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/fully_connected.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -50,7 +49,7 @@ namespace cldnn {
 ///        <tr>                            <td >yxfb                   <td>bfyx
 /// </table>
 
-struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_DESC(fully_connected)> {
+struct fully_connected : public primitive_base<fully_connected> {
     CLDNN_DECLARE_PRIMITIVE(fully_connected)
 
     /// @brief Constructs fully connected layer.
@@ -64,8 +63,6 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
                     const primitive_id& input,
                     const primitive_id& weights,
                     const primitive_id& bias = "",
-                    bool with_activation = false,
-                    float activation_slp = 0.0f,
                     const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
           weights(weights),
@@ -73,9 +70,7 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
           weights_quantization_factors(""),
           output_calibration_factors(""),
           input_quantization_factor(1.0f),
-          output_quantization_factor(1.0f),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp) {}
+          output_quantization_factor(1.0f) {}
 
     /// @brief Constructs fully connected layer.
     /// @param id This primitive id.
@@ -94,8 +89,6 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
                     const primitive_id& w_quantization_factor,
                     const float i_quantization_factor,
                     const float o_quantization_factor,
-                    bool with_activation = false,
-                    float activation_slp = 0.0f,
                     const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
           weights(weights),
@@ -103,9 +96,7 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
           weights_quantization_factors(w_quantization_factor),
           output_calibration_factors(""),
           input_quantization_factor(i_quantization_factor),
-          output_quantization_factor(o_quantization_factor),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp) {}
+          output_quantization_factor(o_quantization_factor) {}
 
     /// @brief Constructs fully connected layer.
     /// @param id This primitive id.
@@ -124,8 +115,6 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
                     const primitive_id& w_quantization_factor,
                     const primitive_id& output_calibration_factors,
                     const float i_quantization_factor,
-                    bool with_activation = false,
-                    float activation_slp = 0.0f,
                     const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
           weights(weights),
@@ -133,21 +122,7 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
           weights_quantization_factors(w_quantization_factor),
           output_calibration_factors(output_calibration_factors),
           input_quantization_factor(i_quantization_factor),
-          output_quantization_factor(1.0f),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp) {}
-
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{fully_connected}
-    fully_connected(const dto* dto)
-        : primitive_base(dto),
-          weights(dto->weights),
-          bias(dto->bias),
-          weights_quantization_factors(dto->weights_quantization_factors),
-          output_calibration_factors(dto->output_calibration_factors),
-          input_quantization_factor(dto->input_quantization_factor),
-          output_quantization_factor(dto->output_quantization_factor),
-          with_activation(dto->with_activation != 0),
-          activation_negative_slope(dto->activation_negative_slope) {}
+          output_quantization_factor(1.0f) {}
 
     /// @brief Primitive id containing weights data.
     primitive_id weights;
@@ -161,10 +136,6 @@ struct fully_connected : public primitive_base<fully_connected, CLDNN_PRIMITIVE_
     float input_quantization_factor;
     /// @brief Output quantization factor
     float output_quantization_factor;
-    /// @brief Enable Relu activation.
-    bool with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
 
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
@@ -182,19 +153,8 @@ protected:
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = weights.c_str();
-        dto.bias = bias.c_str();
-        dto.weights_quantization_factors = weights_quantization_factors.c_str();
-        dto.output_calibration_factors = output_calibration_factors.c_str();
-        dto.input_quantization_factor = input_quantization_factor;
-        dto.output_quantization_factor = output_quantization_factor;
-        dto.with_activation = with_activation;
-        dto.activation_negative_slope = activation_negative_slope;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
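
With the with_activation / activation_slp arguments removed from every constructor, a fused ReLU is no longer requested through fully_connected itself. A hedged topology sketch; the data and activation primitives and the tensor argument order are the usual clDNN ones and are assumed here rather than shown in this diff:

    using namespace cldnn;

    // 1x256 input vector; tensor order assumed to be (batch, feature, x, y).
    layout in_layout(data_types::f32, format::bfyx, tensor(1, 256, 1, 1));

    topology topo;
    topo.add(input_layout("input", in_layout));
    topo.add(data("fc_weights", weights_mem));  // weights_mem / bias_mem: cldnn::memory objects prepared elsewhere
    topo.add(data("fc_bias", bias_mem));

    // Plain fully connected layer; no activation flag anymore.
    topo.add(fully_connected("fc1", "input", "fc_weights", "fc_bias"));

    // ReLU attached as a separate activation primitive (constructor shape assumed).
    topo.add(activation("fc1_relu", "fc1", activation_func::relu));
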
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/fully_connected_grad_input.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -29,8 +28,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Performs backward fully connected layer (inner product) for input.
-
-struct fully_connected_grad_input : public primitive_base<fully_connected_grad_input, CLDNN_PRIMITIVE_DESC(fully_connected_grad_input)> {
+struct fully_connected_grad_input : public primitive_base<fully_connected_grad_input> {
     CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_input)
 
     /// @brief Constructs fully connected layer grad for input.
@@ -47,11 +45,6 @@ struct fully_connected_grad_input : public primitive_base<fully_connected_grad_i
         : primitive_base(id, {input_grad, input}, output_padding), weights(weights) {
     }
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{fully_connected_grad_input}
-    fully_connected_grad_input(const dto* dto)
-        : primitive_base(dto), weights(dto->weights) {
-    }
-
     /// @brief Primitive id containing weights data.
     primitive_id weights;
 
@@ -59,12 +52,8 @@ protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         return {weights};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = weights.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/fully_connected_grad_weights.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -29,9 +28,8 @@ namespace cldnn {
 /// @{
 
 /// @brief Performs backward fully connected layer (inner product) for weights and biases.
-
 struct fully_connected_grad_weights
-    : public primitive_base<fully_connected_grad_weights, CLDNN_PRIMITIVE_DESC(fully_connected_grad_weights)> {
+    : public primitive_base<fully_connected_grad_weights> {
     CLDNN_DECLARE_PRIMITIVE(fully_connected_grad_weights)
 
     /// @brief Constructs fully connected layer for weights and biases.
@@ -81,15 +79,6 @@ struct fully_connected_grad_weights
           prev_weights_grad(prev_weights_grad),
           prev_bias_grad(prev_bias_grad) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{fully_connected_grad_weights}
-    fully_connected_grad_weights(const dto* dto)
-        : primitive_base(dto),
-          weights(dto->weights),
-          bias(dto->bias),
-          fc_grad(dto->fc_grad),
-          prev_weights_grad(dto->prev_weights_grad),
-          prev_bias_grad(dto->prev_bias_grad) {}
-
     /// @brief Primitive id containing weights data.
     primitive_id weights;
     /// @brief Primitive id containing bias data.
@@ -119,16 +108,8 @@ protected:
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = weights.c_str();
-        dto.bias = bias.c_str();
-        dto.fc_grad = fc_grad.c_str();
-        dto.prev_weights_grad = prev_weights_grad.c_str();
-        dto.prev_bias_grad = prev_bias_grad.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/gather.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -29,14 +28,14 @@ namespace cldnn {
 
 /// @brief Gathers slices from the dictionary input according to the indices input.
 /// @details
-struct gather : public primitive_base<gather, CLDNN_PRIMITIVE_DESC(gather)> {
+struct gather : public primitive_base<gather> {
     CLDNN_DECLARE_PRIMITIVE(gather)
 
     enum gather_axis {
-        along_b = cldnn_gather_along_b,
-        along_f = cldnn_gather_along_f,
-        along_x = cldnn_gather_along_x,
-        along_y = cldnn_gather_along_y
+        along_b,
+        along_f,
+        along_x,
+        along_y
     };
 
     /// @brief Constructs gather primitive.
@@ -53,20 +52,10 @@ struct gather : public primitive_base<gather, CLDNN_PRIMITIVE_DESC(gather)> {
            const padding& output_padding = padding())
         : primitive_base(id, {dict, idx}, output_padding), axis(axis), output_shape(output_shape) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{gather}
-    gather(const dto* dto)
-        : primitive_base(dto), axis(static_cast<gather_axis>(dto->axis)), output_shape(dto->output_shape) {}
-
     /// @brief Gathering axis
     gather_axis axis;
     /// @brief Gathering input shape
     tensor output_shape;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.axis = static_cast<cldnn_gather_axis>(axis);
-        dto.output_shape = output_shape;
-    }
 };
 /// @}
 /// @}
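
gather_axis now holds plain enumerators whose values follow declaration order instead of aliasing the removed cldnn_gather_* constants. A small sketch, reusing the topology topo from the earlier sketch; "dict" and "idx" are assumed to be primitives already added, and the tensor order is assumed to be (b, f, x, y):

    using namespace cldnn;

    // Gather along the feature axis: output feature i comes from dict feature idx[i].
    const tensor out_shape(1, 4, 8, 8);
    topo.add(gather("gathered", "dict", "idx", gather::along_f, out_shape));
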
diff --git a/inference-engine/thirdparty/clDNN/api/gather_tree.hpp b/inference-engine/thirdparty/clDNN/api/gather_tree.hpp
new file mode 100644 (file)
index 0000000..28980cf
--- /dev/null
@@ -0,0 +1,54 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "primitive.hpp"
+
+namespace cldnn {
+    /// @addtogroup cpp_api C++ API
+    /// @{
+    /// @addtogroup cpp_topology Network Topology
+    /// @{
+    /// @addtogroup cpp_primitives Primitives
+    /// @{
+
+    /// @brief Performs the gather_tree operation.
+    ///
+    /// @details Gathers final token sequences from per-step ids and parent (beam) indices, as used to finalize beam search decoding.
+struct gather_tree : public primitive_base<gather_tree> {
+    CLDNN_DECLARE_PRIMITIVE(gather_tree)
+
+        /// @brief Constructs gather tree primitive / layer.
+        ///
+        /// @param id                      An identifier of new primitive.
+        /// @param step_input              An identifier of the primitive which is the step input
+        /// @param parent_input            An identifier of the primitive which is the parent input
+        /// @param max_seq_len_input       An identifier of the primitive which is an input that contains
+        ///                                the lengths of the step sequences (per batch)
+        /// @param end_token               An identifier of the primitive which is an input that contains
+        ///                                the value of the end_token
+        /// @param output_padding          Optional padding for output from primitive
+        gather_tree(const primitive_id& id,
+            const primitive_id& step_input,
+            const primitive_id& parent_input,
+            const primitive_id& max_seq_len_input,
+            const primitive_id& end_token,
+            const padding& output_padding = padding())
+        : primitive_base(id, { step_input, parent_input, max_seq_len_input, end_token }, output_padding) {}
+};
+    /// @}
+    /// @}
+    /// @}
+}  // namespace cldnn
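
A usage sketch for the new gather_tree primitive; the four input ids are assumed to exist in the topology (typically produced by a beam-search loop):

    // Finalize beam search: reorder per-step token ids according to the parent beam indices.
    topo.add(cldnn::gather_tree("beams_final",
                                "step_ids",      // per-step token ids
                                "parent_ids",    // per-step parent beam indices
                                "max_seq_len",   // per-batch sequence lengths
                                "end_token"));   // end-of-sequence token value
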
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/gemm.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -41,7 +40,7 @@ namespace cldnn {
 /// @n - @c computations with optional params: output = alpha x (input3 x beta + input x input2)
 /// @n - @c transpose params transposing the second matrix <- TODO
 
-struct gemm : public primitive_base<gemm, CLDNN_PRIMITIVE_DESC(gemm)> {
+struct gemm : public primitive_base<gemm> {
     CLDNN_DECLARE_PRIMITIVE(gemm)
 
     /// @brief Constructs gemm layer.
@@ -77,22 +76,6 @@ struct gemm : public primitive_base<gemm, CLDNN_PRIMITIVE_DESC(gemm)> {
     float alpha;
     /// @brief Variable containing BETA parameter
     float beta;
-
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{gemm}
-    gemm(const dto* dto)
-        : primitive_base(dto),
-          transpose_input0(dto->transpose_input0),
-          transpose_input1(dto->transpose_input1),
-          alpha(dto->alpha),
-          beta(dto->beta) {}
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.transpose_input0 = transpose_input0;
-        dto.transpose_input1 = transpose_input1;
-        dto.alpha = alpha;
-        dto.beta = beta;
-    }
 };
 
 }  // namespace cldnn
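
The gemm constructor itself lies outside this hunk; the sketch below assumes it keeps the usual clDNN ordering (id, inputs, transpose_input0, transpose_input1, alpha, beta), matching the fields that remain:

    // C = alpha * A * B^T (plus beta * C_in when a third input is supplied).
    topo.add(cldnn::gemm("matmul", { "A", "B" },
                         /*transpose_input0*/ false,
                         /*transpose_input1*/ true,
                         /*alpha*/ 1.0f,
                         /*beta*/  0.0f));
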
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "../C/index_select.h"
 #include "primitive.hpp"
 #include <vector>
 
 namespace cldnn {
+
+/// @brief Axis which index_select primitive will index.
+enum class index_select_axis_name {
+    along_b,
+    along_f,
+    along_y,
+    along_x
+};
+
 /// @brief Selects indices which will be copied to the output.
 ///
 /// @details Applies index selection along the specified dimension. The indices which will be copied are specified by
@@ -43,7 +51,7 @@ namespace cldnn {
 /// @n - @c indices must be a valid primitive_id, which output's layout is: (bfyx/yxfb, i32, {1, 1, indicies_size, 1})
 /// @n - @c axis - valid index_select_axis_name instance.
 /// @n Breaking any of these conditions will cause an exception to be thrown.
-struct index_select : public primitive_base<index_select, CLDNN_PRIMITIVE_DESC(index_select)> {
+struct index_select : public primitive_base<index_select> {
     CLDNN_DECLARE_PRIMITIVE(index_select)
 
     /// @brief Constructs index_select primitive / layer.
@@ -90,21 +98,10 @@ struct index_select : public primitive_base<index_select, CLDNN_PRIMITIVE_DESC(i
         const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), axis(axis), reverse(true) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{broadcast}
-    index_select(const dto* dto)
-        : primitive_base(dto), axis(dto->axis, dto->axis + dto->axis_num), reverse(dto->reverse) {}
-
     /// @brief A list of axes of index selecting
     std::vector<index_select_axis_name> axis;
     /// @brief Do index_select in reverse order on axis/axes.
     bool reverse;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.axis = axis.data();
-        dto.axis_num = static_cast<int>(axis.size());
-        dto.reverse = reverse;
-    }
 };
 /// @}
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/input_layout.h"
 #include "primitive.hpp"
 #include "memory.hpp"
 
@@ -35,7 +34,7 @@ namespace cldnn {
 /// @note User should call network::set_input_data() for every @p input_layout primitive before network execution.
 /// @note @p output_padding property of @p input_layout is ignored - its output layout is always equal to input layout defined during object creation.
 /// @sa network::set_input_data(), cldnn::data
-struct input_layout : public primitive_base<input_layout, CLDNN_PRIMITIVE_DESC(input_layout)> {
+struct input_layout : public primitive_base<input_layout> {
     CLDNN_DECLARE_PRIMITIVE(input_layout)
 
     /// @brief Constructs input layout primitive.
@@ -44,23 +43,12 @@ struct input_layout : public primitive_base<input_layout, CLDNN_PRIMITIVE_DESC(i
     input_layout(const primitive_id& id, const layout& layout)
         : primitive_base(id, {}, layout.data_padding), layout(layout) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{input_layout}
-    explicit input_layout(const dto* dto)
-        : primitive_base(dto), layout(dto->layout) {
-        output_padding = layout.data_padding;
-    }
-
     /// @brief Defines layout for the data will be passed to network.
     mutable cldnn::layout layout;
 
-    void change_layout(cldnn::layout new_layout) {
+    void change_layout(const cldnn::layout& new_layout) {
         layout = new_layout;
     }
-
-private:
-    void update_dto(dto& dto) const override {
-        dto.layout = layout;
-    }
 };
 /// @}
 /// @}
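
change_layout now takes the layout by const reference; usage is otherwise unchanged. A short sketch of declaring a network input and rebinding its shape before a rebuild (tensor order assumed to be (b, f, x, y)):

    using namespace cldnn;

    input_layout in("input", layout(data_types::f32, format::bfyx, tensor(1, 3, 224, 224)));

    // Rebind to a different spatial size without recreating the primitive.
    in.change_layout(layout(data_types::f32, format::bfyx, tensor(1, 3, 299, 299)));
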
@@ -32,15 +32,19 @@ namespace cldnn {
 /// @addtogroup cpp_memory Memory description and management
 /// @{
 
+constexpr size_t float_type_mask = 0x80;
+constexpr size_t uint_type_mask = 0x40;
+constexpr size_t bin_type_mask = 0x20;
+
 /// @brief Possible data types could be stored in memory.
 enum class data_types : size_t {
-    bin = cldnn_bin,
-    i8 = cldnn_i8,  /// Not supported in current HW
-    u8 = cldnn_u8,  ///
-    i32 = cldnn_i32,
-    i64 = cldnn_i64,
-    f16 = cldnn_f16,
-    f32 = cldnn_f32,
+    bin = sizeof(int32_t) | bin_type_mask,
+    u8 = sizeof(uint8_t) | uint_type_mask,
+    i8 = sizeof(int8_t),
+    f16 = sizeof(int16_t) | float_type_mask,
+    f32 = sizeof(float) | float_type_mask,
+    i32 = sizeof(int32_t),
+    i64 = sizeof(int64_t)
 };
 
 class optional_data_type {
@@ -112,11 +116,11 @@ struct data_type_to_type<data_types::f32> { typedef float type; };
 /// Helper class to identify key properties for data_types.
 struct data_type_traits {
     static size_t size_of(data_types data_type) {
-        return (static_cast<uint32_t>(data_type) & ~(CLDNN_FLOAT_TYPE_MASK | CLDNN_UINT_TYPE_MASK | CLDNN_BIN_TYPE_MASK));
+        return (static_cast<uint32_t>(data_type) & ~(float_type_mask | uint_type_mask | bin_type_mask));
     }
 
     static bool is_floating_point(data_types data_type) {
-        return (static_cast<uint32_t>(data_type) & CLDNN_FLOAT_TYPE_MASK) != 0;
+        return (static_cast<uint32_t>(data_type) & float_type_mask) != 0;
     }
 
     static size_t align_of(data_types data_type) {
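
The enum now encodes each type's byte size in its low bits and tags floating-point, unsigned and binary types with the masks above, which is exactly what size_of and is_floating_point decode. A small self-check illustrating the encoding:

    #include <cassert>
    using namespace cldnn;

    // f16 = sizeof(int16_t) | float_type_mask  ->  2 bytes, floating point.
    assert(data_type_traits::size_of(data_types::f16) == 2);
    assert(data_type_traits::is_floating_point(data_types::f16));

    // i32 carries no mask  ->  4 bytes, not floating point.
    assert(data_type_traits::size_of(data_types::i32) == 4);
    assert(!data_type_traits::is_floating_point(data_types::i32));
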
@@ -248,17 +252,6 @@ struct padding {
     /// @brief Constructs "zero-sized" padding.
     padding() : padding({0, 0, 0, 0}, 0) {}
 
-    /// @brief Copy construction.
-    explicit padding(const cldnn_padding& other)
-        : _lower_size(other.lower_size), _upper_size(other.upper_size), _filling_value(other.filling_value) {}
-
-    /// @brief Implicit conversion to C API @ref cldnn_padding.
-    operator cldnn_padding() const {
-        return {static_cast<cldnn_tensor>(_lower_size),
-                static_cast<cldnn_tensor>(_upper_size),
-                _filling_value};
-    }
-
     /// @brief Returns true if padding size is not zero.
     explicit operator bool() const {
         return std::any_of(_lower_size.raw.begin(), _lower_size.raw.end(), [](const tensor::value_type& el) { return el != 0; }) ||
@@ -310,17 +303,6 @@ struct layout {
     layout(data_types data_type, cldnn::format fmt, tensor size, padding apadding = padding())
         : data_type(data_type), format(fmt), size(size), data_padding(apadding) {}
 
-    /// Construct C++ layout based on C API @p cldnn_layout
-    explicit layout(const cldnn_layout& other) :
-        data_type(static_cast<data_types>(other.data_type)),
-        format(static_cast<cldnn::format::type>(other.format)),
-        size(other.size), data_padding(other.padding) {}
-
-    /// Convert to C API @p cldnn_layout
-    operator cldnn_layout() const {
-        return {static_cast<decltype(cldnn_layout::data_type)>(data_type), static_cast<decltype(cldnn_layout::format)>(format), size, data_padding};
-    }
-
     layout(const layout& other) = default;
 
     layout& operator=(const layout& other) {
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/lookup_table.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -28,7 +27,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Returns values from data on which given indices are pointing at.
-struct lookup_table : public primitive_base<lookup_table, CLDNN_PRIMITIVE_DESC(lookup_table)> {
+struct lookup_table : public primitive_base<lookup_table> {
     CLDNN_DECLARE_PRIMITIVE(lookup_table)
 
     /// @brief Enum type to specify axis to maximize/minimize along.
@@ -48,22 +47,12 @@ struct lookup_table : public primitive_base<lookup_table, CLDNN_PRIMITIVE_DESC(l
           axis(axis),
           with_axis(axis == axis_name::xyf ? false : true) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{lookup_table}
-    lookup_table(const dto* dto)
-        : primitive_base(dto), axis(static_cast<axis_name>(dto->axis)), with_axis(dto->with_axis != 0) {}
-
     /// @brief Axis to return values from. If not set, returns data which index is pointing at in the flattened x, y, f dimensions for each batch.
     axis_name axis;
     /// @brief Indicates that the primitive has user defined axis to return values from.
     bool with_axis;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.with_axis = with_axis;
-        dto.axis = static_cast<cldnn_lookup_table_axis>(axis);
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/lrn.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -27,6 +26,11 @@ namespace cldnn {
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
+typedef enum { /*:int32_t*/
+    lrn_norm_region_across_channel,
+    lrn_norm_region_within_channel
+} lrn_norm_region;
+
 /// @brief Local response normalization
 /// @details LRN layer as described in chapter 3.3 of "ImageNet Classification with Deep Convolutional
 /// Neural Networks" by Khrizevsky, Sutskever, Hinton. @n See: http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
@@ -38,7 +42,7 @@ namespace cldnn {
 ///   @li N : number of feature maps
 ///   @li n : size of normalization
 ///   @li k, alpha, beta : hyper parameters (equal to 2, 10e-4, 0.75 in paper).
-struct lrn : public primitive_base<lrn, CLDNN_PRIMITIVE_DESC(lrn)> {
+struct lrn : public primitive_base<lrn> {
     CLDNN_DECLARE_PRIMITIVE(lrn)
 
     /// @brief Constructs LRN primitive.
@@ -55,7 +59,7 @@ struct lrn : public primitive_base<lrn, CLDNN_PRIMITIVE_DESC(lrn)> {
         float k,
         float alpha,
         float beta,
-        cldnn_lrn_norm_region lrn_norm_region,
+        lrn_norm_region lrn_norm_region,
         const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
           size(size),
@@ -64,15 +68,6 @@ struct lrn : public primitive_base<lrn, CLDNN_PRIMITIVE_DESC(lrn)> {
           beta(beta),
           norm_region(lrn_norm_region) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{normalization}
-    lrn(const dto* dto)
-        : primitive_base(dto),
-          size(dto->size),
-          k(dto->k),
-          alpha(dto->alpha),
-          beta(dto->beta),
-          norm_region(dto->norm_region) {}
-
     /// @brief Size of normalization.
     uint32_t size;
     /// @brief Hyper parameter "k".
@@ -82,18 +77,9 @@ struct lrn : public primitive_base<lrn, CLDNN_PRIMITIVE_DESC(lrn)> {
     /// @brief Hyper parameter "beta".
     float beta;
     /// @brief Normalize across or within channel
-    cldnn_lrn_norm_region norm_region;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.size = size;
-        dto.k = k;
-        dto.alpha = alpha;
-        dto.beta = beta;
-        dto.norm_region = norm_region;
-    }
+    lrn_norm_region norm_region;
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
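
lrn_norm_region replaces cldnn_lrn_norm_region with the same two values. A hedged construction sketch, assuming the parameters ahead of k/alpha/beta keep the (id, input, size, ...) ordering implied by the field list:

    // AlexNet-style LRN across channels: n = 5, k = 2, alpha = 1e-4, beta = 0.75.
    topo.add(cldnn::lrn("norm1", "conv1", /*size*/ 5,
                        /*k*/ 2.0f, /*alpha*/ 1e-4f, /*beta*/ 0.75f,
                        cldnn::lrn_norm_region_across_channel));
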
@@ -16,8 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/lstm.h"
 #include "primitive.hpp"
+#include "activation.hpp"
 #include <vector>
 #include <algorithm>
 
@@ -29,6 +29,34 @@ namespace cldnn {
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
+/// @brief Weights orders
+/// @details Specifies the order in which the weights are concatenated.
+/// e.g. [i, o, f, z] : [input, output, forget, block]
+/// ONNX order: iofz
+/// Caffe order: ifoz
+/// PyTorch order: izof
+/// IE order: fizo
+enum class lstm_weights_order {
+    iofz,
+    ifoz,
+    izof,
+    fizo
+};
+
+/// @brief LSTM Output selection
+/// @details The current implementation allows the user to select the output
+/// of an LSTM node by specifying any of the following options
+enum class lstm_output_selection {
+    /// output the entire hidden sequence
+    sequence = 0,
+    /// output just the last hidden value
+    hidden,
+    /// output the last hidden and last cell values
+    hidden_cell,
+    /// output the hidden sequence concatenated with the last cell
+    sequence_cell
+};
+
 /// @brief Performs forward Long Short-Term Memory (LSTM) layer.
 /// @details The current implementation of LSTM is described by the following equations.
 ///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
@@ -38,7 +66,7 @@ namespace cldnn {
 ///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
 ///   Ht = ot (.) h(Ct)
 /// Where f = Sigmoid, g = Tanh, and h = Tanh.
-struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)> {
+struct lstm : public primitive_base<lstm> {
     CLDNN_DECLARE_PRIMITIVE(lstm)
 
     /// @brief Constructs lstm layer.
@@ -65,10 +93,10 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)> {
          const primitive_id& peepholes = "",
          const float clip = 0,
          const bool input_forget = 0,
-         const std::vector<cldnn_activation_func>& activations = {},
-         const std::vector<cldnn_activation_additional_params> activation_params = {},
-         const cldnn_lstm_output output_selection = cldnn_lstm_output_sequence,
-         const cldnn_lstm_offset_order offset_order = cldnn_lstm_offset_order_iofz,
+         const std::vector<activation_func>& activations = {},
+         const std::vector<activation_additional_params> activation_params = {},
+         const lstm_output_selection output_selection = lstm_output_selection::sequence,
+         const lstm_weights_order offset_order = lstm_weights_order::iofz,
          const padding& output_padding = padding())
         : primitive_base(id, input, output_padding),
           weights(weights),
@@ -84,22 +112,6 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)> {
           output_selection(output_selection),
           offset_order(offset_order) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{lstm}
-    lstm(const dto* dto)
-        : primitive_base(dto),
-          weights(dto->weights),
-          recurrent(dto->recurrent),
-          bias(dto->bias),
-          initial_hidden(dto->initial_hidden),
-          initial_cell(dto->initial_cell),
-          peepholes(dto->peepholes),
-          clip(dto->clip),
-          input_forget(dto->input_forget),
-          activations(dto->activations, std::end(dto->activations)),
-          activation_params(dto->activation_params, std::end(dto->activation_params)),
-          output_selection(dto->output_selection),
-          offset_order(dto->offset_order) {}
-
     /// @brief Primitive id containing weights data.
     primitive_id weights;
     /// @brief Primitive id containing recurrent data.
@@ -117,13 +129,13 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)> {
     /// @brief Couple the input and forget gates if input_forget is 1. Default is 0.
     bool input_forget;
     /// @brief A list of 3 activation functions for the input, output, forget, cell, and hidden.
-    std::vector<cldnn_activation_func> activations;
+    std::vector<activation_func> activations;
     /// @brief Optional scaling values used by some activation functions. The values are consumed in the order of activation functions.
-    std::vector<cldnn_activation_additional_params> activation_params;
+    std::vector<activation_additional_params> activation_params;
     /// @brief Output selection. Default the entire hidden sequence is returned.
-    cldnn_lstm_output output_selection;
+    lstm_output_selection output_selection;
     /// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
-    cldnn_lstm_offset_order offset_order;
+    lstm_weights_order offset_order;
 
     // NOT SUPPORTED YET
     // /// @brief Optional tensor specifying lengths of the sequences in a batch.
@@ -147,30 +159,10 @@ protected:
         }
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = weights.c_str();
-        dto.recurrent = recurrent.c_str();
-        dto.bias = bias.c_str();
-        dto.peepholes = peepholes.c_str();
-        dto.initial_hidden = initial_hidden.c_str();
-        dto.initial_cell = initial_cell.c_str();
-        dto.output_selection = output_selection;
-        dto.offset_order = offset_order;
-        if (activations.size() == 3) {
-            std::copy_n(activations.begin(), 3, dto.activations);
-        }
-        if (activation_params.size() == 3) {
-            std::copy_n(activation_params.begin(), 3, dto.activation_params);
-        }
-        dto.clip = clip;
-        dto.input_forget = input_forget;
-    }
 };
 
-struct lstm_gemm : public primitive_base<lstm_gemm, CLDNN_PRIMITIVE_DESC(lstm_gemm)> {
+struct lstm_gemm : public primitive_base<lstm_gemm> {
     CLDNN_DECLARE_PRIMITIVE(lstm_gemm)
-
     /// @brief Constructs lstm layer.
     /// @param id This primitive id.
     /// @param input input primitive id.
@@ -194,15 +186,6 @@ struct lstm_gemm : public primitive_base<lstm_gemm, CLDNN_PRIMITIVE_DESC(lstm_ge
           hidden(hidden),
           direction(direction) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{lstm}
-    lstm_gemm(const dto* dto)
-        : primitive_base(dto),
-          weights(dto->weights),
-          recurrent(dto->recurrent),
-          bias(dto->bias),
-          hidden(dto->hidden),
-          direction(dto->direction) {}
-
     /// @brief Primitive id containing weights data.
     primitive_id weights;
     /// @brief Primitive id containing recurrent data.
@@ -225,20 +208,12 @@ protected:
             ret.push_back(hidden);
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = weights.c_str();
-        dto.recurrent = recurrent.c_str();
-        dto.bias = bias.c_str();
-        dto.hidden = hidden.c_str();
-        dto.direction = direction;
-    }
 };
 
-struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)> {
+struct lstm_elt : public primitive_base<lstm_elt> {
     CLDNN_DECLARE_PRIMITIVE(lstm_elt)
-    using vec_activation = std::vector<cldnn_activation_func>;
-    using vec_activation_param = std::vector<cldnn_activation_additional_params>;
+    using vec_activation = std::vector<activation_func>;
+    using vec_activation_param = std::vector<activation_additional_params>;
 
     /// @brief Constructs lstm layer.
     /// @param id This primitive id.
@@ -253,9 +228,9 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
              const primitive_id& cell = "",
              const float clip = 0,
              const bool input_forget = 0,
-             const std::vector<cldnn_activation_func> activations = {},
-             const std::vector<cldnn_activation_additional_params> activation_params = {},
-             const cldnn_lstm_offset_order offset_order = cldnn_lstm_offset_order_iofz,
+             const std::vector<activation_func> activations = {},
+             const std::vector<activation_additional_params> activation_params = {},
+             const lstm_weights_order offset_order = lstm_weights_order::iofz,
              const uint32_t direction = 0,
              const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
@@ -267,17 +242,6 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
           offset_order(offset_order),
           direction(direction) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{lstm}
-    lstm_elt(const dto* dto)
-        : primitive_base(dto),
-          cell(dto->cell),
-          clip(dto->clip),
-          input_forget(dto->input_forget),
-          activations(dto->activations, std::end(dto->activations)),
-          activation_params(dto->activation_params, std::end(dto->activation_params)),
-          offset_order(dto->offset_order),
-          direction(dto->direction) {}
-
     /// @brief Primitive id containing the initial value of the cell state data.
     primitive_id cell;
     /// @brief Cell clip threshold T. It is applied to the input of activations [-T, T]. No clip is applied if it is not specified.
@@ -285,11 +249,11 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
     /// @brief Couple the input and forget gates if input_forget is 1. Default is 0.
     bool input_forget;
     /// @brief A list of 3 activation functions for the input, output, forget, cell, and hidden.
-    std::vector<cldnn_activation_func> activations;
+    std::vector<activation_func> activations;
     /// @brief Optional scaling values used by some activation functions. The values are consumed in the order of activation functions.
-    std::vector<cldnn_activation_additional_params> activation_params;
+    std::vector<activation_additional_params> activation_params;
     /// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
-    cldnn_lstm_offset_order offset_order;
+    lstm_weights_order offset_order;
     /// @brief direction default = 0, bidirectional = 1.
     uint32_t direction;
 
@@ -300,23 +264,9 @@ protected:
             ret.push_back(cell);
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.cell = cell.c_str();
-        dto.offset_order = offset_order;
-        dto.clip = clip;
-        dto.input_forget = input_forget;
-        if (activations.size() == 3) {
-            std::copy_n(activations.begin(), 3, dto.activations);
-        }
-        if (activation_params.size() == 3) {
-            std::copy_n(activation_params.begin(), 3, dto.activation_params);
-        }
-        dto.direction = direction;
-    }
 };
 
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
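
The LSTM family now consumes the scoped enums declared above instead of the removed C typedefs. A sketch of preparing the enum-valued arguments; the activation_func enumerator names come from activation.hpp and are assumed here:

    // Three activations: f (gates), g (candidate cell), h (hidden output).
    std::vector<cldnn::activation_func> acts = {
        cldnn::activation_func::logistic,
        cldnn::activation_func::hyperbolic_tan,
        cldnn::activation_func::hyperbolic_tan
    };
    auto out_sel = cldnn::lstm_output_selection::hidden;  // keep only the last hidden state
    auto order   = cldnn::lstm_weights_order::fizo;       // IE weight layout

    // These values are then passed to the lstm / lstm_elt constructors in place of the
    // old cldnn_activation_func / cldnn_lstm_output / cldnn_lstm_offset_order arguments.
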
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/lstm_dynamic.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -37,7 +36,7 @@ namespace cldnn {
 ///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
 ///   Ht = ot (.) h(Ct)
 /// Where f = Sigmoid, g = Tanh, and h = Tanh.
-struct lstm_dynamic : public primitive_base<lstm_dynamic, CLDNN_PRIMITIVE_DESC(lstm_dynamic)> {
+struct lstm_dynamic : public primitive_base<lstm_dynamic> {
     CLDNN_DECLARE_PRIMITIVE(lstm_dynamic)
 
     /// @brief Constructs lstm_dynamic layer.
@@ -78,20 +77,6 @@ struct lstm_dynamic : public primitive_base<lstm_dynamic, CLDNN_PRIMITIVE_DESC(l
           clip(clip),
           input_forget(input_forget) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{lstm_dynamic}
-    lstm_dynamic(const dto* dto)
-        : primitive_base(dto),
-          dyn_length(dto->dyn_length),
-          weights(dto->weights),
-          recurrent(dto->recurrent),
-          last_hidden_state(dto->last_hidden_state),
-          last_cell_state(dto->last_cell_state),
-          bias(dto->bias),
-          initial_hidden(dto->initial_hidden),
-          initial_cell(dto->initial_cell),
-          clip(dto->clip),
-          input_forget(dto->input_forget) {}
-
     /// @brief Primitive id containing the dynamic sequence lengths.
     primitive_id dyn_length;
     /// @brief Primitive id containing weights data.
@@ -137,19 +122,6 @@ protected:
         }
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.dyn_length = dyn_length.c_str();
-        dto.weights = weights.c_str();
-        dto.recurrent = recurrent.c_str();
-        dto.last_hidden_state = last_hidden_state.c_str();
-        dto.last_cell_state = last_cell_state.c_str();
-        dto.bias = bias.c_str();
-        dto.initial_hidden = initial_hidden.c_str();
-        dto.initial_cell = initial_cell.c_str();
-        dto.clip = clip;
-        dto.input_forget = input_forget;
-    }
 };
 
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/max_unpooling.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -30,7 +29,7 @@ namespace cldnn {
 
 /// @brief Performs "max_unpooling" operation.
 /// @details Reverse operation of max pooling, based on the argmax data where indices of each max pooling region are stored.
-struct max_unpooling : public primitive_base<max_unpooling, CLDNN_PRIMITIVE_DESC(max_unpooling)> {
+struct max_unpooling : public primitive_base<max_unpooling> {
     CLDNN_DECLARE_PRIMITIVE(max_unpooling)
 
     /// @brief Constructs max_unpooling primitive.
@@ -73,16 +72,6 @@ struct max_unpooling : public primitive_base<max_unpooling, CLDNN_PRIMITIVE_DESC
           with_output_size(true),
           output_size(output_size) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{max_unpooling}
-    max_unpooling(const dto* dto)
-        : primitive_base(dto),
-          argmax(dto->argmax),
-          input_offset(dto->input_offset),
-          stride(dto->stride),
-          size(dto->size),
-          with_output_size(dto->with_output_size != 0),
-          output_size(dto->output_size) {}
-
     /// @brief Primitive id which contains indices of each max pooling region.
     /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
     primitive_id argmax;
@@ -99,17 +88,8 @@ struct max_unpooling : public primitive_base<max_unpooling, CLDNN_PRIMITIVE_DESC
 
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {argmax}; }
-
-    void update_dto(dto& dto) const override {
-        dto.argmax = argmax.c_str();
-        dto.input_offset = input_offset;
-        dto.stride = stride;
-        dto.size = size;
-        dto.with_output_size = with_output_size;
-        dto.output_size = output_size;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -17,7 +17,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 #include <cstdint>
-#include "cldnn_defs.h"
+#include "cldnn.hpp"
 #include "compounds.h"
 #include "layout.hpp"
 #include "engine.hpp"
@@ -36,9 +36,7 @@ namespace cldnn {
 template <typename T>
 struct pointer;
 
-namespace details {
-struct memory_c_to_cpp_converter;
-}
+struct memory_impl;
 
 /// @brief Represents buffer with particular @ref layout.
 /// @details Usually allocated by @ref engine except cases when attached to user-allocated buffer.
@@ -47,18 +45,9 @@ struct memory {
     friend struct mutable_data;
     friend struct network;
     friend struct network_output;
-    friend struct details::memory_c_to_cpp_converter;
 
     /// Allocate memory on @p engine using specified @p layout
-    static memory allocate(const engine& engine, const layout& layout, uint16_t stream_id = 0) {
-        size_t size = layout.bytes_count();
-        if (size == 0)
-            throw std::invalid_argument("size should be more than 0");
-        memory status = (memory) check_status<cldnn_memory>("memory allocation failed", [&](status_t* status) {
-            return cldnn_allocate_memory(engine.get(), layout, stream_id, status);
-        });
-        return status;
-    }
+    static memory allocate(const engine& engine, const layout& layout, uint16_t stream_id = 0);
 
     /// Create memory object attached to the buffer allocated by user.
     /// @param ptr  The pointer to user allocated buffer.
@@ -75,12 +64,16 @@ struct memory {
             throw std::invalid_argument(err_str);
         }
 
-        return (memory) check_status<cldnn_memory>("memory attach failed", [&](status_t* status) {
-            return cldnn_attach_memory(layout, ptr, data_size, stream_id, status);
-        });
+        return attach_impl(layout, static_cast<void*>(ptr), stream_id);
     }
 
-    memory(const memory& other) : _impl(other._impl), _layout(other._layout), _size(other._size), _count(other._count) {
+    explicit memory(memory_impl* data)
+        : _impl(data) {
+        if (_impl == nullptr)
+            throw std::invalid_argument("implementation pointer should not be null");
+    }
+
+    memory(const memory& other) : _impl(other._impl) {
         retain();
     }
 
@@ -89,9 +82,6 @@ struct memory {
             return *this;
         release();
         _impl = other._impl;
-        _layout = other._layout;
-        _size = other._size;
-        _count = other._count;
         retain();
         return *this;
     }
@@ -102,28 +92,19 @@ struct memory {
     friend bool operator!=(const memory& lhs, const memory& rhs) { return !(lhs == rhs); }
 
     /// number of elements of _layout.data_type stored in memory
-    size_t count() const { return _count; }
+    size_t count() const;
 
     /// number of bytes used by memory
-    size_t size() const { return _size; }
+    size_t size() const;
 
     /// Associated @ref layout
-    const layout& get_layout() const { return _layout; }
-    int get_stream_id() const { return get_stream_id_impl(_impl); }
+    const layout& get_layout() const;
+    int get_stream_id() const;
 
     /// Test if memory is allocated by @p engine
-    bool is_allocated_by(const engine& engine) const {
-        auto my_engine = check_status<cldnn_engine>("get memory engine failed", [&](status_t* status) {
-            return cldnn_get_memory_engine(_impl, status);
-        });
-        return my_engine == engine.get();
-    }
+    bool is_allocated_by(const engine& engine) const;
 
-    bool is_the_same_buffer(const memory& other) const {
-        return check_status<bool>("checking if two memories refers to the same buffer failed", [&](status_t* status) {
-            return cldnn_is_the_same_buffer(_impl, other._impl, status) != 0;
-        });
-    }
+    bool is_the_same_buffer(const memory& other) const;
 
     /// Creates the @ref pointer object to get an access memory data
     template <typename T>
@@ -132,72 +113,28 @@ struct memory {
     cldnn::pointer<T> pointer() const;
 
     /// Implementation memory handle
-    cldnn_memory get() const { return _impl; }
+    memory_impl* get() const { return _impl; }
 
 private:
     friend struct engine;
-    cldnn_memory _impl;
-    layout _layout;
-    size_t _size;
-    size_t _count;
-    int _stream_id;
-
-    static layout get_layout_impl(cldnn_memory mem) {
-        if (!mem)
-            throw std::invalid_argument("mem");
-
-        return check_status<layout>("get memory layout failed",
-                                    [=](status_t* status) { return (layout) cldnn_get_memory_layout(mem, status); });
-    }
-
-    static int get_stream_id_impl(cldnn_memory mem) {
-        if (!mem)
-            throw std::invalid_argument("mem");
-
-        return check_status<int>("get memory layout failed",
-                                 [=](status_t* status) { return cldnn_get_memory_stream_id(mem, status); });
-    }
-
-    explicit memory(cldnn_memory data)
-        : _impl(data),
-          _layout(get_layout_impl(data)),
-          _size(_layout.bytes_count()),
-          _count(_layout.count()),
-          _stream_id(get_stream_id_impl(data)) {
-        if (_impl == nullptr)
-            throw std::invalid_argument("implementation pointer should not be null");
-    }
-
-    void retain() {
-        check_status<void>("retain memory failed", [=](status_t* status) { cldnn_retain_memory(_impl, status); });
-    }
-    void release() {
-        check_status<void>("release memory failed", [=](status_t* status) { cldnn_release_memory(_impl, status); });
-    }
+    memory_impl* _impl;
 
     template <typename T>
     T* lock() const {
-        if (data_type_traits::align_of(_layout.data_type) % alignof(T) != 0) {
+        if (data_type_traits::align_of(get_layout().data_type) % alignof(T) != 0) {
             throw std::logic_error("memory data type alignment do not match");
         }
-        return check_status<T*>("memory lock failed",
-                                [=](status_t* status) { return static_cast<T*>(cldnn_lock_memory(_impl, status)); });
+        return static_cast<T*>(lock_impl());
     }
 
-    void unlock() const {
-        check_status<void>("memory unlock failed",
-                           [=](status_t* status) { return cldnn_unlock_memory(_impl, status); });
-    }
-};
+    void unlock() const;
+
+    void* lock_impl() const;
+    static memory attach_impl(const cldnn::layout& layout, void* ptr, uint16_t stream_id);
 
-namespace details {
-// we need this hackish structure as long as primitives (which are used internally) use c++ api 'memory' (see:
-// cldnn::data)
-struct memory_c_to_cpp_converter {
-    // does not retain @p c_mem
-    static memory convert(cldnn_memory c_mem) { return memory{c_mem}; }
+    void retain();
+    void release();
 };
-}  // namespace details
 
 /// @brief Helper class to get an access @ref memory data
 /// @details
@@ -51,4 +51,4 @@ template <bool Val, bool... Values>
 struct all<Val, Values...> : public std::integral_constant<bool, Val && all<Values...>::value> {};
 
 }  // namespace meta
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/mutable_data.h"
 #include "primitive.hpp"
 #include "memory.hpp"
 #include <vector>
@@ -33,7 +32,7 @@ namespace cldnn {
 /// @details This primitive allows to pass data which can be written to during training.
 /// For example, weights and biases for scoring networks.
 /// This primitive can be also set as other primitive's output. In this case the underlying buffer will be the same in mutable_data and preceding primitive.
-struct mutable_data : public primitive_base<mutable_data, CLDNN_PRIMITIVE_DESC(mutable_data)> {
+struct mutable_data : public primitive_base<mutable_data> {
     CLDNN_DECLARE_PRIMITIVE(mutable_data)
 
     /// @brief Enum type to specify function for data filling.
@@ -59,24 +58,12 @@ struct mutable_data : public primitive_base<mutable_data, CLDNN_PRIMITIVE_DESC(m
                  filler_type fill_type = filler_type::no_fill)
         : primitive_base(id, {input}, padding()), mem(mem), fill_type(fill_type) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{mutable_data}
-    explicit mutable_data(const dto* dto)
-        : primitive_base(dto), mem(dto->mem), fill_type(static_cast<filler_type>(dto->fill_type)) {
-        mem.retain();
-    }
-
     /// @brief @ref memory object which contains data.
     /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build.
     memory mem;
 
     /// @brief Specifies function which will be used to fill weights.
     filler_type fill_type;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.mem = mem.get();
-        dto.fill_type = static_cast<cldnn_filler_type>(fill_type);
-    }
 };
 /// @}
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/mvn.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -29,7 +28,7 @@ namespace cldnn {
 
 /// @brief Mean Variance Normalization primitive.
 /// @details Normalizes the input to have 0-mean and/or unit (1) variance.
-struct mvn : public primitive_base<mvn, CLDNN_PRIMITIVE_DESC(mvn)> {
+struct mvn : public primitive_base<mvn> {
     CLDNN_DECLARE_PRIMITIVE(mvn)
 
     /// @brief Constructs mvn primitive.
@@ -49,28 +48,14 @@ struct mvn : public primitive_base<mvn, CLDNN_PRIMITIVE_DESC(mvn)> {
           normalize_variance(normalize_variance),
           epsilon(epsilon) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{mvn}
-    mvn(const dto* dto)
-        : primitive_base(dto),
-          across_channels(dto->across_channels != 0),
-          normalize_variance(dto->normalize_variance != 0),
-          epsilon(dto->epsilon) {}
-
     /// @brief Determines if the normalization is done across or within channels.
     bool across_channels;
     /// @brief Determines if normalize variance is applied.
     bool normalize_variance;
     /// @brief Epsilon for not dividing by zero while normalizing.
     float epsilon;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.across_channels = across_channels;
-        dto.normalize_variance = normalize_variance;
-        dto.epsilon = epsilon;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/api/network.hpp b/inference-engine/thirdparty/clDNN/api/network.hpp
new file mode 100644 (file)
index 0000000..f593cd4
--- /dev/null
@@ -0,0 +1,200 @@
+/*
+// Copyright (c) 2016-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "cldnn.hpp"
+#include "compounds.h"
+#include "memory.hpp"
+#include "program.hpp"
+#include "event.hpp"
+
+#include <cstdint>
+#include <algorithm>
+#include <map>
+#include <vector>
+#include <utility>
+#include <string>
+
+namespace cldnn {
+
+/// @addtogroup cpp_api C++ API
+/// @{
+
+/// @defgroup cpp_network Network Execution
+/// @{
+
+/// @brief Represents network output returned by @ref network::get_output().
+struct network_output {
+    /// @brief Returns @ref event associated with the output.
+    event get_event() const { return _event; }
+
+    /// @brief Returns @ref memory object of the output. Blocks until the associated @ref event completes.
+    memory get_memory() const {
+        _event.wait();
+        return _result;
+    }
+
+private:
+    event _event;
+    memory _result;
+    network_output(event evt, memory mem) : _event(evt), _result(mem) {}
+    friend struct network;
+};
+
+struct network_impl;
+
+/// @brief Executable network allocated from @ref program.
+struct network {
+    /// @brief Allocate network
+    /// @param program The program object which contains compiled primitives this network should allocate memory for.
+    network(program const& program, uint16_t stream_id);
+
+    /// @brief Constructs network object from implicitly created program object. This is a shorthand for network(program(engine, topology, options))
+    /// @param engine
+    /// @param topology
+    /// @param options
+    network(const engine& engine,
+            const topology& topology,
+            const build_options& options = build_options(),
+            uint16_t stream_id = 0)
+        : network(program(engine, topology, options), stream_id) {}
+
+    /// @brief Constructs network object from an implementation pointer.
+    explicit network(network_impl* impl) : _impl(impl) {
+        if (_impl == nullptr)
+            throw std::invalid_argument("implementation pointer should not be null");
+    }
+
+    /// @brief Copy construction.
+    network(const network& other) : _impl(other._impl) { retain(); }
+
+    /// @brief Copy assignment.
+    network& operator=(const network& other) {
+        if (_impl == other._impl)
+            return *this;
+        release();
+        _impl = other._impl;
+        retain();
+        return *this;
+    }
+
+    /// @brief Releases the wrapped @ref network_impl.
+    ~network() { release(); }
+
+    friend bool operator==(const network& lhs, const network& rhs) { return lhs._impl == rhs._impl; }
+    friend bool operator!=(const network& lhs, const network& rhs) { return !(lhs == rhs); }
+
+    /// @brief Returns @ref engine by which network was built.
+    engine get_engine() const;
+
+    /// @brief Returns network internal @ref program.
+    program get_program() const;
+
+    /// @brief Provides @ref memory for @ref input_layout primitives defined by user in source @ref topology.
+    void set_input_data(const primitive_id& id, const memory& mem) const;
+
+    /// @brief Sets learning rate for training primitives.
+    void set_learning_rate(const float lr);
+
+    /// @brief Return learning rate.
+    float get_learning_rate();
+
+    /// @brief Return stream id.
+    uint16_t get_stream_id();
+
+    /// @brief Returns a string describing the primitive with the given @p id.
+    std::string get_primitive_info(const primitive_id& id) const;
+
+    /// @brief Returns description of final runtime graph
+    std::vector<primitive_info> get_primitives_info();
+
+    /// @brief Returns description of all optimization stages
+    std::vector<std::pair<std::string, std::vector<primitive_info>>> get_optimization_steps_info();
+
+    /// @brief Returns the list of executed primitives.
+    std::vector<primitive_id> get_executed_primitive_ids() const;
+
+    /// @brief Returns the list of all primitives ids in network.
+    std::vector<primitive_id> get_all_primitive_ids() const;
+
+    /// @brief Returns the list of all primitives ids in network before graph optimization.
+    std::vector<primitive_id> get_all_primitive_org_ids() const;
+
+    /// @brief Returns the list of available network outputs.
+    std::vector<primitive_id> get_output_ids() const;
+
+    /// @brief Returns @ref memory object for particular @p output. Can be called before network execution
+    memory get_output_memory(const primitive_id& output_id) const;
+
+    /// @brief Returns @ref event object for particular @p primitive. Can't be called before network execution
+    event get_primitive_event(const primitive_id& output_id) const;
+
+    /// @brief Returns @ref network_output object for particular @p output. Can't be called before network execution
+    network_output get_output(const primitive_id& output_id) const {
+        return network_output(get_primitive_event(output_id), get_output_memory(output_id));
+    }
+
+    /// @brief Returns the list of @ref event for the primitives that were executed in network.
+    std::map<primitive_id, event> get_executed_primitives() const {
+        auto primitive_ids = get_executed_primitive_ids();
+        auto all_primitive_ids = get_all_primitive_ids();
+        auto all_primitive_org_ids = get_all_primitive_org_ids();
+        // Get list of optimized primitives
+        std::vector<primitive_id> optimized_primitives;
+        for (decltype(all_primitive_org_ids.size()) i = 0; i < all_primitive_org_ids.size(); i++) {
+            if (all_primitive_ids[i] == "_optimized_")
+                optimized_primitives.push_back(all_primitive_org_ids[i]);
+        }
+        std::map<primitive_id, event> result;
+        for (auto& id : primitive_ids) {
+            if (std::find(optimized_primitives.begin(), optimized_primitives.end(), id) == optimized_primitives.end())
+                result.emplace(id, get_primitive_event(id));
+        }
+        return result;
+    }
+
+    /// @brief Returns the list of primitive ids before and after graph optimization.
+    /// @details If primitive was not optimized, the old and actual id will be the same.
+    /// @n If primitive was optimized during graph optimization, the actual id will be "_optimized_".
+    std::map<primitive_id, primitive_id> get_all_primitives() const {
+        auto primitive_ids = get_all_primitive_ids();
+        auto primitive_org_ids = get_all_primitive_org_ids();
+        std::map<primitive_id, primitive_id> result;
+        for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) {
+            result.emplace(primitive_org_ids[i], primitive_ids[i]);
+        }
+        return result;
+    }
+
+    /// @brief Executes network and returns the list of @ref network_output.
+    /// @param dependencies List of @ref event objects to be waited before network execution.
+    /// @note User should call set_input_data() for every @ref input_layout defined in source @ref topology
+    /// before network execution.
+    std::map<primitive_id, network_output> execute(const std::vector<event>& dependencies = {}) const;
+
+    /// @brief Returns a pointer to the wrapped @ref network_impl.
+    network_impl* get() const { return _impl; }
+
+private:
+    network_impl* _impl;
+
+    void retain();
+    void release();
+};
+CLDNN_API_CLASS(network)
+/// @}
+/// @}
+}  // namespace cldnn
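For orientation, a minimal usage sketch of the header above; the primitive id "input" and the surrounding engine/topology setup are illustrative, while every call used is declared in this file:

    #include <map>
    #include "network.hpp"  // also pulls in program.hpp, engine.hpp, memory.hpp

    // Build a network for an already populated topology and run it once.
    std::map<cldnn::primitive_id, cldnn::memory> run_once(const cldnn::engine& eng,
                                                          const cldnn::topology& topo,
                                                          const cldnn::memory& input_mem) {
        cldnn::network net(eng, topo);           // shorthand for network(program(eng, topo, options), 0)
        net.set_input_data("input", input_mem);  // one call per input_layout in the topology
        std::map<cldnn::primitive_id, cldnn::memory> results;
        for (auto& out : net.execute()) {        // primitive_id -> network_output
            results.emplace(out.first, out.second.get_memory());  // waits on the output event
        }
        return results;
    }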
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/normalize.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -44,7 +43,7 @@ namespace cldnn {
 ///   @li in(i,x,y) : value at x, y from i-th feature map before normalization.
 ///   @li norm(i,x,y) : L2 norm as described above.
 ///   @li scale(i) : the scale value of the i-th feature map.
-struct normalize : public primitive_base<normalize, CLDNN_PRIMITIVE_DESC(normalize)> {
+struct normalize : public primitive_base<normalize> {
     CLDNN_DECLARE_PRIMITIVE(normalize)
 
     /// @brief Constructs normalize primitive.
@@ -66,13 +65,6 @@ struct normalize : public primitive_base<normalize, CLDNN_PRIMITIVE_DESC(normali
           across_spatial(across_spatial),
           epsilon(epsilon) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{normalize}
-    normalize(const dto* dto)
-        : primitive_base(dto),
-          scale_input(dto->scale_input),
-          across_spatial(dto->across_spatial != 0),
-          epsilon(dto->epsilon) {}
-
     /// @brief Scale input primitive id with values needed for scaling after the normalization.
     /// Scale x dimension should be 1 (if all channels have the same scale) or equal to input feature size (one scale per channel).
     /// All other dimensions should be 1.
@@ -84,14 +76,8 @@ struct normalize : public primitive_base<normalize, CLDNN_PRIMITIVE_DESC(normali
 
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {scale_input}; }
-
-    void update_dto(dto& dto) const override {
-        dto.scale_input = scale_input.c_str();
-        dto.across_spatial = across_spatial;
-        dto.epsilon = epsilon;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
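A brief construction sketch for the primitive above; the ids are hypothetical, and the constructor parameter order (id, input, scale_input, across_spatial, epsilon) is inferred from the member initializer list rather than shown in full here:

    // "norm1_scale" names a data primitive holding one scale value per channel
    // (or a single shared value), as the scale_input comment above requires.
    cldnn::normalize norm(
        "norm1",        // this primitive id
        "conv1",        // input primitive id
        "norm1_scale",  // scale_input primitive id
        false,          // across_spatial
        1e-10f);        // epsilon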
@@ -14,8 +14,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-
-#include "../C/one_hot.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -49,7 +47,7 @@ namespace cldnn {
 /// @n - input batch size must be equal to 1.
 /// @n
 /// @n Breaking any of these conditions will cause an exception to be thrown.
-struct one_hot : public primitive_base<one_hot, CLDNN_PRIMITIVE_DESC(one_hot)> {
+struct one_hot : public primitive_base<one_hot> {
     CLDNN_DECLARE_PRIMITIVE(one_hot)
 
     /// @brief Constructs one-hot primitive layer.
@@ -68,10 +66,6 @@ struct one_hot : public primitive_base<one_hot, CLDNN_PRIMITIVE_DESC(one_hot)> {
         : primitive_base(id, {input}, output_padding), shape(shape), one_hot_axis(one_hot_axis),
           on_value(on_value), off_value(off_value) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{one_hot}
-    one_hot(const dto* dto) : primitive_base(dto), shape(dto->shape), one_hot_axis(dto->one_hot_axis),
-                              on_value(dto->on_value), off_value(dto->off_value) {}
-
     /// @brief Output size reference.
     tensor shape;
     /// @brief One-hot axis position in output shape (0-based, from left to right).
@@ -80,14 +74,6 @@ struct one_hot : public primitive_base<one_hot, CLDNN_PRIMITIVE_DESC(one_hot)> {
     float on_value;
     /// @brief All other locations take this value.
     float off_value;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.shape = shape;
-        dto.one_hot_axis = one_hot_axis;
-        dto.on_value = on_value;
-        dto.off_value = off_value;
-    }
 };
 /// @}
 /// @}
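For illustration, a hedged construction sketch; the parameter order (id, input, shape, one_hot_axis, on_value, off_value) is inferred from the initializer list above, and the tensor constructor is assumed to take (b, f, x, y):

    // Expand a tensor of class indices into a one-hot tensor with 10 classes
    // placed along the feature axis.
    cldnn::one_hot oh(
        "one_hot1",                  // primitive id
        "class_ids",                 // input primitive id
        cldnn::tensor(1, 10, 1, 1),  // output shape
        1,                           // one_hot_axis (0-based, from left to right)
        1.0f,                        // on_value
        0.0f);                       // off_value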
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/permute.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -36,7 +35,7 @@ namespace cldnn {
 /// output_dimensions = { 6, 3, 3, 5 } <br>
 /// <br>
 /// When permute_order is { 0, 1, 2, 3 } then input_dimensions = output_dimensions
-struct permute : public primitive_base<permute, CLDNN_PRIMITIVE_DESC(permute)> {
+struct permute : public primitive_base<permute> {
     CLDNN_DECLARE_PRIMITIVE(permute)
 
     /// @brief Constructs permute primitive.
@@ -49,14 +48,8 @@ struct permute : public primitive_base<permute, CLDNN_PRIMITIVE_DESC(permute)> {
             const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), permute_order(permute_order) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{reorder}
-    permute(const dto* dto) : primitive_base(dto), permute_order(uint16_t_arr_to_vector(dto->permute_order)) {}
-
     /// @brief Array of permuted output order in bfyx format.
     std::vector<uint16_t> permute_order;
-
-protected:
-    void update_dto(dto& dto) const override { dto.permute_order = uint16_t_vector_to_arr(permute_order); }
 };
 /// @}
 /// @}
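A one-line usage sketch of the primitive above (ids are illustrative):

    // Swap the two spatial dimensions of "conv1": order {0, 1, 3, 2} in bfyx terms;
    // {0, 1, 2, 3} would be the identity permutation described above.
    cldnn::permute perm("permute1", "conv1", {0, 1, 3, 2});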
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/pooling.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -31,22 +30,22 @@ namespace cldnn {
 /// @brief Select method for the @ref pooling layer.
 enum class pooling_mode : int32_t {
     /// @brief Maximum-pooling method.
-    max = cldnn_pooling_max,
+    max,
     /// @brief Average-pooling method.
-    average = cldnn_pooling_average,
+    average,
     /// @brief Average-pooling method without values which are outside of the input.
-    average_no_padding = cldnn_pooling_average_no_padding,
+    average_no_padding,
     /// @brief Maximum-pooling method with additional buffer to store argmax indices.
-    max_with_argmax = cldnn_pooling_max_with_argmax,
+    max_with_argmax,
     /// @brief Pooling with bilinear interpolation.
-    bilinear = cldnn_pooling_bilinear,
+    bilinear,
     /// @brief Deformable pooling with bilinear interpolation.
-    deformable_bilinear = cldnn_pooling_deformable_bilinear
+    deformable_bilinear
 };
 
 /// @brief Performs "pooling" operation which is a form of non-linear down-sampling.
 /// @details Pools the input image by taking the max, average, etc. within regions.
-struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)> {
+struct pooling : public primitive_base<pooling> {
     CLDNN_DECLARE_PRIMITIVE(pooling)
 
     /// @brief Constructs pooling primitive.
@@ -171,18 +170,6 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)> {
           size(0, 0, 0, 0),
           with_output_size(false) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{pooling}
-    pooling(const dto* dto)
-        : primitive_base(dto),
-          argmax(dto->argmax),
-          mode(static_cast<pooling_mode>(dto->mode)),
-          global_pooling(dto->global_pooling != 0),
-          input_offset(dto->input_offset),
-          stride(dto->stride),
-          size(dto->size),
-          with_output_size(dto->with_output_size != 0),
-          output_size(dto->output_size) {}
-
     /// @brief Constructs pooling primitive (computes input paddings to match output size).
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -251,19 +238,8 @@ protected:
             return {};
         return {argmax};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.mode = static_cast<int32_t>(mode);
-        dto.argmax = argmax.c_str();
-        dto.input_offset = input_offset;
-        dto.stride = stride;
-        dto.size = size;
-        dto.with_output_size = with_output_size;
-        dto.output_size = output_size;
-        dto.global_pooling = global_pooling;
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
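Since the enumerators above no longer alias C constants, selecting a mode is plain scoped-enum usage; the constructor call below is only a sketch whose argument order (mode, window size, stride) and tensor layout (b, f, x, y) are assumed rather than taken from the elided declarations:

    // 2x2 max pooling with stride 2 over the spatial dimensions of "conv1".
    cldnn::pooling pool(
        "pool1",                     // primitive id
        "conv1",                     // input primitive id
        cldnn::pooling_mode::max,    // formerly cldnn_pooling_max
        cldnn::tensor(1, 1, 2, 2),   // pooling window size (assumed parameter)
        cldnn::tensor(1, 1, 2, 2));  // stride (assumed parameter)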
diff --git a/inference-engine/thirdparty/clDNN/api/primitive.hpp b/inference-engine/thirdparty/clDNN/api/primitive.hpp
new file mode 100644 (file)
index 0000000..314c71d
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "cldnn.hpp"
+#include "compounds.h"
+#include "layout.hpp"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <memory>
+#include <utility>
+
+namespace cldnn {
+/// @addtogroup cpp_api C++ API
+/// @{
+
+/// @addtogroup cpp_topology Network Topology
+/// @{
+
+/// @brief Globally unique primitive's type id
+using primitive_type_id = struct primitive_type *;
+
+/// @brief Unique @p id of a primitive within a topology.
+using primitive_id = std::string;
+
+struct primitive_info;
+
+/// @brief Base class of network primitive description.
+struct primitive {
+public:
+    /// @brief Initialize fields common for all primitives.
+    primitive(const primitive_type_id& type,
+              const primitive_id& id,
+              const std::vector<primitive_id>& input,
+              const padding& output_padding = padding(),
+              const optional_data_type output_data_type = optional_data_type())
+        : type(type),
+          id(id),
+          output_padding(output_padding),
+          output_data_type(output_data_type),
+          input(input) {}
+
+    virtual ~primitive() = default;
+
+    /// @brief Returns references to all primitive ids on which this primitive depends - inputs, weights, biases, etc.
+    std::vector<std::reference_wrapper<primitive_id>> dependencies() {
+        std::vector<std::reference_wrapper<primitive_id>> result;
+        auto&& deps = get_dependencies();
+
+        result.reserve(input.size() + deps.size());
+        for (auto& pid : input) result.push_back(std::ref(pid));
+        for (auto& pid : deps) result.push_back(std::ref(const_cast<primitive_id&>(pid.get())));
+
+        return result;
+    }
+
+    /// @brief Returns copy of all primitive ids on which this primitive depends - inputs, weights, biases, etc.
+    std::vector<primitive_id> dependencies() const {
+        auto result = input;
+        auto deps = get_dependencies();
+        result.insert(result.end(), deps.begin(), deps.end());
+        return result;
+    }
+
+    virtual primitive_id type_string() const = 0;
+
+    /// @brief Implicit conversion to primitive id.
+    operator primitive_id() const { return id; }
+
+    /// @brief Primitive's type id.
+    const primitive_type_id type;
+
+    /// @brief Primitive's id.
+    const primitive_id id;
+
+    /// @brief Requested output padding.
+    padding output_padding;
+
+    /// @brief Requested output precision, if any.
+    optional_data_type output_data_type;
+
+    size_t input_size() const { return input.size(); }
+
+    using primitive_id_arr = std::vector<primitive_id>;
+
+    /// @brief List of ids of input primitives.
+    primitive_id_arr input;
+
+protected:
+    virtual std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const { return {}; }
+    class condition;
+    friend struct primitive_info;
+};
+
+/// @brief Base class for all primitive implementations.
+template <class PType>
+class primitive_base : public primitive {
+protected:
+    explicit primitive_base(const primitive_id& id,
+                            const std::vector<primitive_id>& input,
+                            const padding& output_padding = padding(),
+                            optional_data_type output_data_type = optional_data_type())
+        : primitive(PType::type_id(), id, input, output_padding, output_data_type) {}
+};
+
+struct primitive_info {
+    primitive_info(const primitive_id& original_id,
+                   const std::string& type_id,
+                   const std::vector<primitive_id>& dependencies,
+                   const std::vector<primitive_id>& users,
+                   const std::vector<primitive_id>& fused_ids,
+                   const layout& output_layout,
+                   const std::string& layout_str,
+                   const std::string& kernel_id,
+                   bool is_cpu,
+                   int exec_id)
+        : original_id(original_id),
+          type_id(type_id),
+          c_dependencies(dependencies),
+          c_users(users),
+          c_fused_ids(fused_ids),
+          output_layout(output_layout),
+          layout_str(layout_str),
+          kernel_id(kernel_id),
+          is_cpu(is_cpu),
+          exec_id(exec_id) {}
+
+    primitive_id original_id;
+    std::string type_id;
+    primitive::primitive_id_arr c_dependencies;
+    primitive::primitive_id_arr c_users;
+    primitive::primitive_id_arr c_fused_ids;
+    layout output_layout;
+    std::string layout_str;
+    std::string kernel_id;
+    bool is_cpu;
+    int exec_id;
+};
+
+#define CLDNN_DEFINE_TYPE_ID(PType)     \
+    static primitive_type_id type_id();
+
+#define CLDNN_DEFINE_TYPE_STRING(PType)                 \
+    primitive_id type_string() const override {         \
+        static constexpr const char* type_str = #PType; \
+        return std::string(type_str);                   \
+    }
+
+#define CLDNN_DECLARE_PRIMITIVE(PType)       \
+    CLDNN_DEFINE_TYPE_ID(PType)              \
+    CLDNN_DEFINE_TYPE_STRING(PType)
+
+/// @}
+/// @}
+}  // namespace cldnn
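To show what the template above expects from a concrete primitive after the C API removal, here is a hypothetical wrapper; "my_op" is not part of clDNN, and its type_id() would still need a definition on the implementation side, as the built-in primitives provide:

    namespace cldnn {

    // Hypothetical primitive: one input and one extra scalar attribute.
    struct my_op : public primitive_base<my_op> {
        CLDNN_DECLARE_PRIMITIVE(my_op)  // declares type_id() and defines type_string()

        my_op(const primitive_id& id,
              const primitive_id& input,
              float alpha,
              const padding& output_padding = padding())
            : primitive_base(id, {input}, output_padding), alpha(alpha) {}

        // Attributes now live only on the C++ object; there is no update_dto() to override.
        float alpha;
    };

    }  // namespace cldnn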
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <cmath>
-
-#include "../C/prior_box.h"
 #include "primitive.hpp"
+
+#include <cmath>
 #include <vector>
 #include <limits>
 
@@ -36,7 +35,7 @@ namespace cldnn {
 /// @details The prior-boxes are shared across all the images in a batch (since they have the same width and height).
 /// First feature stores the mean of each prior coordinate.
 /// Second feature stores the variance of each prior coordinate.
-struct prior_box : public primitive_base<prior_box, CLDNN_PRIMITIVE_DESC(prior_box)> {
+struct prior_box : public primitive_base<prior_box> {
     CLDNN_DECLARE_PRIMITIVE(prior_box)
 
     /// @brief Constructs prior-box primitive.
@@ -107,21 +106,6 @@ struct prior_box : public primitive_base<prior_box, CLDNN_PRIMITIVE_DESC(prior_b
         }
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{prior-box}
-    prior_box(const dto* dto)
-        : primitive_base(dto),
-          img_size(dto->img_size),
-          min_sizes(float_arr_to_vector(dto->min_sizes)),
-          max_sizes(float_arr_to_vector(dto->max_sizes)),
-          aspect_ratios(float_arr_to_vector(dto->aspect_ratios)),
-          flip(dto->flip != 0),
-          clip(dto->clip != 0),
-          variance(float_arr_to_vector(dto->variance)),
-          step_width(dto->step_width),
-          step_height(dto->step_height),
-          offset(dto->offset),
-          scale_all_sizes(dto->scale_all_sizes != 0) {}
-
     /// @brief Image width and height.
     tensor img_size;
     /// @brief  Minimum box sizes in pixels.
@@ -144,31 +128,6 @@ struct prior_box : public primitive_base<prior_box, CLDNN_PRIMITIVE_DESC(prior_b
     float offset;
     /// @brief If false, only the first min_size is scaled by aspect_ratios.
     bool scale_all_sizes;
-
-private:
-    void update_dto(dto& dto) const override {
-        dto.img_size = img_size;
-        dto.min_sizes = float_vector_to_arr(min_sizes);
-        dto.max_sizes = float_vector_to_arr(max_sizes);
-        dto.aspect_ratios = float_vector_to_arr(aspect_ratios);
-        dto.flip = flip;
-        dto.clip = clip;
-        dto.variance = float_vector_to_arr(variance);
-        dto.step_width = step_width;
-        dto.step_height = step_height;
-        dto.offset = offset;
-        dto.scale_all_sizes = scale_all_sizes;
-    }
-
-    static cldnn_float_arr float_vector_to_arr(const std::vector<float>& stor) { return {stor.data(), stor.size()}; }
-
-    static std::vector<float> float_arr_to_vector(const cldnn_float_arr& arr) {
-        std::vector<float> result(arr.size);
-        for (size_t i = 0; i < arr.size; i++) {
-            result[i] = arr.data[i];
-        }
-        return result;
-    }
 };
 /// @}
 /// @}
@@ -15,7 +15,6 @@
 */
 
 #pragma once
-#include "cldnn_defs.h"
 #include <chrono>
 #include <memory>
 #include <vector>
@@ -68,7 +67,6 @@ private:
 };
 
 /// @brief Represents a profiling interval as its name and value.
-/// @sa @ref ::cldnn_profiling_interval
 struct profiling_interval {
     std::string name;                         ///< @brief Display name.
     std::shared_ptr<profiling_period> value;  ///< @brief Interval value.
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "cldnn_defs.h"
+#include "cldnn.hpp"
 #include "topology.hpp"
 #include "engine.hpp"
 #include <iostream>
@@ -36,23 +36,23 @@ namespace cldnn {
 /// @brief Represents user-provided program build option type.
 enum class build_option_type {
     /// @brief Allow primitives fusing during program build (default: false).
-    fusing = cldnn_build_option_fusing,
+    fusing,
 
     /// @brief Enable implicit reordering for user inputs (default: false).
-    optimize_data = cldnn_build_option_optimize_data,
+    optimize_data,
 
     /// @brief Enable always running the detection output layer on GPU, regardless of performance.
-    detection_output_gpu = cldnn_build_option_detection_output_gpu,
+    detection_output_gpu,
 
     /// @brief Enable debug mode (default: false).
     /// @details This option forces all program primitives to be accessible as outputs.
-    debug = cldnn_build_option_debug,
+    debug,
 
     /// @brief User selected list of program outputs.
-    outputs = cldnn_build_option_outputs,
+    outputs,
 
     /// @brief User defined learning parameters.
-    learning_config = cldnn_build_option_learning_config,
+    learning_config,
 
     /// @brief Tuning config (default: Tuning is disabled).
     /// @details The tuner will automatically find the optimal kernel/config for each node in the graph,
@@ -60,25 +60,25 @@ enum class build_option_type {
     /// Expect long execution time in the first run.
     /// After the first run a cache with the tuning results will be created in the path provided.
     /// This cache will be used in the next runs.
-    tuning_config = cldnn_build_option_tuning_config,
+    tuning_config,
 
     /// @brief Specifies a directory to which stages of network compilation should be dumped. (default: empty, i.e. no dumping)
-    graph_dumps_dir = cldnn_build_option_graph_dumps_dir,
+    graph_dumps_dir,
     /// @brief Name for serialization process
-    serialize_network = cldnn_build_option_serialization,
-    load_program = cldnn_build_option_load_program
+    serialize_network,
+    load_program
 };
 
 /// @brief Tuning mode.
 enum class tuning_mode {
     /// @brief Tuning is disabled.
-    tuning_disabled = cldnn_tuning_disabled,
+    tuning_disabled,
 
     /// @brief Tuning using the cached data (no on-line tuning for non-existing data).
-    tuning_use_cache = cldnn_tuning_use_cache,
+    tuning_use_cache,
 
     /// @brief Tuning using the cached data if exist, tune and update cache otherwise.
-    tuning_tune_and_cache = cldnn_tuning_tune_and_cache
+    tuning_tune_and_cache
 };
 
 /// @brief Tuning configuration.
@@ -91,8 +91,8 @@ struct tuning_config_options {
 
 /// @brief Learning parameters.
 struct learning_params {
-    float momentum;
-    float weights_decay;
+    float momentum = 0.0;
+    float weights_decay = 0.0;
 
     learning_params() : momentum(0.9f), weights_decay(0.0005f) {}
 };
@@ -141,9 +141,6 @@ private:
     /// @brief Returns option type represented by this object.
     virtual build_option_type get_type() const = 0;
 
-    /// @brief Returns option @ref ::cldnn_build_option::data represented by this object.
-    virtual const void* get_data() const = 0;
-
     friend class build_options;
 };
 
@@ -154,17 +151,11 @@ struct build_option_bool : build_option {
     /// @param value Is option enabled.
     explicit build_option_bool(bool value) : _value(value ? 1 : 0) {}
 
-    /// @brief Constructs from C API @ref ::cldnn_build_option.
-    explicit build_option_bool(const cldnn_build_option& value) : _value(reinterpret_cast<uintptr_t>(value.data)) {
-        assert(value.type == static_cast<int32_t>(OptType));
-    }
-
     /// @brief Is option enabled.
     bool enabled() const { return _value != 0; }
 
 private:
     build_option_type get_type() const override { return OptType; }
-    const void* get_data() const override { return reinterpret_cast<const void*>(_value); }
     uintptr_t _value;
 };
 
@@ -176,47 +167,14 @@ struct build_option_outputs : build_option {
     /// @brief Constructs option.
     /// @param outs List of output ids (names).
     explicit build_option_outputs(const std::vector<primitive_id>& outs)
-        : outputs(outs), _ref_store(to_refs(outputs)), _outputs_ref({_ref_store.data(), _ref_store.size()}) {}
-
-    /// @brief Constructs from C API @ref ::cldnn_build_option.
-    explicit build_option_outputs(const cldnn_build_option& value)
-        : build_option_outputs(make_outputs_from_ref(value)) {
-        assert(value.type == static_cast<int32_t>(cldnn_build_option_outputs));
-    }
+        : outputs(outs) {}
 
 private:
     /// @brief Returns build_option_type::outputs.
     build_option_type get_type() const override { return build_option_type::outputs; }
-    /// @brief Returns pointer to @ref cldnn_primitive_is_arr
-    const void* get_data() const override { return &_outputs_ref; }
 
     build_option_outputs(const build_option_outputs& other) = delete;
     build_option_outputs& operator=(const build_option_outputs& other) = delete;
-
-    const std::vector<cldnn_primitive_id> _ref_store;
-    const cldnn_primitive_id_arr _outputs_ref;
-
-    static std::vector<cldnn_primitive_id> to_refs(const std::vector<primitive_id>& stor) {
-        std::vector<cldnn_primitive_id> result(stor.size());
-        for (size_t i = 0; i < stor.size(); i++) {
-            result[i] = stor[i].c_str();
-        }
-        return result;
-    }
-
-    static std::vector<primitive_id> make_outputs_from_ref(const cldnn_build_option& value) {
-        if (value.type != cldnn_build_option_outputs)
-            throw std::invalid_argument("option type does not match: should be 'output'");
-        if (value.data == nullptr)
-            throw std::invalid_argument("output data is empty");
-        auto refs = reinterpret_cast<const cldnn_primitive_id_arr*>(value.data);
-        std::vector<primitive_id> result;
-        result.reserve(refs->size);
-        for (decltype(refs->size) i = 0; i < refs->size; i++) {
-            result.push_back(refs->data[i]);
-        }
-        return result;
-    }
 };
 
 /// @brief @ref build_option specialization for learning config.
@@ -227,36 +185,14 @@ struct build_option_learning_config : build_option {
     /// @brief Constructs learning config build option.
     /// @param learning_params Parameters for learning.
     explicit build_option_learning_config(const learning_params& params)
-        : params(params), params_ref({params.momentum, params.weights_decay}) {}
-
-    /// @brief Constructs learning config build option from C API @ref ::cldnn_build_option.
-    explicit build_option_learning_config(const cldnn_build_option& value)
-        : build_option_learning_config(make_config_from_ref(value)) {
-        assert(value.type == static_cast<int32_t>(cldnn_build_option_learning_config));
-    }
+        : params(params) {}
 
 private:
     /// @brief Returns build_option_type::learning_config.
     build_option_type get_type() const override { return build_option_type::learning_config; }
-    /// @brief Returns pointer to @ref cldnn_learning_params.
-    const void* get_data() const override { return &params_ref; }
 
     build_option_learning_config(const build_option_learning_config& other) = delete;
     build_option_learning_config& operator=(const build_option_learning_config& other) = delete;
-
-    const cldnn_learning_params params_ref;
-
-    static learning_params make_config_from_ref(const cldnn_build_option& value) {
-        if (value.type != cldnn_build_option_learning_config)
-            throw std::invalid_argument("option type does not match: should be 'learning_config'");
-        if (value.data == nullptr)
-            throw std::invalid_argument("Learning params data is empty");
-        auto refs = reinterpret_cast<const cldnn_learning_params*>(value.data);
-        learning_params result;
-        result.momentum = refs->momentum;
-        result.weights_decay = refs->weights_decay;
-        return result;
-    }
 };
 
 /// @brief @ref build_option specialization for tuning config.
@@ -267,36 +203,14 @@ struct build_option_tuning_config : build_option {
     /// @brief Constructs tuning config build option.
     /// @param tuning_config Configuration for the tuning.
     explicit build_option_tuning_config(const tuning_config_options& tuning_config)
-        : config(tuning_config), config_ref({static_cast<int32_t>(config.mode), config.cache_file_path.c_str()}) {}
-
-    /// @brief Constructs tuning config build option from C API @ref ::cldnn_build_option.
-    explicit build_option_tuning_config(const cldnn_build_option& value)
-        : build_option_tuning_config(make_config_from_ref(value)) {
-        assert(value.type == static_cast<int32_t>(cldnn_build_option_tuning_config));
-    }
+        : config(tuning_config) {}
 
 private:
     /// @brief Returns build_option_type::tuning_config.
     build_option_type get_type() const override { return build_option_type::tuning_config; }
-    /// @brief Returns pointer to @ref cldnn_tuning_config
-    const void* get_data() const override { return &config_ref; }
 
     build_option_tuning_config(const build_option_tuning_config& other) = delete;
     build_option_tuning_config& operator=(const build_option_tuning_config& other) = delete;
-
-    const cldnn_tuning_config config_ref;
-
-    static tuning_config_options make_config_from_ref(const cldnn_build_option& value) {
-        if (value.type != cldnn_build_option_tuning_config)
-            throw std::invalid_argument("option type does not match: should be 'tuning_config'");
-        if (value.data == nullptr)
-            throw std::invalid_argument("Tuning config data is empty");
-        auto refs = reinterpret_cast<const cldnn_tuning_config*>(value.data);
-        tuning_config_options result;
-        result.mode = tuning_mode(refs->mode);
-        result.cache_file_path = std::string(refs->cache_file_path);
-        return result;
-    }
 };
 
 /// @brief @ref build_option specialization for selecting a directory.
@@ -308,26 +222,12 @@ struct build_option_directory : build_option {
     /// @param dir_path Path to the dump directory.
     explicit build_option_directory(const std::string& dir_path) : directory_path(dir_path) {}
 
-    /// @brief Constructs from C API @ref ::cldnn_build_option.
-    explicit build_option_directory(const cldnn_build_option& value) : directory_path(from_c_value(value)) {}
-
 private:
     /// @brief Returns build_option_type::graph_dumps_dir.
     build_option_type get_type() const override { return build_option_type::graph_dumps_dir; }
-    /// @brief Returns null terminated C string.
-    const void* get_data() const override { return (directory_path.empty() ? nullptr : directory_path.c_str()); }
 
     build_option_directory(const build_option_directory& other) = delete;
     build_option_directory& operator=(const build_option_directory& other) = delete;
-
-    static std::string from_c_value(const cldnn_build_option& value) {
-        if (value.type != static_cast<int32_t>(OptType))
-            throw std::invalid_argument("option type does not match");
-        if (value.data == nullptr)
-            return {};
-
-        return {static_cast<const char*>(value.data)};
-    }
 };
 
 /// @brief @ref build_option specialization for serialization process.
@@ -337,27 +237,11 @@ struct build_option_serialization : build_option {
 
     explicit build_option_serialization(const std::string& name) : serialization_network_name(name) {}
 
-    explicit build_option_serialization(const cldnn_build_option& value)
-        : serialization_network_name(from_c_value(value)) {}
-
 private:
     build_option_type get_type() const override { return build_option_type::serialize_network; }
 
-    const void* get_data() const override {
-        return (serialization_network_name.empty() ? nullptr : serialization_network_name.c_str());
-    }
-
     build_option_serialization(const build_option_serialization& other) = delete;
     build_option_serialization& operator=(const build_option_serialization& other) = delete;
-
-    static std::string from_c_value(const cldnn_build_option& value) {
-        if (value.type != static_cast<int32_t>(OptType))
-            throw std::invalid_argument("option type does not match");
-        if (value.data == nullptr)
-            return {};
-
-        return {static_cast<const char*>(value.data)};
-    }
 };
 
 /// @brief @ref build_option specialization for load_program process.
@@ -367,24 +251,11 @@ struct build_option_load_program : build_option {
 
     explicit build_option_load_program(const std::string& name) : load_program_name(name) {}
 
-    explicit build_option_load_program(const cldnn_build_option& value) : load_program_name(from_c_value(value)) {}
-
 private:
     build_option_type get_type() const override { return build_option_type::load_program; }
 
-    const void* get_data() const override { return (load_program_name.empty() ? nullptr : load_program_name.c_str()); }
-
     build_option_load_program(const build_option_load_program& other) = delete;
     build_option_load_program& operator=(const build_option_load_program& other) = delete;
-
-    static std::string from_c_value(const cldnn_build_option& value) {
-        if (value.type != static_cast<int32_t>(OptType))
-            throw std::invalid_argument("option type does not match");
-        if (value.data == nullptr)
-            return {};
-
-        return {static_cast<const char*>(value.data)};
-    }
 };
 
 namespace detail {
@@ -395,8 +266,6 @@ struct build_option_traits {
     typedef build_option object_type;
     /// @brief Make default @ref build_option corresponding @p OptType
     static std::shared_ptr<const build_option> make_default();
-    /// @brief Make @ref build_option from C API @ref ::cldnn_build_option
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option);
 };
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -404,91 +273,51 @@ template <>
 struct build_option_traits<build_option_type::fusing> {
     typedef build_option_bool<build_option_type::fusing> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::fusing(); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_fusing);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::optimize_data> {
     typedef build_option_bool<build_option_type::optimize_data> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::optimize_data(); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_optimize_data);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::detection_output_gpu> {
     typedef build_option_bool<build_option_type::detection_output_gpu> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::detection_output_gpu(); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_detection_output_gpu);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::debug> {
     typedef build_option_bool<build_option_type::debug> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::debug(); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_debug);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::outputs> {
     typedef build_option_outputs object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::outputs({}); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_outputs);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::learning_config> {
     typedef build_option_learning_config object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::learning_config(); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_learning_config);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::tuning_config> {
     typedef build_option_tuning_config object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::tuning_config(); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_tuning_config);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::graph_dumps_dir> {
     typedef build_option_directory<build_option_type::graph_dumps_dir> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::graph_dumps_dir({}); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_graph_dumps_dir);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::serialize_network> {
     typedef build_option_serialization<build_option_type::serialize_network> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::serialize_network({}); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_serialization);
-        return std::make_shared<object_type>(option);
-    }
 };
 template <>
 struct build_option_traits<build_option_type::load_program> {
     typedef build_option_load_program<build_option_type::load_program> object_type;
     static std::shared_ptr<const build_option> make_default() { return build_option::load_program({}); }
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        assert(option.type == cldnn_build_option_load_program);
-        return std::make_shared<object_type>(option);
-    }
 };
 
 #endif
@@ -553,13 +382,6 @@ public:
         set_option(args...);
     }
 
-    /// @brief Constructs build options list from C API ::cldnn_build_options.
-    explicit build_options(array_ref<cldnn_build_option> options) {
-        for (auto& o : options) {
-            _options.emplace_back(make_option(o));
-        }
-    }
-
     /// @brief Returns program build option for @p OptType
     template <build_option_type OptType>
     std::shared_ptr<const typename detail::build_option_traits<OptType>::object_type> get() const {
@@ -576,15 +398,6 @@ private:
     std::vector<std::shared_ptr<const build_option>> _options;
     void set_option(void) {}
 
-    /// @brief Returns C API compatible list of ::cldnn_build_option
-    std::vector<cldnn_build_option> get_refs() const {
-        std::vector<cldnn_build_option> result;
-        for (auto& o : _options) {
-            result.push_back({static_cast<int32_t>(o->get_type()), o->get_data()});
-        }
-        return result;
-    }
-
     void add_or_replace_option(std::shared_ptr<const build_option> opt) {
         for (auto& p : _options) {
             if (p->get_type() == opt->get_type()) {
@@ -594,35 +407,10 @@ private:
         }
         _options.push_back(opt);
     }
-
-    static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option) {
-        switch (option.type) {
-            case cldnn_build_option_fusing:
-                return detail::build_option_traits<build_option_type::fusing>::make_option(option);
-            case cldnn_build_option_learning_config:
-                return detail::build_option_traits<build_option_type::learning_config>::make_option(option);
-            case cldnn_build_option_optimize_data:
-                return detail::build_option_traits<build_option_type::optimize_data>::make_option(option);
-            case cldnn_build_option_detection_output_gpu:
-                return detail::build_option_traits<build_option_type::detection_output_gpu>::make_option(option);
-            case cldnn_build_option_debug:
-                return detail::build_option_traits<build_option_type::debug>::make_option(option);
-            case cldnn_build_option_outputs:
-                return detail::build_option_traits<build_option_type::outputs>::make_option(option);
-            case cldnn_build_option_tuning_config:
-                return detail::build_option_traits<build_option_type::tuning_config>::make_option(option);
-            case cldnn_build_option_graph_dumps_dir:
-                return detail::build_option_traits<build_option_type::graph_dumps_dir>::make_option(option);
-            case cldnn_build_option_serialization:
-                return detail::build_option_traits<build_option_type::serialize_network>::make_option(option);
-            case cldnn_build_option_load_program:
-                return detail::build_option_traits<build_option_type::load_program>::make_option(option);
-            default:
-                throw std::out_of_range("unsupported build option type");
-        }
-    }
 };
 
+struct program_impl;
+
 /// @brief Compiled program built from @ref topology by @ref engine.
 struct program {
     friend struct network;
@@ -632,17 +420,9 @@ public:
     /// @param[in] engine The engine which will be used to build the program.
     /// @param[in] topology The user-defined topology on which the network will be based.
     /// @param[in] options Program build options. See @ref build_option and @ref build_options for details.
-    program(engine const& engine, topology const& topology, build_options const& options = build_options())
-        : _impl(check_status<cldnn_program>("program creation failed", [&](status_t* status) {
-              auto options_refs = options.get_refs();
-              return cldnn_build_program(engine.get(),
-                                         topology.get(),
-                                         options_refs.data(),
-                                         options_refs.size(),
-                                         status);
-          })) {}
-
-    /// @brief Retains the C API @ref cldnn_program handler stored in @p other.
+    program(engine const& engine, topology const& topology, build_options const& options = build_options());
+
+    /// @brief Copy constructor.
     program(program const& other) : _impl(other._impl) { retain(); }
 
     /// @brief Decrements the reference counter of the underlying @ref program_impl.
@@ -664,22 +444,18 @@ public:
     friend bool operator!=(const program& lhs, const program& rhs) { return !(lhs == rhs); }
 
     /// @brief Returns a pointer to the internal @ref program_impl.
-    ::cldnn_program get() const { return _impl; }
+    program_impl* get() const { return _impl; }
 
 private:
-    ::cldnn_program _impl;
+    program_impl* _impl;
 
-    explicit program(::cldnn_program impl) : _impl(impl) {
+    explicit program(program_impl* impl) : _impl(impl) {
         if (_impl == nullptr)
             throw std::invalid_argument("implementation pointer should not be null");
     }
 
-    void retain() {
-        check_status<void>("retain topology failed", [=](status_t* status) { cldnn_retain_program(_impl, status); });
-    }
-    void release() {
-        check_status<void>("retain topology failed", [=](status_t* status) { cldnn_release_program(_impl, status); });
-    }
+    void retain();
+    void release();
 };
 /// @}
 /// @}
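A sketch of assembling build options and sharing one compiled program between networks; build_option::outputs is used the same way make_default() uses it above, and any further factory parameters are treated as assumptions:

    #include "program.hpp"
    #include "network.hpp"

    void compile_and_run(const cldnn::engine& eng, const cldnn::topology& topo) {
        // Request explicit program outputs by id (illustrative names).
        cldnn::build_options opts(cldnn::build_option::outputs({"prob", "boxes"}));

        cldnn::program prog(eng, topo, opts);  // compiled once...
        cldnn::network net0(prog, 0);          // ...then shared across stream ids
        cldnn::network net1(prog, 1);

        // Options can be read back in a typed way.
        auto outs = opts.get<cldnn::build_option_type::outputs>();
        (void)outs;  // std::shared_ptr<const cldnn::build_option_outputs>
        (void)net0;
        (void)net1;
    }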
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <vector>
-
-#include "../C/proposal.h"
 #include "primitive.hpp"
+#include <vector>
 
 namespace cldnn {
 /// @addtogroup cpp_api C++ API
@@ -30,7 +28,9 @@ namespace cldnn {
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
-struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)> {
+#define CLDNN_ROI_VECTOR_SIZE 5
+
+struct proposal : public primitive_base<proposal> {
     CLDNN_DECLARE_PRIMITIVE(proposal)
 
     proposal(const primitive_id& id,
@@ -163,29 +163,6 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
               shift_anchors(shift_anchors),
               normalize(normalize) {}
 
-    proposal(const dto* dto)
-        : primitive_base(dto),
-          max_proposals(dto->max_proposals),
-          iou_threshold(dto->iou_threshold),
-          base_bbox_size(dto->base_bbox_size),
-          min_bbox_size(dto->min_bbox_size),
-          feature_stride(dto->feature_stride),
-          pre_nms_topn(dto->pre_nms_topn),
-          post_nms_topn(dto->post_nms_topn),
-          ratios(float_arr_to_vector(dto->ratios)),
-          scales(float_arr_to_vector(dto->scales)),
-          coordinates_offset(dto->coordinates_offset),
-          box_coordinate_scale(dto->box_coordinate_scale),
-          box_size_scale(dto->box_size_scale),
-          for_deformable(dto->for_deformable != 0),
-          swap_xy(dto->swap_xy != 0),
-          initial_clip(dto->initial_clip != 0),
-          clip_before_nms(dto->clip_before_nms != 0),
-          clip_after_nms(dto->clip_after_nms != 0),
-          round_ratios(dto->round_ratios != 0),
-          shift_anchors(dto->shift_anchors != 0),
-          normalize(dto->normalize != 0) {}
-
     int max_proposals;
     float iou_threshold;
     int base_bbox_size;
@@ -206,30 +183,6 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
     bool round_ratios;
     bool shift_anchors;
     bool normalize;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.max_proposals = max_proposals;
-        dto.iou_threshold = iou_threshold;
-        dto.base_bbox_size = base_bbox_size;
-        dto.min_bbox_size = min_bbox_size;
-        dto.feature_stride = feature_stride;
-        dto.pre_nms_topn = pre_nms_topn;
-        dto.post_nms_topn = post_nms_topn;
-        dto.ratios = float_vector_to_arr(ratios);
-        dto.scales = float_vector_to_arr(scales);
-        dto.coordinates_offset = coordinates_offset;
-        dto.box_coordinate_scale = box_coordinate_scale;
-        dto.box_size_scale = box_size_scale;
-        dto.for_deformable = for_deformable;
-        dto.swap_xy = swap_xy;
-        dto.initial_clip = initial_clip;
-        dto.clip_before_nms = clip_before_nms;
-        dto.clip_after_nms = clip_after_nms;
-        dto.round_ratios = round_ratios;
-        dto.shift_anchors = shift_anchors;
-        dto.normalize = normalize;
-    }
 };
 
 /// @}
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "../C/pyramid_roi_align.h"
 #include "primitive.hpp"
 #include <string>
 
@@ -22,7 +21,7 @@ using namespace std;
 
 namespace cldnn {
 
-struct pyramid_roi_align : public primitive_base<pyramid_roi_align, CLDNN_PRIMITIVE_DESC(pyramid_roi_align)> {
+struct pyramid_roi_align : public primitive_base<pyramid_roi_align> {
     CLDNN_DECLARE_PRIMITIVE(pyramid_roi_align)
 
     pyramid_roi_align(const primitive_id &id, const primitive_id &input, const padding &output_padding = padding())
@@ -40,14 +39,5 @@ struct pyramid_roi_align : public primitive_base<pyramid_roi_align, CLDNN_PRIMIT
         : primitive_base(std::string(id_c),
                          {base_str, meta_str, P2_str, P3_str, P4_str, P5_str, pool_size_str},
                          output_padding) {}
-
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{broadcast}
-    pyramid_roi_align(const dto *dto)
-        : primitive_base(dto)
-
-    {}
-
-protected:
-    void update_dto(dto &) const override {}
 };
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/quantize.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -33,7 +32,7 @@ namespace cldnn {
 /// Values input_low and input_high specify the input range of quantization.
 /// All input values that are outside this range are clipped to the range before actual quantization.
 /// Values output_low and output_high define minimum and maximum quantized values at the output.
-struct quantize : public primitive_base<quantize, CLDNN_PRIMITIVE_DESC(quantize)> {
+struct quantize : public primitive_base<quantize> {
     CLDNN_DECLARE_PRIMITIVE(quantize)
 
     quantize(const primitive_id& id,
@@ -46,14 +45,8 @@ struct quantize : public primitive_base<quantize, CLDNN_PRIMITIVE_DESC(quantize)
              const padding& output_padding = padding())
         : primitive_base(id, {input, input_low, input_high, output_low, output_high}, output_padding), levels(levels) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{quantize}
-    quantize(const dto* dto) : primitive_base(dto), levels(dto->levels) {}
-
     /// @brief The number of quantization levels.
     int levels;
-
-protected:
-    void update_dto(dto& dto) const override { dto.levels = levels; }
 };
 /// @}
 /// @}
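A construction sketch for the primitive above; the ids are hypothetical data primitives holding the range tensors, and the parameter order matches the member initializer list (input, input_low, input_high, output_low, output_high, levels):

    // 8-bit fake quantization of the "conv1" output.
    cldnn::quantize q(
        "quant1",    // this primitive id
        "conv1",     // input
        "in_low",    // input_low
        "in_high",   // input_high
        "out_low",   // output_low
        "out_high",  // output_high
        256);        // levels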
@@ -17,7 +17,6 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "../C/reduce.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -32,43 +31,43 @@ namespace cldnn {
 /// @brief Select mode for the @ref reduce layer
 enum class reduce_mode : uint16_t {
     /// @brief Reduce max
-    max = cldnn_reduce_max,
+    max,
     /// @brief Reduce min
-    min = cldnn_reduce_min,
+    min,
     /// @brief Reduce mean
-    mean = cldnn_reduce_mean,
+    mean,
     /// @brief Reduce prod
-    prod = cldnn_reduce_prod,
+    prod,
     /// @brief Reduce sum
-    sum = cldnn_reduce_sum,
+    sum,
     /// @brief Reduce and
-    logical_and = cldnn_reduce_and,
+    logical_and,
     /// @brief Reduce or
-    logical_or = cldnn_reduce_or,
+    logical_or,
     /// @brief Reduce sum_square
-    sum_square = cldnn_reduce_sum_square,
+    sum_square,
     /// @brief Reduce l1
-    l1 = cldnn_reduce_l1,
+    l1,
     /// @brief Reduce l2
-    l2 = cldnn_reduce_l2,
+    l2,
     /// @brief Reduce log_sum
-    log_sum = cldnn_reduce_log_sum,
+    log_sum,
     /// @brief Reduce log_sum_exp
-    log_sum_exp = cldnn_reduce_log_sum_exp
+    log_sum_exp
 };
 
 /// @brief Applies the specific reduction function along provided axes (second input) of the input tensor (first input).
 /// @details
-struct reduce : public primitive_base<reduce, CLDNN_PRIMITIVE_DESC(reduce)> {
+struct reduce : public primitive_base<reduce> {
     CLDNN_DECLARE_PRIMITIVE(reduce)
 
     enum reduce_axis {
-        along_b = cldnn_reduce_along_b,
-        along_f = cldnn_reduce_along_f,
-        along_x = cldnn_reduce_along_x,
-        along_y = cldnn_reduce_along_y,
-        along_z = cldnn_reduce_along_z,
-        along_w = cldnn_reduce_along_w
+        along_b,
+        along_f,
+        along_x,
+        along_y,
+        along_z,
+        along_w
     };
 
     /// @brief Constructs reduce primitive
@@ -79,22 +78,12 @@ struct reduce : public primitive_base<reduce, CLDNN_PRIMITIVE_DESC(reduce)> {
            const int32_t keep_dims, const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), mode(mode), axes(axes), keep_dims(keep_dims) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{reduce}
-    reduce(const dto* dto) : primitive_base(dto), mode(static_cast<reduce_mode>(dto->mode)),
-                             axes(uint16_t_arr_to_vector(dto->axes)), keep_dims(dto->keep_dims) {}
     /// @brief Reduce operation type
     reduce_mode mode;
     /// @brief List of axes to reduce
     std::vector<uint16_t> axes;
     /// @brief Keep the reduced dimension or not; 1 means keep the reduced dimension.
     int32_t keep_dims;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.mode = static_cast<int32_t>(mode);
-        dto.keep_dims = keep_dims;
-        dto.axes = uint16_t_vector_to_arr(axes);
-    }
 };
 /// @}
 /// @}
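A construction sketch for the primitive above, using the parameter order from the member initializer list (id, input, mode, axes, keep_dims); ids are illustrative:

    // Mean over both spatial axes of "conv1", keeping the reduced dimensions.
    cldnn::reduce r(
        "reduce1",                                         // primitive id
        "conv1",                                           // input primitive id
        cldnn::reduce_mode::mean,                          // reduction function
        {cldnn::reduce::along_x, cldnn::reduce::along_y},  // axes to reduce
        1);                                                // keep_dims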
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/region_yolo.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -31,7 +30,7 @@ namespace cldnn {
 /// @details
 /// @par Algorithm:
 /// @par Where:
-struct region_yolo : public primitive_base<region_yolo, CLDNN_PRIMITIVE_DESC(region_yolo)> {
+struct region_yolo : public primitive_base<region_yolo> {
     CLDNN_DECLARE_PRIMITIVE(region_yolo)
 
     /// @brief Constructs region_yolo primitive.
@@ -53,15 +52,6 @@ struct region_yolo : public primitive_base<region_yolo, CLDNN_PRIMITIVE_DESC(reg
           mask_size(mask_size),
           do_softmax(do_softmax) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{region_yolo}
-    region_yolo(const dto* dto)
-        : primitive_base(dto),
-          coords(dto->coords),
-          classes(dto->classes),
-          num(dto->num),
-          mask_size(dto->mask_size),
-          do_softmax(dto->do_softmax != 0) {}
-
     /// @brief Defines a scope of a region yolo normalization
     /// @details
     /// Specific behaviour is determined by these parameters, as follows:
@@ -70,15 +60,6 @@ struct region_yolo : public primitive_base<region_yolo, CLDNN_PRIMITIVE_DESC(reg
     uint32_t num;
     uint32_t mask_size;
     bool do_softmax;
-
-private:
-    void update_dto(dto& dto) const override {
-        dto.coords = coords;
-        dto.classes = classes;
-        dto.num = num;
-        dto.mask_size = mask_size;
-        dto.do_softmax = do_softmax;
-    }
 };
 /// @}
 /// @}
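A hedged construction sketch; the parameter order (coords, classes, num, mask_size, do_softmax) follows the member initializer list above, and the concrete values are only illustrative:

    // YOLOv3-style decoding of a detection head output.
    cldnn::region_yolo ry(
        "region1",   // primitive id
        "conv_out",  // input primitive id
        4,           // coords per box
        80,          // classes
        9,           // num
        3,           // mask_size
        false);      // do_softmax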
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/reorder.h"
 #include "primitive.hpp"
 #include "memory.hpp"
 #include <vector>
@@ -29,11 +28,19 @@ namespace cldnn {
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
+/// @brief reorder mean operation modes
+enum class reorder_mean_mode {
+    none,      // val
+    subtract,  // val - mean
+    mul,       // val * mean
+    div,       // val/mean
+};
+
 /// @brief Changes how data is ordered in memory. Value type is not changed & all information is preserved.
 /// @details Corresponding values are bitwise equal before/after reorder.
 /// It is also merged with a subtraction layer, which can subtract, multiply, or divide values (depending on mean_mode) while reordering.
 /// NOTE THAT THIS WILL SUBTRACT THE SAME VALUES FROM EACH BATCH.
-struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
+struct reorder : public primitive_base<reorder> {
     CLDNN_DECLARE_PRIMITIVE(reorder)
 
     /// @brief Constructs reorder primitive with directly provided mean subtract values.
@@ -45,7 +52,7 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
             const primitive_id& input,
             const layout& output_layout,
             const std::vector<float>& values_to_subtract = {},
-            const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract)
+            const reorder_mean_mode mode = reorder_mean_mode::subtract)
         : primitive_base(id, {input}, output_layout.data_padding, optional_data_type {output_layout.data_type}),
           output_format(output_layout.format),
           mean(""),
@@ -61,7 +68,7 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
             const primitive_id& input,
             const layout& output_layout,
             primitive_id const& mean,
-            const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract)
+            const reorder_mean_mode mode = reorder_mean_mode::subtract)
         : primitive_base(id, {input}, output_layout.data_padding, optional_data_type {output_layout.data_type}),
           output_format(output_layout.format),
           mean(mean),
@@ -78,7 +85,7 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
             format output_format,
             data_types output_data_type,
             const std::vector<float>& values_to_subtract = {},
-            const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract,
+            const reorder_mean_mode mode = reorder_mean_mode::subtract,
             const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
           output_format(output_format),
@@ -96,7 +103,7 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
             format output_format,
             data_types output_data_type,
             primitive_id const& mean,
-            const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract,
+            const reorder_mean_mode mode = reorder_mean_mode::subtract,
             const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding, optional_data_type {output_data_type}),
           output_format(output_format),
@@ -104,14 +111,6 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
           subtract_per_feature(0),
           mean_mode(mode) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{reorder}
-    reorder(const dto* dto)
-        : primitive_base(dto),
-          output_format(dto->output_format),
-          mean(dto->mean_subtract),
-          subtract_per_feature(float_arr_to_vector(dto->subtract_per_feature)),
-          mean_mode(dto->mean_mode) {}
-
     /// @brief Requested memory format.
     format output_format;
     /// @brief Primitive id to get mean subtract values. Ignored if subtract_per_feature is set.
@@ -119,7 +118,7 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)> {
     /// @brief Array of mean subtract values.
     std::vector<float> subtract_per_feature;
     /// @brief Mode of mean execution
-    cldnn_reorder_mean_mode mean_mode;
+    reorder_mean_mode mean_mode;
 
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
@@ -127,14 +126,8 @@ protected:
             return {};
         return {mean};
     }
-
-    void update_dto(dto& dto) const override {
-        dto.output_format = static_cast<cldnn_format_type>(output_format.value);
-        dto.mean_subtract = mean.c_str();
-        dto.subtract_per_feature = float_vector_to_arr(subtract_per_feature);
-        dto.mean_mode = mean_mode;
-    }
 };
+
 /// @}
 /// @}
 /// @}
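
A usage sketch for the first reworked constructor above (id, input, output_layout, values_to_subtract, mode); the mean values and primitive ids are illustrative, and the usual layout(data_type, format, size) constructor is assumed:

    // Reorder "input" to fp32/yxfb and subtract an illustrative per-channel mean while reordering.
    cldnn::layout out_layout(cldnn::data_types::f32, cldnn::format::yxfb, cldnn::tensor(1, 3, 224, 224));
    cldnn::reorder reordered("reordered_input", "input", out_layout,
                             { 104.0f, 117.0f, 123.0f },          // values_to_subtract, one per feature
                             cldnn::reorder_mean_mode::subtract);
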
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/reorg_yolo.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -31,7 +30,7 @@ namespace cldnn {
 /// @details
 /// @par Algorithm:
 /// @par Where:
-struct reorg_yolo : public primitive_base<reorg_yolo, CLDNN_PRIMITIVE_DESC(reorg_yolo)> {
+struct reorg_yolo : public primitive_base<reorg_yolo> {
     CLDNN_DECLARE_PRIMITIVE(reorg_yolo)
 
     /// @brief Constructs region_yolo primitive.
@@ -44,16 +43,10 @@ struct reorg_yolo : public primitive_base<reorg_yolo, CLDNN_PRIMITIVE_DESC(reorg
                const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), stride(stride) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{region_yolo}
-    reorg_yolo(const dto* dto) : primitive_base(dto), stride(dto->stride) {}
-
     /// @brief Defines a scope of a reorg yolo normalization
     /// @details
     /// Specific behaviour is determined by these parameters, as follows:
     uint32_t stride;
-
-private:
-    void update_dto(dto& dto) const override { dto.stride = stride; }
 };
 /// @}
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/reshape.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -32,7 +31,7 @@ namespace cldnn {
 /// @note The reshape primitive only reinterprets the shape of the memory, so it cannot change
 /// the data type or the format of the input buffer, and the total number of elements in input and output (excluding paddings) must match.
 /// Please note that there is no guarantee that the underlying data will be in the proper format if the primitive was explicitly added to the output list.
-struct reshape : public primitive_base<reshape, CLDNN_PRIMITIVE_DESC(reshape)> {
+struct reshape : public primitive_base<reshape> {
     CLDNN_DECLARE_PRIMITIVE(reshape)
 
     /// @brief Constructs reshape primitive.
@@ -48,17 +47,11 @@ struct reshape : public primitive_base<reshape, CLDNN_PRIMITIVE_DESC(reshape)> {
             const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), output_shape(output_shape) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{reshape}
-    reshape(const dto* dto) : primitive_base(dto), output_shape(dto->output_shape) {}
-
     /// @brief Requested memory shape.
     tensor output_shape;
-
-protected:
-    void update_dto(dto& dto) const override { dto.output_shape = output_shape; }
 };
 
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
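
A short sketch of the reworked reshape primitive (id, input, output_shape), with illustrative ids; as the note above requires, the element count of the new shape must match the input:

    // Reinterpret a 1x256x4x4 buffer as 1x4096x1x1 (same element count; data type and format are unchanged).
    cldnn::reshape flattened("flattened", "pool5", cldnn::tensor(1, 4096, 1, 1));
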
@@ -17,7 +17,6 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "../C/reverse_sequence.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -30,7 +29,7 @@ namespace cldnn {
 
 /// @brief
 /// @details
-struct reverse_sequence : public primitive_base<reverse_sequence, CLDNN_PRIMITIVE_DESC(reverse_sequence)> {
+struct reverse_sequence : public primitive_base<reverse_sequence> {
     CLDNN_DECLARE_PRIMITIVE(reverse_sequence)
 
     /// @brief Constructs reverse_sequence primitive.
@@ -67,19 +66,10 @@ struct reverse_sequence : public primitive_base<reverse_sequence, CLDNN_PRIMITIV
             throw std::runtime_error("Incorrect sequence axis value! Actual axis is " + std::to_string(seq_a));
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{reverse_sequence}
-    reverse_sequence(const dto* dto) : primitive_base(dto), seq_axis(dto->seq_axis), batch_axis(dto->batch_axis) {}
-
     /// @brief The axis which is partially reversed.
     int32_t seq_axis;
     /// @brief The axis along which reversal is performed.
     int32_t batch_axis;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.seq_axis = seq_axis;
-        dto.batch_axis = batch_axis;
-    }
 };
 /// @}
 /// @}
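
The full constructor is not visible in this hunk, so the sketch below is hypothetical: it assumes reverse_sequence takes the data input, a sequence-lengths input, and the two axes declared above, with illustrative ids:

    // Reverse each batch entry along axis 2, up to the per-entry length supplied by "seq_lengths".
    cldnn::reverse_sequence reversed("reversed", "input", "seq_lengths", /*seq_axis=*/2, /*batch_axis=*/0);
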
@@ -17,7 +17,6 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 #include "pooling.hpp"
-#include "../C/roi_pooling.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -29,7 +28,7 @@ namespace cldnn {
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
-struct roi_pooling : public primitive_base<roi_pooling, CLDNN_PRIMITIVE_DESC(roi_pooling)> {
+struct roi_pooling : public primitive_base<roi_pooling> {
     CLDNN_DECLARE_PRIMITIVE(roi_pooling)
 
     roi_pooling(const primitive_id& id,
@@ -83,21 +82,6 @@ struct roi_pooling : public primitive_base<roi_pooling, CLDNN_PRIMITIVE_DESC(roi
           spatial_bins_x(spatial_bins_x),
           spatial_bins_y(spatial_bins_y) {}
 
-    roi_pooling(const dto* dto)
-        : primitive_base(dto),
-          mode(static_cast<pooling_mode>(dto->mode)),
-          position_sensitive(dto->position_sensitive),
-          pooled_width(dto->pooled_width),
-          pooled_height(dto->pooled_height),
-          spatial_scale(dto->spatial_scale),
-          trans_std(dto->trans_std),
-          no_trans(dto->no_trans),
-          output_dim(dto->output_dim),
-          part_size(dto->part_size),
-          group_size(dto->group_size),
-          spatial_bins_x(dto->spatial_bins_x),
-          spatial_bins_y(dto->spatial_bins_y) {}
-
     pooling_mode mode;
     bool position_sensitive;
     int pooled_width;
@@ -110,22 +94,6 @@ struct roi_pooling : public primitive_base<roi_pooling, CLDNN_PRIMITIVE_DESC(roi
     int group_size;
     int spatial_bins_x;
     int spatial_bins_y;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.mode = static_cast<int32_t>(mode);
-        dto.position_sensitive = position_sensitive;
-        dto.pooled_width = pooled_width;
-        dto.pooled_height = pooled_height;
-        dto.spatial_scale = spatial_scale;
-        dto.trans_std = trans_std;
-        dto.no_trans = no_trans;
-        dto.part_size = part_size;
-        dto.group_size = group_size;
-        dto.output_dim = output_dim;
-        dto.spatial_bins_x = spatial_bins_x;
-        dto.spatial_bins_y = spatial_bins_y;
-    }
 };
 
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/scale.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -41,7 +40,7 @@ namespace cldnn {
 /// Performs scale over feature when the scale feature size is equal to input feature size.<br>
 /// Performs scale over feature in batch when the scale feature and scale batch sizes are equal to input feature and input batch sizes.<br>
 /// Optionally it can also add provided biases by providing bias data.<br>
-struct scale : public primitive_base<scale, CLDNN_PRIMITIVE_DESC(scale)> {
+struct scale : public primitive_base<scale> {
     CLDNN_DECLARE_PRIMITIVE(scale)
 
     /// @brief Constructs scale primitive without adding bias.
@@ -68,12 +67,6 @@ struct scale : public primitive_base<scale, CLDNN_PRIMITIVE_DESC(scale)> {
           const padding& output_padding = padding())
         : primitive_base(id, {input, scale_input}, output_padding), bias(bias) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{scale}
-    scale(const dto* dto) : primitive_base(dto), bias(dto->bias) {
-        if (dto->input.size != 2)
-            throw std::invalid_argument("scale dto should contains exactly 2 inputs");
-    }
-
     /// @brief Primitive id containing bias data.
     primitive_id bias;
 
@@ -84,8 +77,6 @@ protected:
         else
             return {bias};
     }
-
-    void update_dto(dto& dto) const override { dto.bias = bias.c_str(); }
 };
 /// @}
 /// @}
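
A sketch of the with-bias constructor shown above (id, input, scale_input, bias); "scale_data" and "bias_data" stand for illustrative data primitives whose feature size matches the input, per the broadcasting rules in the details:

    // Per-feature scale followed by bias addition.
    cldnn::scale scaled("scaled", "conv1", "scale_data", "bias_data");
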
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/scale_grad_input.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -29,7 +28,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Performs scale primitive backward for input.
-struct scale_grad_input : public primitive_base<scale_grad_input, CLDNN_PRIMITIVE_DESC(scale_grad_input)> {
+struct scale_grad_input : public primitive_base<scale_grad_input> {
     CLDNN_DECLARE_PRIMITIVE(scale_grad_input)
 
     /// @brief Constructs scale_grad_input.
@@ -43,16 +42,8 @@ struct scale_grad_input : public primitive_base<scale_grad_input, CLDNN_PRIMITIV
                      const padding& output_padding = padding())
         : primitive_base(id, {input, scale_input}, output_padding) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{scale_grad_input}
-    scale_grad_input(const dto* dto) : primitive_base(dto) {
-        if (dto->input.size != 2)
-            throw std::invalid_argument("scale_grad_input dto should contains exactly 2 inputs");
-    }
-
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
-
-    void update_dto(dto&) const override {}
 };
 /// @}
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/scale_grad_weights.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -29,7 +28,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Performs scale layer backward for scale_input and biases.
-struct scale_grad_weights : public primitive_base<scale_grad_weights, CLDNN_PRIMITIVE_DESC(scale_grad_weights)> {
+struct scale_grad_weights : public primitive_base<scale_grad_weights> {
     CLDNN_DECLARE_PRIMITIVE(scale_grad_weights)
 
     /// @brief Constructs scale_grad_weights primitive without bias.
@@ -97,15 +96,6 @@ struct scale_grad_weights : public primitive_base<scale_grad_weights, CLDNN_PRIM
           prev_bias_grad(prev_bias_grad),
           scale_grad(scale_grad) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{scale_grad_weights}
-    scale_grad_weights(const dto* dto)
-        : primitive_base(dto),
-          scale_input(dto->scale_input),
-          bias(dto->bias),
-          prev_scale_grad(dto->prev_scale_grad),
-          prev_bias_grad(dto->prev_bias_grad),
-          scale_grad(dto->scale_grad) {}
-
     /// @brief Scale input primitive id.
     primitive_id scale_input;
     /// @brief Primitive id containing bias data.
@@ -134,14 +124,6 @@ protected:
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.bias = bias.c_str();
-        dto.scale_input = scale_input.c_str();
-        dto.prev_scale_grad = prev_scale_grad.c_str();
-        dto.prev_bias_grad = prev_bias_grad.c_str();
-        dto.scale_grad = scale_grad.c_str();
-    }
 };
 /// @}
 /// @}
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/select.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -32,7 +31,7 @@ namespace cldnn {
 /// - both inputs have to have equal sizes in all dimensions
 /// - format of both inputs has to be the same
 /// - the mask input has to have the same size in all dimensions as the inputs
-struct select : public primitive_base<select, CLDNN_PRIMITIVE_DESC(select)> {
+struct select : public primitive_base<select> {
     CLDNN_DECLARE_PRIMITIVE(select)
 
     /// @brief Constructs select primitive.
@@ -46,12 +45,6 @@ struct select : public primitive_base<select, CLDNN_PRIMITIVE_DESC(select)> {
            const primitive_id& mask,
            const padding& output_padding = padding())
         : primitive_base(id, {input, input2, mask}, output_padding) {}
-
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{select}
-    select(const dto* dto) : primitive_base(dto) {}
-
-protected:
-    void update_dto(dto&) const override {}
 };
 /// @}
 /// @}
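
A sketch of the constructor above (id, input, input2, mask); the three illustrative inputs must share sizes and format, as listed:

    // Element-wise select between "input_a" and "input_b", driven by "mask".
    cldnn::select selected("selected", "input_a", "input_b", "mask");
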
@@ -17,7 +17,6 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "../C/shuffle_channels.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -30,7 +29,7 @@ namespace cldnn {
 
 /// @brief
 /// @details
-struct shuffle_channels : public primitive_base<shuffle_channels, CLDNN_PRIMITIVE_DESC(shuffle_channels)> {
+struct shuffle_channels : public primitive_base<shuffle_channels> {
     CLDNN_DECLARE_PRIMITIVE(shuffle_channels)
 
     /// @brief Constructs shuffle_channels primitive.
@@ -45,19 +44,10 @@ struct shuffle_channels : public primitive_base<shuffle_channels, CLDNN_PRIMITIV
                      const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), group(group), axis(axis) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{shuffle_channels}
-    shuffle_channels(const dto* dto) : primitive_base(dto), group(dto->group), axis(dto->axis) {}
-
     /// @brief The number of groups to split the channel dimension. This number must evenly divide the channel dimension size.
     int32_t group;
     /// @brief The index of the channel dimension (default is 1).
     int32_t axis;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.group = group;
-        dto.axis = axis;
-    }
 };
 /// @}
 /// @}
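
A sketch assuming the parameter order follows the initializer list above (id, input, group, axis); the group count must evenly divide the channel dimension:

    // Shuffle the channel (feature) dimension, axis 1, using 4 groups (illustrative ids).
    cldnn::shuffle_channels shuffled("shuffled", "conv1", /*group=*/4, /*axis=*/1);
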
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/softmax.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -35,17 +34,17 @@ namespace cldnn {
 ///   @li N : number of values to normalize
 ///   @li b : value after normalization
 ///   @li a : value before normalization
-struct softmax : public primitive_base<softmax, CLDNN_PRIMITIVE_DESC(softmax)> {
+struct softmax : public primitive_base<softmax> {
     CLDNN_DECLARE_PRIMITIVE(softmax)
 
     /// @brief Enum type to specify softmax's normalization scope (see #dimension).
     enum dimension_t {
-        normalize_f = cldnn_softmax_normalize_f,
-        normalize_x = cldnn_softmax_normalize_x,
-        normalize_y = cldnn_softmax_normalize_y,
-        normalize_z = cldnn_softmax_normalize_z,
-        normalize_fyx = cldnn_softmax_normalize_fyx,
-        normalize_all = cldnn_softmax_normalize_all,
+        normalize_f,
+        normalize_x,
+        normalize_y,
+        normalize_z,
+        normalize_fyx,
+        normalize_all
     };
 
     /// @brief Constructs softmax primitive.
@@ -58,9 +57,6 @@ struct softmax : public primitive_base<softmax, CLDNN_PRIMITIVE_DESC(softmax)> {
             const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), dimension(dimension) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{softmax}
-    softmax(const dto* dto) : primitive_base(dto), dimension(static_cast<dimension_t>(dto->dimension)) {}
-
     /// @brief Defines a scope of a single softmax normalization.
     /// @details
     /// Given a 4-dimensional input consisting of b,f,y,x dimensions, softmax normalizes data that is divided into multiple independent sets.
@@ -71,9 +67,6 @@ struct softmax : public primitive_base<softmax, CLDNN_PRIMITIVE_DESC(softmax)> {
     /// - when set to @link softmax::dimension_t softmax::normalize_fyx @endlink each 3d image within input is normalized independently,
     /// - when set to @link softmax::dimension_t softmax::normalize_all @endlink everything is normalized,
     dimension_t dimension;
-
-private:
-    void update_dto(dto& dto) const override { dto.dimension = static_cast<cldnn_softmax_dimension>(dimension); }
 };
 /// @}
 /// @}
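
A sketch of the reworked softmax (id, input, dimension), with illustrative ids; normalize_fyx normalizes each 3d image within the batch independently, as documented above:

    cldnn::softmax prob("prob", "fc3", cldnn::softmax::normalize_fyx);
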
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/softmax_loss_grad.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -29,7 +28,7 @@ namespace cldnn {
 
 /// @brief Backward pass for Softmax log loss.
 /// @details The output values are the same as input_prob, except for the value at the correct label, from which 1 is subtracted.
-struct softmax_loss_grad : public primitive_base<softmax_loss_grad, CLDNN_PRIMITIVE_DESC(softmax_loss_grad)> {
+struct softmax_loss_grad : public primitive_base<softmax_loss_grad> {
     CLDNN_DECLARE_PRIMITIVE(softmax_loss_grad)
 
     /// @brief Constructs softmax_loss_grad primitive.
@@ -41,14 +40,8 @@ struct softmax_loss_grad : public primitive_base<softmax_loss_grad, CLDNN_PRIMIT
                       const primitive_id& labels,
                       const padding& output_padding = padding())
         : primitive_base(id, {input_prob, labels}, output_padding) {}
-
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{softmax_loss_grad}
-    softmax_loss_grad(const dto* dto) : primitive_base(dto) {}
-
-private:
-    void update_dto(dto&) const override {}
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/split.h"
 #include "primitive.hpp"
 #include <vector>
 #include <utility>
@@ -49,7 +48,7 @@ namespace cldnn {
 /// @n output_ids_offsets[0] = { "out0", { 0,0,0,0 } };
 /// @n output_ids_offsets[1] = { "out1", { 0,2,0,0 } };
 /// @n After split there would be 2 primitives: "split:out0" and "split:out1" which contain 2 feature maps (lower and upper)
-struct split : public primitive_base<split, CLDNN_PRIMITIVE_DESC(split)> {
+struct split : public primitive_base<split> {
     CLDNN_DECLARE_PRIMITIVE(split)
 
     /// @brief Constructs split primitive.
@@ -61,33 +60,15 @@ struct split : public primitive_base<split, CLDNN_PRIMITIVE_DESC(split)> {
           const std::vector<std::pair<primitive_id, tensor> >& output_ids_offsets,
           const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          output_ids(_output_ids.cpp_ids),
           output_offsets(extract_tensor_vector(output_ids_offsets)),
-          _output_ids(extract_primitive_vector(output_ids_offsets)),
-          _output_offsets(tensor_vector_to_cldnn_vector(output_offsets)) {}
+          output_ids(extract_primitive_vector(output_ids_offsets)) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{split}
-    split(const dto* dto)
-        : primitive_base(dto),
-          output_ids(_output_ids.cpp_ids),
-          output_offsets(tensor_arr_to_vector(dto->output_offsets)),
-          _output_ids(dto->output_ids),
-          _output_offsets(tensor_arr_to_cldnn_vector(dto->output_offsets)) {}
-
-    /// @brief List of output_ids.
-    fixed_size_vector_ref output_ids;
     /// @brief Array of tensors with offsets.
     std::vector<tensor> output_offsets;
+    /// @brief List of output_ids.
+    const primitive_id_arr output_ids;
 
 protected:
-    primitive_id_arr _output_ids;
-    std::vector<cldnn_tensor> _output_offsets;
-
-    void update_dto(dto& dto) const override {
-        dto.output_ids = _output_ids.ref();
-        dto.output_offsets = tensor_vector_to_arr(_output_offsets);
-    }
-
     static std::vector<primitive_id> extract_primitive_vector(
         const std::vector<std::pair<primitive_id, tensor> >& stor) {
         std::vector<primitive_id> res;
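
Following the documentation comment for split above, a sketch of the reworked constructor (id, input, output_ids_offsets); ids and offsets are illustrative and mirror the example in the comment:

    // Split a 2-batch, 4-feature input into two 2-feature halves.
    std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> output_ids_offsets;
    output_ids_offsets.emplace_back("out0", cldnn::tensor(0, 0, 0, 0));
    output_ids_offsets.emplace_back("out1", cldnn::tensor(0, 2, 0, 0));
    cldnn::split split_prim("split", "input", output_ids_offsets);
    // The two outputs are then addressed as "split:out0" and "split:out1".
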
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/strided_slice.h"
 #include "primitive.hpp"
 #include <vector>
 
@@ -30,7 +29,7 @@ namespace cldnn {
 
 /// @brief
 /// @details
-struct strided_slice : public primitive_base<strided_slice, CLDNN_PRIMITIVE_DESC(strided_slice)> {
+struct strided_slice : public primitive_base<strided_slice> {
     CLDNN_DECLARE_PRIMITIVE(strided_slice)
 
     /// @brief Constructs strided_slice primitive.
@@ -59,14 +58,6 @@ struct strided_slice : public primitive_base<strided_slice, CLDNN_PRIMITIVE_DESC
           new_axis_mask(new_axis_mask),
           shrink_axis_mask(shrink_axis_mask) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{strided_slice}
-    strided_slice(const dto* dto)
-        : primitive_base(dto),
-          begin_mask(uint8_t_arr_to_vector(dto->begin_mask)),
-          end_mask(uint8_t_arr_to_vector(dto->end_mask)),
-          new_axis_mask(uint8_t_arr_to_vector(dto->new_axis_mask)),
-          shrink_axis_mask(uint8_t_arr_to_vector(dto->shrink_axis_mask)) {}
-
     /// @param begin_mask Array of bits; where a bit is set, begin[i] is replaced with the maximum possible range in that dimension.
     std::vector<uint8_t> begin_mask;
     /// @param end_mask Array of bits; where a bit is set, end[i] is replaced with the maximum possible range in that dimension.
@@ -75,14 +66,6 @@ struct strided_slice : public primitive_base<strided_slice, CLDNN_PRIMITIVE_DESC
     std::vector<uint8_t> new_axis_mask;
     /// @param shrink_axis_mask Array of bits; where a bit is set, the dimensionality is shrunk by 1, taking the value at index begin[i].
     std::vector<uint8_t> shrink_axis_mask;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.begin_mask = uint8_t_vector_to_arr(begin_mask);
-        dto.end_mask = uint8_t_vector_to_arr(end_mask);
-        dto.new_axis_mask = uint8_t_vector_to_arr(new_axis_mask);
-        dto.shrink_axis_mask = uint8_t_vector_to_arr(shrink_axis_mask);
-    }
 };
 /// @}
 /// @}
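
The constructor is not shown in this hunk, so the sketch below is hypothetical: it assumes the primitive takes begin/end/strides data inputs followed by the four masks, and mainly illustrates the mask fields documented above (ids are illustrative):

    // Per the parameter docs above: a set bit replaces begin[i]/end[i] with the full range of that dimension;
    // new_axis_mask inserts a length-1 dimension, shrink_axis_mask drops one.
    std::vector<uint8_t> begin_mask       = { 1, 0, 0, 0 };
    std::vector<uint8_t> end_mask         = { 1, 0, 0, 0 };
    std::vector<uint8_t> new_axis_mask    = { 0, 0, 0, 0 };
    std::vector<uint8_t> shrink_axis_mask = { 0, 0, 0, 0 };
    cldnn::strided_slice sliced("sliced", "input", "begin", "end", "strides",
                                begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
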
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "cldnn_defs.h"
+#include "cldnn.hpp"
 #include "compounds.h"
 #include "meta_utils.hpp"
 
@@ -85,73 +85,78 @@ struct format_traits {
 struct format {
     enum type : int32_t {
         // Data formats
-        yxfb                 = cldnn_format_yxfb,                  ///< batch first, feature and than spatials \n \image html yxfb.jpg
-        byxf                 = cldnn_format_byxf,                  ///< used in bitmaps, input from user i.e b images of RGB format \n \image html byxf.jpg
-        bfyx                 = cldnn_format_bfyx,                  ///< the most common format for activations in clDNN. \n \image html bfyx.jpg
-        fyxb                 = cldnn_format_fyxb,                  ///< format not used inside clDNN, but supported in reorder as extension
-                                                                   ///< for user provided formats.
-        bfyx_f16             = cldnn_format_bfyx_f16,              ///< format used for blocked convolution
-        bs_xs_xsv8_bsv8      = cldnn_format_bs_xs_xsv8_bsv8,       ///< format used only for fully connected weights: bs - batch slice,
-                                                                   ///< xs - x slice, bsv8 - 8 values of single slice.
-        bs_xs_xsv8_bsv16     = cldnn_format_bs_xs_xsv8_bsv16,      ///< format used only for fully connected weights: bs - batch slice,
-                                                                   ///< xs - x slice, bsv16 - 16 values of single slice.
-        bs_x_bsv16           = cldnn_format_bs_x_bsv16,            ///< format used only for fully connected weights fp16 batch=1 : bs - batch slice
-                                                                   ///< (responses slice), bsv16 - 16 values of single batch slice, x - flattened plane of (fyx)
-                                                                   ///< \n \image html bs_x_bsv16.jpg
-        bf8_xy16             = cldnn_format_bf8_xy16,              ///< format used only for convolution 1x1 input, xy aligned to 16, f aligned to 8
-                                                                   ///< \n \image html bf8_xy16.jpg
-        b_fs_yx_32fp         = cldnn_format_b_fs_yx_32fp,          ///< format for data for binary convolutions
-                                                                   ///< \n \image html image_2d_weights_c1_b_fyx.jpg
-        winograd_2x3_s1_data = cldnn_format_winograd_2x3_s1_data,  ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1
-        byxf_af32            = cldnn_format_byxf_af32,             ///< format for input for primitives using MMAD
-        byx8_f4              = cldnn_format_byx8_f4,               ///< format for input for MMAD convolutions
-        fs_bs_yx_bsv4_fsv32  = cldnn_format_fs_bs_yx_bs4_fs32,     ///< format for batched input for primitives using MMAD
-        b_fs_yx_fsv4         = cldnn_format_b_fs_yx_fsv4,          ///< format for input for IMAD convolutions
-        bfzyx                = cldnn_format_bfzyx,                 ///< format for 5d data tensors
-        bfwzyx               = cldnn_format_bfwzyx,                ///  batch, feature, 4D spatial
-        fs_b_yx_fsv32        = cldnn_format_fs_b_yx_fsv32,         ///< format for input for fp16 primitives
+        yxfb,                                   ///< batch first, feature and then spatials \n \image html yxfb.jpg
+        byxf,                                   ///< used in bitmaps, input from user i.e. b images of RGB format \n \image html byxf.jpg
+        bfyx,                                   ///< the most common format for activations in clDNN. \n \image html bfyx.jpg
+        fyxb,                                   ///< format not used inside clDNN, but supported in reorder as extension
+                                                ///< for user provided formats.
+        bfyx_f16,                               ///< format used for blocked convolution
+        bs_xs_xsv8_bsv8,                        ///< format used only for fully connected weights: bs - batch slice,
+                                                ///< xs - x slice, bsv8 - 8 values of single slice.
+        bs_xs_xsv8_bsv16,                       ///< format used only for fully connected weights: bs - batch slice,
+                                                ///< xs - x slice, bsv16 - 16 values of single slice.
+        bs_x_bsv16,                             ///< format used only for fully connected weights fp16 batch=1 : bs - batch slice
+                                                ///< (responses slice), bsv16 - 16 values of single batch slice, x - flattened plane of (fyx)
+                                                ///< \n \image html bs_x_bsv16.jpg
+        bf8_xy16,                               ///< format used only for convolution 1x1 input, xy aligned to 16, f aligned to 8
+                                                ///< \n \image html bf8_xy16.jpg
+        b_fs_yx_32fp,                           ///< format for data for binary convolutions
+                                                ///< \n \image html image_2d_weights_c1_b_fyx.jpg
+        winograd_2x3_s1_data,                   ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1
+        byxf_af32,                              ///< format for input for primitives using MMAD
+        byx8_f4,                                ///< format for input for MMAD convolutions
+        fs_bs_yx_bsv4_fsv32,                    ///< format for batched input for primitives using MMAD
+        b_fs_yx_fsv4,                           ///< format for input for IMAD convolutions
+        bfzyx,                                  ///< format for 5d data tensors
+        bfwzyx,                                 ///< batch, feature, 4D spatial
+        fs_b_yx_fsv32,                          ///< format for input for fp16 primitives
+        bfzyx_f16,                              ///< format used for 3D blocked convolution (features blocked by 16)
 
         // Weights formats
-        o_i_yx_i16_o16                         = cldnn_format_o_i_yx_i16_o16,                      ///< format used for blocked convolution
-        oiyx_o16                               = cldnn_format_oiyx_o16,                            ///< format used only for convolution weights:
-                                                                                                   ///< os - output feature maps slice, i - input feature maps,
-                                                                                                   ///< yx - spatials, sv16 - 16 values of single slice.
-        os_iyx_osv16                           = cldnn_format_os_iyx_osv16,                        ///< format used only for convolution weights:
-                                                                                                   ///< os - output feature maps slice, i - input feature maps,
-                                                                                                   ///< yx - spatials, sv16 - 16 values of single slice.
-        os_iyx_osv32                           = cldnn_format_os_iyx_osv32,                        ///< format used only for convolution weights:
-                                                                                                   ///< os - output feature maps slice, i - input feature maps,
-                                                                                                   ///< yx - spatials, sv32 - 32 values of single slice.
-        os_iyx_osv64                           = cldnn_format_os_iyx_osv64,                        ///< format used only for convolution weights:
-                                                                                                   ///< os - output feature maps slice, i - input feature maps,
-                                                                                                   ///< yx - spatials, sv64 - 64 values of single slice.
-        image_2d_weights_c4_fyx_b              = cldnn_format_image_2d_weights_c4_fyx_b,           ///< image format for weights, width size is f*y*x/4
-                                                                                                   ///< (4-channels filled with fyx data), height is b
-                                                                                                   ///< \n \image html image_2d_weights_c4_fyx_b.jpg
-        image_2d_weights_c1_b_fyx              = cldnn_format_image_2d_weights_c1_b_fyx,           ///< image format for weights, width size is b,
-                                                                                                   ///< height is f*y*x, single channel
-        winograd_2x3_s1_weights                = cldnn_format_winograd_2x3_s1_weights,             ///< format used for weights for winograd non-fused
-                                                                                                   ///< convolution, F(2,3) -- filter 3x3 with stride 1
-        winograd_2x3_s1_fused_weights          = cldnn_format_winograd_2x3_s1_fused_weights,       ///< format used for weights for winograd fused
-                                                                                                   ///< convolution, F(2,3) -- filter 3x3 with stride 1
-        winograd_6x3_s1_fused_weights          = cldnn_format_winograd_6x3_s1_fused_weights,       ///< format used for weights for winograd fused
-                                                                                                   ///< convolution, F(6,3) -- filter 3x3 with stride 1
-        image_2d_weights_winograd_6x3_s1_fbxyb = cldnn_format_image_2d_weights_winograd_6x3_s1_fbxyb,  ///< image format used for weights for winograd fused
-                                                                                                       ///< convolution, F(6,3) -- filter 3x3 with stride 1
-        image_2d_weights_winograd_6x3_s1_xfbyb = cldnn_format_image_2d_weights_winograd_6x3_s1_xfbyb,  ///< image format used for weights for winograd fused
-                                                                                                       ///< convolution, F(6,3) -- filter 3x3 with stride 1
-        os_is_yx_isa8_osv8_isv4                = cldnn_format_os_is_yx_isa8_osv8_isv4,                 ///< format for weights for MMAD convolution
-        os_is_yx_isa8_osv8_isv4_swizzled_by_4  = cldnn_format_os_is_yx_isa8_osv8_isv4_swizzled_by_4,   ///< format for weights for MMAD convolution
-        is_o_yx_isv32                          = cldnn_format_is_o_yx_isv32,                           ///< format for weights for 1x1 MMAD convolutions
-        is_o32_yx_isv32_swizzled_by_4          = cldnn_format_is_o32_yx_isv32_swizzled_by_4,           ///< format for weights for 1x1 MMAD convolutions
-        os_is_y_x8_osv8_isv4                   = cldnn_format_os_is_y_x8_osv8_isv4,                    ///< format for weights for 1x1 MMAD convolutions
-        os_is_y_x8_osv8_isv4_swizzled_by_4     = cldnn_format_os_is_y_x8_osv8_isv4_swizzled_by_4,      ///< format for weights for 1x1 MMAD convolutions
-        os_is_yx_osv16_isv4                    = cldnn_format_os_is_yx_osv16_isv4,                     ///< format for weights for IMAD convolutions
-        bf_lyx_yx                              = cldnn_bf_lyx_yx,                                      ///< format for local convolution weights
-        os_is_yx_osv32_isv32p                  = cldnn_format_os_is_yx_osv32_isv32p,                   ///< format for weights for binary convolutions
-
-        format_num = cldnn_format_format_num,  ///< number of format types
-        any        = cldnn_format_any
+        o_i_yx_i16_o16,                             ///< format used for blocked convolution
+        oiyx_o16,                                   ///< format used only for convolution weights:
+                                                    ///< os - output feature maps slice, i - input feature maps,
+                                                    ///< yx - spatials, sv16 - 16 values of single slice.
+        os_iyx_osv16,                               ///< format used only for convolution weights:
+                                                    ///< os - output feature maps slice, i - input feature maps,
+                                                    ///< yx - spatials, sv16 - 16 values of single slice.
+        os_iyx_osv32,                               ///< format used only for convolution weights:
+                                                    ///< os - output feature maps slice, i - input feature maps,
+                                                    ///< yx - spatials, sv32 - 32 values of single slice.
+        os_iyx_osv64,                               ///< format used only for convolution weights:
+                                                    ///< os - output feature maps slice, i - input feature maps,
+                                                    ///< yx - spatials, sv64 - 64 values of single slice.
+        image_2d_weights_c4_fyx_b,                  ///< image format for weights, width size is f*y*x/4
+                                                    ///< (4-channels filled with fyx data), height is b
+                                                    ///< \n \image html image_2d_weights_c4_fyx_b.jpg
+        image_2d_weights_c1_b_fyx,                  ///< image format for weights, width size is b,
+                                                    ///< height is f*y*x, single channel
+        winograd_2x3_s1_weights,                    ///< format used for weights for winograd non-fused
+                                                    ///< convolution, F(2,3) -- filter 3x3 with stride 1
+        winograd_2x3_s1_fused_weights,              ///< format used for weights for winograd fused
+                                                    ///< convolution, F(2,3) -- filter 3x3 with stride 1
+        winograd_6x3_s1_fused_weights,              ///< format used for weights for winograd fused
+                                                    ///< convolution, F(6,3) -- filter 3x3 with stride 1
+        image_2d_weights_winograd_6x3_s1_fbxyb,     ///< image format used for weights for winograd fused
+                                                    ///< convolution, F(6,3) -- filter 3x3 with stride 1
+        image_2d_weights_winograd_6x3_s1_xfbyb,     ///< image format used for weights for winograd fused
+                                                    ///< convolution, F(6,3) -- filter 3x3 with stride 1
+        os_is_yx_isa8_osv8_isv4,                    ///< format for weights for MMAD convolution
+        os_is_yx_isa8_osv8_isv4_swizzled_by_4,      ///< format for weights for MMAD convolution
+        is_o_yx_isv32,                              ///< format for weights for 1x1 MMAD convolutions
+        is_o32_yx_isv32_swizzled_by_4,              ///< format for weights for 1x1 MMAD convolutions
+        os_is_y_x8_osv8_isv4,                       ///< format for weights for 1x1 MMAD convolutions
+        os_is_y_x8_osv8_isv4_swizzled_by_4,         ///< format for weights for 1x1 MMAD convolutions
+        os_is_yx_osv16_isv4,                        ///< format for weights for IMAD convolutions
+        bf_lyx_yx,                                  ///< format for local convolution weights
+        os_is_yx_osv32_isv32p,                      ///< format for weights for binary convolutions
+        o_i_zyx_i16_o16,                            ///< format used for blocked 3D convolution
+        i_o_zyx_o16_i16,                            ///< format used for blocked 3D deconvolution
+        lstm_weights_dio,                           ///< dynamic_lstm, direction,
+                                                    ///< then IO (I - input size, O - 4 * hidden_size)
+
+        format_num,  ///< number of format types
+        any        = -1
     };
 
     /// @brief Get format traits for particular @p format::type
@@ -175,6 +180,7 @@ struct format {
                 { bfwzyx,               { 1, 1, 4, 0, "bfwzyx", "bfxyzw", {}}},
                 { fs_b_yx_fsv32,        { 1, 1, 2, 0, "fbyx",   "bfxy?",  {{1, 32}}}},
                 { b_fs_yx_32fp,         { 1, 1, 2, 0, "bfyx",   "bfxy?",  {}}},
+                { bfzyx_f16,            { 1, 1, 3, 0, "bfzyx",  "bfxyz",  {{1, 16}}}},
 
                 { o_i_yx_i16_o16,                         { 1, 1, 2, 0, "bfyx",   "bfxy",    {{1, 16}, {0, 16}}}},
                 { oiyx_o16,                               { 1, 1, 2, 0, "bfyx",   "bfxy",    {{0, 16}}}},
@@ -188,6 +194,7 @@ struct format {
                 { image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, "xyfb",   "bfxy?",   {}}},
                 { image_2d_weights_c4_fyx_b,              { 1, 1, 2, 0, "bfyx",   "bfxy?",   {}}},
                 { image_2d_weights_c1_b_fyx,              { 1, 1, 2, 0, "bfyx",   "bfxy?",   {}}},
+                { lstm_weights_dio,                       { 1, 1, 2, 0, "bfxy",   "bfxy?",   {}}},
                 { os_is_yx_isa8_osv8_isv4,                { 1, 1, 2, 0, "bfyx",   "bfxy?",   {}}},
                 { os_is_yx_isa8_osv8_isv4_swizzled_by_4,  { 1, 1, 2, 0, "bfyx",   "bfxy?",   {}}},
                 { is_o_yx_isv32,                          { 1, 1, 2, 0, "byxf",   "bfxy?",   {{1, 32}}}},
@@ -197,6 +204,8 @@ struct format {
                 { bf_lyx_yx,                              { 1, 1, 2, 2, "bfklyx", "bfxy??lk", {}}},
                 { os_is_yx_osv16_isv4,                    { 1, 1, 1, 0, "bfxy",   "bfxy?",   {{0, 16}, {1, 4}}}},
                 { os_is_yx_osv32_isv32p,                  { 1, 1, 1, 0, "bfxy",   "bfxy?",   {}}},
+                { o_i_zyx_i16_o16,                        { 1, 1, 3, 0, "bfzyx",  "bfxyz",   {{0, 16}, {1, 16}}}},
+                { i_o_zyx_o16_i16,                        { 1, 1, 3, 0, "fbzyx",  "bfxyz",   {{0, 16}, {1, 16}}}},
         };
         return traits.at(fmt);
     }
@@ -274,12 +283,14 @@ struct format {
     constexpr format(type t) : value(t) {}
     /// @brief Implicit conversion to format::type.
     constexpr operator type() const { return value; }
-    /// @brief Conversion from C API @ref ::cldnn_format_type.
-    constexpr explicit format(cldnn_format_type t) : value(static_cast<type>(t)) {}
-    /// @brief Conversion to C API @ref ::cldnn_format_type.
-    constexpr explicit operator cldnn_format_type() const { return static_cast<cldnn_format_type>(value); }
 };
 
+constexpr int32_t tensor_batch_dim_max = 1;
+constexpr int32_t tensor_feature_dim_max = 1;
+constexpr int32_t tensor_spatial_dim_max = 4;
+constexpr int32_t tensor_local_dim_max = 2;
+constexpr int32_t tensor_dim_max = 8;
+
 struct tensor;
 
 /// @brief Helper structs used in tensor constructor with dim_vec_kinds
@@ -300,26 +311,26 @@ struct dim_vec_limits {
 
 template <>
 struct dim_vec_limits<dim_vec_kind::batch> {
-    static constexpr int32_t max_dimentionality = CLDNN_TENSOR_BATCH_DIM_MAX;
+    static constexpr int32_t max_dimentionality = tensor_batch_dim_max;
     static constexpr int32_t dim_offset = 0;
 };
 
 template <>
 struct dim_vec_limits<dim_vec_kind::feature> {
-    static constexpr int32_t max_dimentionality = CLDNN_TENSOR_FEATURE_DIM_MAX;
-    static constexpr int32_t dim_offset = CLDNN_TENSOR_BATCH_DIM_MAX;
+    static constexpr int32_t max_dimentionality = tensor_feature_dim_max;
+    static constexpr int32_t dim_offset = tensor_batch_dim_max;
 };
 
 template <>
 struct dim_vec_limits<dim_vec_kind::spatial> {
-    static constexpr int32_t max_dimentionality = CLDNN_TENSOR_SPATIAL_DIM_MAX;
-    static constexpr int32_t dim_offset = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX;
+    static constexpr int32_t max_dimentionality = tensor_spatial_dim_max;
+    static constexpr int32_t dim_offset = tensor_batch_dim_max + tensor_feature_dim_max;
 };
 
 template <>
 struct dim_vec_limits<dim_vec_kind::local> {
-    static constexpr int32_t max_dimentionality = CLDNN_TENSOR_LOCAL_DIM_MAX;
-    static constexpr int32_t dim_offset = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX;
+    static constexpr int32_t max_dimentionality = tensor_local_dim_max;
+    static constexpr int32_t dim_offset = tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max;
 };
 
 /// @brief Template class used in tensor constructor using dim_vec_kinds
@@ -377,18 +388,18 @@ struct tensor {
     mutable_array_ref<value_type> local;    ///< Local dimensions.
 
 private:
-    value_type _sizes[CLDNN_TENSOR_DIM_MAX];
+    value_type _sizes[tensor_dim_max];
     value_type _dimOffset;
     value_type _dimSize;
 
 public:
     explicit tensor(value_type default_size = 0) :
-        raw(_sizes, CLDNN_TENSOR_DIM_MAX),
-        batch(_sizes, CLDNN_TENSOR_BATCH_DIM_MAX),
-        feature(_sizes + CLDNN_TENSOR_BATCH_DIM_MAX, CLDNN_TENSOR_FEATURE_DIM_MAX),
-        spatial(_sizes + CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX, CLDNN_TENSOR_SPATIAL_DIM_MAX),
-        local(_sizes + CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX, CLDNN_TENSOR_LOCAL_DIM_MAX) {
-        std::fill_n(_sizes, CLDNN_TENSOR_DIM_MAX, default_size);
+        raw(_sizes, tensor_dim_max),
+        batch(_sizes, tensor_batch_dim_max),
+        feature(_sizes + tensor_batch_dim_max, tensor_feature_dim_max),
+        spatial(_sizes + tensor_batch_dim_max + tensor_feature_dim_max, tensor_spatial_dim_max),
+        local(_sizes + tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max, tensor_local_dim_max) {
+        std::fill_n(_sizes, tensor_dim_max, default_size);
     }
 
     /// @brief Constructs tensor.
@@ -432,11 +443,11 @@ public:
     tensor(value_type batch_num, value_type feature_num, value_type width, value_type height)
         : tensor(1) {
         _sizes[0] = batch_num;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX] = feature_num;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX] = width;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1] = height;
+        _sizes[tensor_batch_dim_max] = feature_num;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
         if (batch_num == 0 && feature_num == 0 && width == 0 && height == 0)
-            _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 2] = 0;
+            _sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = 0;
     }
 
     /// @brief Constructs @p tensor.
@@ -455,10 +466,10 @@ public:
     tensor(value_type batch_num, value_type feature_num, value_type width, value_type height, value_type depth)
         : tensor(1) {
         _sizes[0] = batch_num;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX] = feature_num;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX] = width;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1] = height;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 2] = depth;
+        _sizes[tensor_batch_dim_max] = feature_num;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = depth;
     }
 
     /// @brief Constructs @p tensor.
@@ -479,11 +490,11 @@ public:
            value_type height, value_type local_x, value_type local_y)
         : tensor(1) {
         _sizes[0] = batch_num;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX] = feature_num;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX] = width;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1] = height;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX] = local_x;
-        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX + 1] = local_y;
+        _sizes[tensor_batch_dim_max] = feature_num;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max] = local_x;
+        _sizes[tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max + 1] = local_y;
     }
 
     /// @brief Constructs @p tensor using vector of sizes.
@@ -502,7 +513,7 @@ public:
      */
     explicit tensor(const std::vector<value_type>& sizes, value_type default_size = 1)
         : tensor(default_size) {
-        int max_size = std::min(static_cast<int>(sizes.size()), CLDNN_TENSOR_DIM_MAX);
+        int max_size = std::min(static_cast<int>(sizes.size()), tensor_dim_max);
         for (int i = 0; i < max_size; i++)
             _sizes[i] = sizes[i];
     }
@@ -527,34 +538,17 @@ public:
         }
     }
 
-    /// @brief Implicit conversion form C API :: cldnn_tensor.
-    explicit tensor(const cldnn_tensor& other)
-        : tensor(0) {
-        std::copy_n(other.sizes, CLDNN_TENSOR_DIM_MAX, _sizes);
-    }
-
-    /// @brief Implicit conversion to C API ::cldnn_tensor.
-    operator cldnn_tensor() const {
-        cldnn_tensor result;
-        result.batch_num = batch.size();
-        result.feature_num = feature.size();
-        result.spatial_num = spatial.size();
-        result.local_num = local.size();
-        std::copy_n(_sizes, CLDNN_TENSOR_DIM_MAX, result.sizes);
-        return result;
-    }
-
     /// @brief Copy construction.
     tensor(const tensor& other)
         : tensor(0) {
-        std::copy_n(other._sizes, CLDNN_TENSOR_DIM_MAX, _sizes);
+        std::copy_n(other._sizes, tensor_dim_max, _sizes);
     }
 
     /// @brief Copy assignment.
     tensor& operator=(const tensor& other) {
         if (this == &other)
             return *this;
-        std::copy_n(other._sizes, CLDNN_TENSOR_DIM_MAX, _sizes);
+        std::copy_n(other._sizes, tensor_dim_max, _sizes);
         return *this;
     }
 
@@ -613,7 +607,7 @@ public:
     /// @brief Returns a tensor with all negated elements.
     tensor negate() const {
         auto result = *this;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; i++) {
+        for (size_t i = 0; i < tensor_dim_max; i++) {
             result._sizes[i] = -_sizes[i];
         }
         return result;
@@ -622,7 +616,7 @@ public:
     /// @brief Returns a tensor with all elements multiplied by @p multiplier.
     tensor mul(value_type multiplier) const {
         auto result = *this;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; i++) {
+        for (size_t i = 0; i < tensor_dim_max; i++) {
             result._sizes[i] *= multiplier;
         }
         return result;
@@ -631,7 +625,7 @@ public:
     /// @brief Returns a tensor with all elements divided by @p divider.
     tensor div(value_type divider) const {
         auto result = *this;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; i++) {
+        for (size_t i = 0; i < tensor_dim_max; i++) {
             result._sizes[i] /= divider;
         }
         return result;
@@ -640,7 +634,7 @@ public:
     /// @brief Returns a tensor whose elements are the element-wise sum of this tensor and @p rhs
     tensor add(const tensor& rhs) const {
         auto result = *this;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; i++) {
+        for (size_t i = 0; i < tensor_dim_max; i++) {
             result._sizes[i] += rhs._sizes[i];
         }
         return result;
@@ -653,14 +647,14 @@ public:
 
     /// @brief Assign and add
     tensor& operator+=(const tensor& rhs) {
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; i++)
+        for (size_t i = 0; i < tensor_dim_max; i++)
             _sizes[i] += rhs._sizes[i];
         return *this;
     }
 
     /// @brief Assign and subtract
     tensor& operator-=(const tensor& rhs) {
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; i++)
+        for (size_t i = 0; i < tensor_dim_max; i++)
             _sizes[i] -= rhs._sizes[i];
         return *this;
     }
@@ -747,7 +741,7 @@ public:
             }
 
             // skip z for the formats that do not have it
-            if (((new_fmt != format::bfzyx && new_fmt != format::bfwzyx)) && (c == 'z')) {
+            if (((new_fmt != format::bfzyx && new_fmt != format::bfzyx_f16 && new_fmt != format::bfwzyx)) && (c == 'z')) {
                 if (new_order[i] == '?')
                     new_sizes[i] = default_size;
 
@@ -759,7 +753,7 @@ public:
                 if (new_order[i] == '?')
                     new_sizes[i] = default_size;
 
-                if (new_fmt == format::bfzyx)
+                if (new_fmt == format::bfzyx || new_fmt == format::bfzyx_f16)
                     tmp_w *= old_sizes[i];
                 else
                     tmp_z *= old_sizes[i];
@@ -868,7 +862,7 @@ public:
     /// @brief Returns a tensor containing values maximum from @p lhs and @p rhs.
     static tensor max(tensor const& lhs, tensor const& rhs) {
         auto ret = lhs;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
+        for (size_t i = 0; i < tensor_dim_max; ++i)
             ret._sizes[i] = std::max(ret._sizes[i], rhs._sizes[i]);
 
         return ret;
@@ -877,7 +871,7 @@ public:
     /// @brief Returns a tensor containing values minimum from @p lhs and @p rhs.
     static tensor min(tensor const& lhs, tensor const& rhs) {
         auto ret = lhs;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
+        for (size_t i = 0; i < tensor_dim_max; ++i)
             ret._sizes[i] = std::min(ret._sizes[i], rhs._sizes[i]);
 
         return ret;
@@ -914,24 +908,6 @@ inline tensor operator*(const tensor& lhs, tensor::value_type rhs) { return lhs.
 /// @brief Divides a @p tensor by a @p scalar
 inline tensor operator/(const tensor& lhs, tensor::value_type rhs) { return lhs.div(rhs); }
 
-///
-/// \brief Converts C API tensor_array to std::vector<tensor>
-///
-inline std::vector<tensor> tensor_arr_to_vector(const cldnn_tensor_arr& arr) {
-    std::vector<tensor> result(arr.size);
-    for (size_t i = 0; i < arr.size; i++)
-        result[i] = (tensor) arr.data[i];
-
-    return result;
-}
-
-///
-/// \brief Converts std::vector<tensor> to std::vector of C API tensor
-///
-inline std::vector<cldnn_tensor> tensor_vector_to_cldnn_vector(const std::vector<tensor>& stor) {
-    return std::vector<cldnn_tensor>(stor.begin(), stor.end());
-}
-
 /// @}
 /// @}
 }  // namespace cldnn
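
A minimal usage sketch of the element-wise tensor arithmetic defined above (mul/div/add and the static max/min helpers); the (batch, feature, width, height) constructor order and the include path are assumptions, not part of the patch:

#include <api/tensor.hpp>   // assumed include path after the api/CPP -> api move

void tensor_arithmetic_sketch() {
    cldnn::tensor a(1, 16, 8, 8);   // assumed (batch, feature, width, height) constructor order
    cldnn::tensor b(1, 16, 2, 2);

    cldnn::tensor doubled = a.mul(2);                   // every element * 2  -> (2, 32, 16, 16)
    cldnn::tensor summed  = a.add(b);                   // element-wise sum   -> (2, 32, 10, 10)
    cldnn::tensor largest = cldnn::tensor::max(a, b);   // element-wise max   -> (1, 16, 8, 8)
    cldnn::tensor halved  = doubled / 2;                // operator/ forwards to div()

    (void)summed; (void)largest; (void)halved;
}
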
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/tile.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -29,15 +28,15 @@ namespace cldnn {
 
 /// @brief Performs a tile operation on the input.
 /// @details Copies the input data n times across the chosen axis.
-struct tile : public primitive_base<tile, CLDNN_PRIMITIVE_DESC(tile)> {
+struct tile : public primitive_base<tile> {
     CLDNN_DECLARE_PRIMITIVE(tile)
 
     enum tile_axis {
-        along_b = cldnn_tile_along_b,
-        along_f = cldnn_tile_along_f,
-        along_x = cldnn_tile_along_x,
-        along_y = cldnn_tile_along_y,
-        along_z = cldnn_tile_along_z
+        along_b,
+        along_f,
+        along_x,
+        along_y,
+        along_z
     };
 
     /// @brief Constructs tile primitive.
@@ -51,19 +50,10 @@ struct tile : public primitive_base<tile, CLDNN_PRIMITIVE_DESC(tile)> {
          const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), axis(axis), tiles(tiles) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{tile}
-    tile(const dto* dto) : primitive_base(dto), axis(static_cast<tile_axis>(dto->axis)), tiles(dto->tiles) {}
-
     /// @brief Tiling axis
     tile_axis axis;
     /// @brief Number of tiles across the axis
     int tiles;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.axis = static_cast<cldnn_tile_axis>(axis);
-        dto.tiles = tiles;
-    }
 };
 /// @}
 /// @}
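
A minimal construction sketch for the tile primitive above; the (id, input, axis, tiles) parameter order is inferred from the initializer list, and the ids and include path are illustrative assumptions:

#include <api/tile.hpp>   // assumed include path

cldnn::tile make_tile_sketch() {
    return cldnn::tile("tile1",               // primitive id (illustrative)
                       "input",               // id of the producing primitive (illustrative)
                       cldnn::tile::along_f,  // tile across the feature axis
                       4);                    // replicate the data 4 times
}
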
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 #include <cstdint>
-#include "cldnn_defs.h"
+#include "cldnn.hpp"
 #include "compounds.h"
 #include "primitive.hpp"
 #include <vector>
+#include <memory>
 
 namespace cldnn {
 
@@ -30,10 +31,12 @@ namespace cldnn {
 /// @defgroup cpp_topology Network Topology
 /// @{
 
+struct topology_impl;
+
 /// @brief Network topology to be defined by user.
 struct topology {
     /// @brief Constructs empty network topology.
-    topology() : _impl(check_status<cldnn_topology>("failed to create topology", cldnn_create_topology)) {}
+    topology();
 
     /// @brief Constructs topology containing primitives provided in argument(s).
     template <class... Args>
@@ -55,7 +58,7 @@ struct topology {
     }
 
     /// Construct C++ topology based on C API @p cldnn_topology
-    explicit topology(const cldnn_topology& other) : _impl(other) {
+    explicit topology(topology_impl* other) : _impl(other) {
         if (_impl == nullptr)
             throw std::invalid_argument("implementation pointer should not be null");
     }
@@ -66,63 +69,38 @@ struct topology {
     friend bool operator==(const topology& lhs, const topology& rhs) { return lhs._impl == rhs._impl; }
     friend bool operator!=(const topology& lhs, const topology& rhs) { return !(lhs == rhs); }
 
+    void add_primitive(std::shared_ptr<primitive> desc);
+
     /// @brief Adds a primitive to topology.
     template <class PType>
     void add(PType const& desc) {
-        check_status<void>("primitive add failed",
-                           [&](status_t* status) { cldnn_add_primitive(_impl, desc.get_dto(), status); });
+        add_primitive(std::static_pointer_cast<primitive>(std::make_shared<PType>(desc)));
     }
 
     /// @brief Adds primitives to topology.
     template <class PType, class... Args>
     void add(PType const& desc, Args const&... args) {
-        check_status<void>("primitive add failed",
-                           [&](status_t* status) { cldnn_add_primitive(_impl, desc.get_dto(), status); });
+        add(desc);
         add<Args...>(args...);
     }
 
-    /// @brief Returns wrapped C API @ref cldnn_topology.
-    cldnn_topology get() const { return _impl; }
-
-    const std::vector<primitive_id> get_primitive_ids() const {
-        size_t size_ret = 0;
-        status_t err_invalid_arg = CLDNN_SUCCESS;
-        cldnn_get_primitive_ids(_impl, nullptr, 0, &size_ret, &err_invalid_arg);
-        assert(err_invalid_arg == CLDNN_INVALID_ARG);
-        assert(size_ret > 0);
-        std::vector<char> names_buf(size_ret);
-
-        check_status<void>("get topology ids failed", [&](status_t* status) {
-            cldnn_get_primitive_ids(_impl, names_buf.data(), names_buf.size(), &size_ret, status);
-        });
-        assert(names_buf.size() == size_ret);
-
-        std::vector<primitive_id> result;
-        for (auto buf_ptr = names_buf.data(); *buf_ptr != 0; buf_ptr += result.back().size() + 1) {
-            result.emplace_back(buf_ptr);
-        }
-        return result;
-    }
+    /// @brief Returns wrapped implementation pointer.
+    topology_impl* get() const { return _impl; }
 
-    void change_input_layout(primitive_id id, layout new_layout) {
-        check_status<void>("Change input layout failed.",
-                           [&](status_t* status) { cldnn_change_input_layout(_impl, id.c_str(), new_layout, status); });
-    }
+    const std::vector<primitive_id> get_primitive_ids() const;
+
+    void change_input_layout(primitive_id id, const layout& new_layout);
 
 private:
     friend struct engine;
     friend struct network;
-    cldnn_topology _impl;
+    topology_impl* _impl;
 
-    void retain() {
-        check_status<void>("retain topology failed", [=](status_t* status) { cldnn_retain_topology(_impl, status); });
-    }
-    void release() {
-        check_status<void>("retain topology failed", [=](status_t* status) { cldnn_release_topology(_impl, status); });
-    }
+    void retain();
+    void release();
 };
 
 CLDNN_API_CLASS(topology)
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
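
A minimal sketch of building a topology through the reworked, C-API-free interface above; the primitive ids and include paths are illustrative assumptions:

#include <api/topology.hpp>   // assumed include paths
#include <api/tile.hpp>

cldnn::topology build_topology_sketch() {
    cldnn::topology topo;   // now backed by a topology_impl* rather than a C handle

    // add() copies the primitive into a shared_ptr and hands it to add_primitive().
    topo.add(cldnn::tile("tile1", "input", cldnn::tile::along_f, 4));

    // The variadic overload adds several primitives in one call, e.g. topo.add(p1, p2, p3);
    return topo;
}
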
@@ -16,7 +16,6 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/upsampling.h"
 #include "primitive.hpp"
 
 namespace cldnn {
@@ -30,14 +29,14 @@ namespace cldnn {
 /// @brief Sample mode for the @ref upsampling layer.
 enum class upsampling_sample_type : int32_t {
     /// @brief upsampling nearest neighbor.
-    nearest = cldnn_upsampling_nearest,
+    nearest,
     /// @brief upsampling bilinear.
-    bilinear = cldnn_upsampling_bilinear,
+    bilinear
 };
 
 /// @brief Performs nearest neighbor/bilinear upsampling
 /// Also supports a built-in Relu @ref activation, enabled via the constructor arguments.
-struct upsampling : public primitive_base<upsampling, CLDNN_PRIMITIVE_DESC(upsampling)> {
+struct upsampling : public primitive_base<upsampling> {
     CLDNN_DECLARE_PRIMITIVE(upsampling)
 
     /// @brief Constructs upsampling primitive.
@@ -50,30 +49,21 @@ struct upsampling : public primitive_base<upsampling, CLDNN_PRIMITIVE_DESC(upsam
     /// @param activation_slp Relu activation slope.
     upsampling(const primitive_id& id,
                const primitive_id& input,
-               float scale,
+               tensor output_size,
                uint32_t num_filter,
                upsampling_sample_type sample_type,
                bool with_activation = false,
                float activation_slp = 0.0f,
                const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding),
-          scale(scale),
+          output_size(output_size),
           num_filter(num_filter),
           sample_type(sample_type),
           with_activation(with_activation),
           activation_negative_slope(activation_slp) {}
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{upsampling}
-    upsampling(const dto* dto)
-        : primitive_base(dto),
-          scale(dto->scale),
-          num_filter(dto->num_filter),
-          sample_type(static_cast<upsampling_sample_type>(dto->sample_type)),
-          with_activation(dto->with_activation != 0),
-          activation_negative_slope(dto->activation_negative_slope) {}
-
-    /// @param scale Upsampling scale.
-    float scale;
+    /// @param output_size User-defined output size of the primitive.
+    tensor output_size;
     /// @param num_filter Input filter. Only used by bilinear sample_type.
     uint32_t num_filter;
     /// @param sample_type Upsampling method (nearest neighbor/bilinear).
@@ -82,15 +72,6 @@ struct upsampling : public primitive_base<upsampling, CLDNN_PRIMITIVE_DESC(upsam
     bool with_activation;
     /// @brief Relu activation slope.
     float activation_negative_slope;
-
-protected:
-    void update_dto(dto& dto) const override {
-        dto.scale = scale;
-        dto.num_filter = num_filter;
-        dto.sample_type = static_cast<cldnn_upsampling_sample_type>(sample_type);
-        dto.with_activation = with_activation;
-        dto.activation_negative_slope = activation_negative_slope;
-    }
 };
 /// @}
 /// @}
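
A construction sketch reflecting the interface change above, where the former float scale argument is replaced by a tensor output_size; the parameter order is inferred from the initializer list and the ids, sizes, and include path are illustrative assumptions:

#include <api/upsampling.hpp>   // assumed include path

cldnn::upsampling make_upsampling_sketch() {
    return cldnn::upsampling("upsample1",                    // primitive id (illustrative)
                             "input",                        // producing primitive (illustrative)
                             cldnn::tensor(1, 16, 64, 64),   // desired output size (replaces the old float scale)
                             16,                             // num_filter, used only by bilinear
                             cldnn::upsampling_sample_type::nearest);
}
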
diff --git a/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_bn_scale.h b/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_bn_scale.h
deleted file mode 100644 (file)
index 1acd60a..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "api/C/cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Primitives that fuses convolution, batch norm, scale and optionally Relu.
-CLDNN_BEGIN_PRIMITIVE_DESC(fused_conv_bn_scale)
-/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
-cldnn_tensor input_offset;
-/// @brief Defines shift in input buffer between adjacent calculations of output values.
-cldnn_tensor stride;
-/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
-/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
-/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
-cldnn_tensor dilation;
-/// @brief Enable Relu activation.
-uint32_t with_activation;
-/// @brief Relu activation slope.
-float activation_negative_slope;
-/// @brief On how many cards split the computation to.
-uint32_t split;
-/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr weights;
-/// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split.
-cldnn_primitive_id_arr bias;
-/// @brief Primitive id containing scale bias data for fused convolution.
-cldnn_primitive_id scale_bias;
-/// @brief Primitive id containing inverted variance used in future gradient computing for fused convolution.
-cldnn_primitive_id inv_variance;
-/// @brief Epsilon for fused convolution.
-float epsilon;
-/// @brief Indicates that primitive is fused with batch norm and scale.
-uint32_t fused_batch_norm_scale;
-CLDNN_END_PRIMITIVE_DESC(fused_conv_bn_scale)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fused_conv_bn_scale);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
diff --git a/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_eltwise.h b/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_eltwise.h
deleted file mode 100644 (file)
index 6768836..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "api/C/cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs forward spatial convolution with weight sharing fused with eltwise.
-/// Also supports built-in Relu @CLDNN_PRIMITIVE_DESC{activation} separate for convolution and for eltwise, available by setting it in arguments.
-CLDNN_BEGIN_PRIMITIVE_DESC(fused_conv_eltwise)
-
-struct conv_data {
-    /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
-    cldnn_tensor input_offset;
-    /// @brief Defines shift in input buffer between adjacent calculations of output values.
-    cldnn_tensor stride;
-    /// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
-    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
-    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
-    cldnn_tensor dilation;
-    /// @brief Enable Relu activation.
-    uint32_t with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
-    /// @brief On how many cards split the computation to.
-    uint32_t split;
-    /// @brief Indicates that the primitive has user-defined output size (non-zero value).
-    uint32_t with_output_size;
-    /// @brief User-defined output data size of the primitive (w/o padding).
-    cldnn_tensor output_size;
-    /// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
-    cldnn_primitive_id_arr weights;
-    /// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split.
-    cldnn_primitive_id_arr bias;
-    /// @brief List of primitive ids containing weights quanitization factors per output feature map.
-    cldnn_primitive_id_arr weights_quantization_factors;
-    /// @brief List of primitive ids containing output calibration factors per output feature map.
-    cldnn_primitive_id_arr output_calibration_factors;
-    /// @brief Input quantization factor
-    float input_quantization_factor;
-    /// @brief Output quantization factor
-    float output_quantization_factor;
-} conv;
-
-struct eltw_data {
-    /// @brief Primitive id containing output quanitization factors per output feature map.
-    cldnn_primitive_id output_calibration_factors;
-    /// @brief Output quantization factor
-    float output_quantization_factor;
-    /// @brief Eltwise mode. See #cldnn_eltwise_mode.
-    int32_t mode; /*cldnn_eltwise_mode*/
-    /// @brief Blob-wise coefficient for SUM operation
-    cldnn_float_arr coefficients;
-    /// @brief Enables Relu activation.
-    uint32_t with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
-    /// @brief Defines shift in input buffers between adjacent calculations of output values.
-    cldnn_tensor_arr stride;
-} eltw;
-
-// @brief Non-convolution output scaling factor. Might be used both to represent
-// i8->float dynamic range conversion and dynamic range scaling without changing
-// data precision (e.g. to align dynamic range with that of convolution result).
-float non_conv_scale = 1.0f;
-
-/// @brief Is optimization that output contains data from second input ON ?
-bool second_input_in_output = false;
-
-CLDNN_END_PRIMITIVE_DESC(fused_conv_eltwise)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fused_conv_eltwise);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
diff --git a/inference-engine/thirdparty/clDNN/api_extension/C/lstm_dynamic_input.h b/inference-engine/thirdparty/clDNN/api_extension/C/lstm_dynamic_input.h
deleted file mode 100644 (file)
index 2b476bb..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "api/C/cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs forward calcaulations of input gates for dynamic lstm layer.
-/// @details The current implementation of LSTM_DYNAMIC is described the following equations.
-///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
-///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Wbf)
-///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc)
-///   Ct = ft (.) Ct-1 + it (.) ct
-///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
-///   Ht = ot (.) h(Ct)
-/// Where f = Sigmoid, g = Tanh, and h = Tanh.
-CLDNN_BEGIN_PRIMITIVE_DESC(lstm_dynamic_input)
-
-/// @brief Array of primitive ids containing weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id weights;
-/// @brief Array of primitive ids containing bias vectors for input, output, forget, and cell gates.
-cldnn_primitive_id bias;
-/// @brief Primitive id containing the dynamic sequence lengths.
-cldnn_primitive_id dyn_length;
-CLDNN_END_PRIMITIVE_DESC(lstm_dynamic_input)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lstm_dynamic_input);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
diff --git a/inference-engine/thirdparty/clDNN/api_extension/C/lstm_dynamic_timeloop.h b/inference-engine/thirdparty/clDNN/api_extension/C/lstm_dynamic_timeloop.h
deleted file mode 100644 (file)
index 3203755..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-#include "api/C/cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// @brief Performs forward calcaulations of input gates for dynamic lstm layer.
-/// @details The current implementation of LSTM_DYNAMIC is described the following equations.
-///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
-///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Wbf)
-///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc)
-///   Ct = ft (.) Ct-1 + it (.) ct
-///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
-///   Ht = ot (.) h(Ct)
-/// Where f = Sigmoid, g = Tanh, and h = Tanh.
-CLDNN_BEGIN_PRIMITIVE_DESC(lstm_dynamic_timeloop)
-
-/// @brief Array of primitive ids containing recurrent weight matrices for input, output, forget, and cell gates.
-cldnn_primitive_id recurrent;
-/// @brief Primitive Id of mutable data primitive pointing to buffer, which will be filled with last hidden state.
-cldnn_primitive_id last_hidden_state;
-/// @brief Primitive Id of mutable data primitive pointing to buffer, which will be filled with last cell state.
-cldnn_primitive_id last_cell_state;
-/// @brief Array of primitive ids containing the initial value of the hidden data (Ht-1).
-cldnn_primitive_id initial_hidden;
-/// @brief Array of primitive ids containing the initial value of the cell state data (Ct-1).
-cldnn_primitive_id initial_cell;
-/// @brief Primitive id containing the dynamic sequence lengths.
-cldnn_primitive_id dyn_length;
-/// @brief Cell clip threshold T. It is applied to the input of activations [-T, T]. No clip is applied if it is not specified.
-float clip;
-/// @brief Couple the input and forget gates if input_forget is 1. Default is 0.
-bool input_forget;
-CLDNN_END_PRIMITIVE_DESC(lstm_dynamic_timeloop)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(lstm_dynamic_timeloop);
-
-#ifdef __cplusplus
-}
-#endif
-
-/// @}
-/// @}
-/// @}
@@ -16,8 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/fused_conv_bn_scale.h"
-#include "api/CPP/primitive.hpp"
+#include "api/primitive.hpp"
 #include <vector>
 
 namespace cldnn {
@@ -29,7 +28,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Primitive that fuses convolution, batch norm, scale and optionally Relu.
-struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale, CLDNN_PRIMITIVE_DESC(fused_conv_bn_scale)> {
+struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale> {
     CLDNN_DECLARE_PRIMITIVE(fused_conv_bn_scale)
 
     /// @brief Constructs convolution primitive fused with batch norm and scale.
@@ -57,50 +56,21 @@ struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale, CLDNN_PR
                         tensor dilation = {1, 1, 1, 1},
                         tensor input_offset = {0, 0, 0, 0},
                         const primitive_id& inv_variance = "",
-                        bool with_activation = false,
-                        float activation_slp = 0.0f,
                         const padding& output_padding = padding())
         : primitive_base(id, {input, scale_input}, output_padding),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
           input_offset(input_offset),
           stride(stride),
           dilation(dilation),
-          with_activation(with_activation),
-          activation_negative_slope(activation_slp),
           with_output_size(false),
           scale_bias(scale_bias),
           inv_variance(inv_variance),
           epsilon(epsilon),
-          _weights(weights),
-          _bias(bias) {
+          weights(weights),
+          bias(bias) {
         if ((bias.size() != 0) && (weights.size() != bias.size()))
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{fused_conv_bn_scale}
-    fused_conv_bn_scale(const dto* dto)
-        : primitive_base(dto),
-          weights(_weights.cpp_ids),
-          bias(_bias.cpp_ids),
-          input_offset(dto->input_offset),
-          stride(dto->stride),
-          dilation(dto->dilation),
-          with_activation(dto->with_activation != 0),
-          activation_negative_slope(dto->activation_negative_slope),
-          scale_bias(dto->scale_bias),
-          inv_variance(dto->inv_variance),
-          epsilon(dto->epsilon),
-          _weights(dto->weights),
-          _bias(dto->bias) {
-        if (!dto->split || (weights.size() != bias.size() && bias.size() != 0) || dto->split != weights.size())
-            throw std::invalid_argument("Invalid convolution dto: bad split value");
-    }
-
-    /// @brief List of primitive ids containing weights data.
-    fixed_size_vector_ref weights;
-    /// @brief List of primitive ids containing bias data.
-    fixed_size_vector_ref bias;
     /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
     tensor input_offset;
     /// @brief Defines shift in input buffer between adjacent calculations of output values.
@@ -109,10 +79,6 @@ struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale, CLDNN_PR
     /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
     /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
     tensor dilation;
-    /// @brief Enable Relu activation.
-    bool with_activation;
-    /// @brief Relu activation slope.
-    float activation_negative_slope;
     /// @brief Indicates that the primitive has user-defined output size (non-zero value).
     bool with_output_size;
     /// @brief User-defined output data size of the primitive (w/o padding).
@@ -125,38 +91,25 @@ struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale, CLDNN_PR
     float epsilon;
     /// @brief On how many cards split the computation to.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
+    /// @brief List of primitive ids containing weights data.
+    const primitive_id_arr weights;
+    /// @brief List of primitive ids containing bias data.
+    const primitive_id_arr bias;
 
 protected:
-    primitive_id_arr _weights;
-    primitive_id_arr _bias;
-
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(weights.size() + bias.size() + !scale_bias.empty() + !inv_variance.empty());
-        for (auto& w : weights) ret.push_back(w);
-        for (auto& b : bias) ret.push_back(b);
+        for (auto& w : weights) ret.push_back(std::ref(w));
+        for (auto& b : bias) ret.push_back(std::ref(b));
         if (!scale_bias.empty())
             ret.push_back(scale_bias);
         if (!inv_variance.empty())
             ret.push_back(inv_variance);
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.weights = _weights.ref();
-        dto.bias = _bias.ref();
-        dto.input_offset = input_offset;
-        dto.stride = stride;
-        dto.dilation = dilation;
-        dto.split = split();
-        dto.with_activation = with_activation;
-        dto.activation_negative_slope = activation_negative_slope;
-        dto.epsilon = epsilon;
-        dto.inv_variance = inv_variance.c_str();
-        dto.scale_bias = scale_bias.c_str();
-    }
 };
 /// @}
 /// @}
 /// @}
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
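
The dilation comment above describes the one-dimensional case; a small self-contained sketch of that arithmetic (not part of the clDNN API):

#include <cstddef>
#include <vector>

// One output value of a 1-D convolution with filter w at position i:
// dilation 1 -> w[0]*x[i] + w[1]*x[i+1] + w[2]*x[i+2]
// dilation 2 -> w[0]*x[i] + w[1]*x[i+2] + w[2]*x[i+4]
float dilated_dot(const std::vector<float>& x, const std::vector<float>& w,
                  std::size_t i, std::size_t dilation) {
    float acc = 0.0f;
    for (std::size_t k = 0; k < w.size(); ++k)
        acc += w[k] * x[i + k * dilation];   // (dilation - 1) inputs are skipped between taps
    return acc;
}
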
@@ -16,9 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/fused_conv_eltwise.h"
-#include "api/CPP/primitive.hpp"
-#include "api/CPP/eltwise.hpp"
+#include "api/primitive.hpp"
+#include "api/eltwise.hpp"
 #include <vector>
 
 namespace cldnn {
@@ -30,7 +29,7 @@ namespace cldnn {
 /// @{
 
 /// @brief Performs forward spatial convolution with fused eltwise and optionally Relu.
-struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIMITIVE_DESC(fused_conv_eltwise)> {
+struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
     CLDNN_DECLARE_PRIMITIVE(fused_conv_eltwise)
 
     /// @brief Constructs fused_conv_eltwise primitive.
@@ -74,16 +73,16 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIM
                        const padding& output_padding = padding(),
                        optional_data_type output_data_type = {})
         : primitive_base(id, {input, input2}, output_padding, output_data_type),
-          conv((fixed_size_vector_ref)_conv_weights.cpp_ids,
-               (fixed_size_vector_ref)_conv_bias.cpp_ids,
-               (fixed_size_vector_ref)_conv_weights_quantization_factors.cpp_ids,
-               (fixed_size_vector_ref)_conv_output_calibration_factors.cpp_ids),
+          conv((primitive_id_arr)weights,
+              (primitive_id_arr)bias,
+              (primitive_id_arr)conv_w_quantization_factor,
+              (primitive_id_arr)conv_output_calibration_factors),
           eltw(eltw_output_calibration_factors),
           non_conv_scale(non_conv_scale),
-          _conv_weights(weights),
-          _conv_bias(bias),
-          _conv_weights_quantization_factors(conv_w_quantization_factor),
-          _conv_output_calibration_factors(conv_output_calibration_factors) {
+          conv_weights(weights),
+          conv_bias(bias),
+          conv_weights_quantization_factors(conv_w_quantization_factor),
+          conv_output_calibration_factors(conv_output_calibration_factors) {
         conv.input_quantization_factor = conv_i_quantization_factor;
         conv.output_quantization_factor = 1.0f;
 
@@ -107,52 +106,15 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIM
         }
     }
 
-    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{convolution}
-    fused_conv_eltwise(const dto* dto)
-        : primitive_base(dto),
-          conv((fixed_size_vector_ref) _conv_weights.cpp_ids,
-               (fixed_size_vector_ref) _conv_bias.cpp_ids,
-               (fixed_size_vector_ref) _conv_weights_quantization_factors.cpp_ids,
-               (fixed_size_vector_ref) _conv_output_calibration_factors.cpp_ids),
-          eltw(dto->eltw.output_calibration_factors),
-          _conv_weights(dto->conv.weights),
-          _conv_bias(dto->conv.bias),
-          _conv_weights_quantization_factors(dto->conv.weights_quantization_factors),
-          _conv_output_calibration_factors(dto->conv.output_calibration_factors),
-          _eltw_stride(tensor_vector_to_cldnn_vector(eltw.stride)) {
-        conv.input_quantization_factor = dto->conv.input_quantization_factor;
-        conv.output_quantization_factor = dto->conv.output_quantization_factor;
-        conv.input_offset = (tensor) dto->conv.input_offset;
-        conv.stride = (tensor)dto->conv.stride;
-        conv.dilation = (tensor)dto->conv.dilation;
-        conv.with_activation = dto->conv.with_activation != 0;
-        conv.activation_negative_slope = dto->conv.activation_negative_slope;
-        conv.with_output_size = dto->conv.with_output_size != 0;
-        conv.output_size = (tensor)dto->conv.output_size;
-
-        eltw.output_calibration_factors = dto->eltw.output_calibration_factors;
-        eltw.output_quantization_factor = dto->eltw.output_quantization_factor;
-        eltw.mode = static_cast<eltwise_mode>(dto->eltw.mode);
-        eltw.with_activation = dto->eltw.with_activation != 0;
-        eltw.activation_negative_slope = dto->eltw.activation_negative_slope;
-
-        non_conv_scale = dto->non_conv_scale;
-        second_input_in_output = dto->second_input_in_output;
-
-        if (!dto->conv.split || (conv.weights.size() != conv.bias.size() && conv.bias.size() != 0) ||
-            dto->conv.split != conv.weights.size())
-            throw std::invalid_argument("Invalid convolution dto: bad split value");
-    }
-
     struct conv_data {
         /// @brief List of primitive ids containing weights data.
-        fixed_size_vector_ref weights;
+        const primitive_id_arr weights;
         /// @brief List of primitive ids containing bias data.
-        fixed_size_vector_ref bias;
+        const primitive_id_arr bias;
         /// @brief List of primitive ids containing weights quantization factors per output feature map.
-        fixed_size_vector_ref weights_quantization_factors;
+        const primitive_id_arr weights_quantization_factors;
         /// @brief List of primitive ids containing output quantization factors per output feature map for convolution.
-        fixed_size_vector_ref output_calibration_factors;
+        const primitive_id_arr output_calibration_factors;
         /// @brief Input quantization factor for convolution
         float input_quantization_factor;
         /// @brief Output quantization factor for convolution
@@ -174,10 +136,10 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIM
         /// @brief User-defined output data size of the primitive (w/o padding).
         tensor output_size;
 
-        conv_data(const fixed_size_vector_ref& weights,
-                  const fixed_size_vector_ref& bias,
-                  const fixed_size_vector_ref& weights_quantization_factors,
-                  const fixed_size_vector_ref& output_calibration_factors)
+        conv_data(const primitive_id_arr& weights,
+                  const primitive_id_arr& bias,
+                  const primitive_id_arr& weights_quantization_factors,
+                  const primitive_id_arr& output_calibration_factors)
             : weights(weights),
               bias(bias),
               weights_quantization_factors(weights_quantization_factors),
@@ -197,7 +159,6 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIM
         float activation_negative_slope;
         /// @brief Defines shift in input buffers between adjacent calculations of output values.
         std::vector<tensor> stride;
-
         explicit eltw_data(const primitive_id& output_calibration_factors)
             : output_calibration_factors(output_calibration_factors) {}
     } eltw;
@@ -218,55 +179,26 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIM
     bool second_input_in_output = false;
 
 protected:
-    primitive_id_arr _conv_weights;
-    primitive_id_arr _conv_bias;
-    primitive_id_arr _conv_weights_quantization_factors;
-    primitive_id_arr _conv_output_calibration_factors;
-
-    std::vector<cldnn_tensor> _eltw_stride;
+    const primitive_id_arr conv_weights;
+    const primitive_id_arr conv_bias;
+    const primitive_id_arr conv_weights_quantization_factors;
+    const primitive_id_arr conv_output_calibration_factors;
 
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
         ret.reserve(conv.weights.size() + conv.bias.size() + conv.weights_quantization_factors.size() +
                     conv.output_calibration_factors.size() + (eltw.output_calibration_factors.empty() ? 0 : 1));
 
-        for (auto& w : conv.weights) ret.push_back(w);
-        for (auto& b : conv.bias) ret.push_back(b);
-        for (auto& q : conv.weights_quantization_factors) ret.push_back(q);
-        for (auto& q : conv.output_calibration_factors) ret.push_back(q);
+        for (auto& w : conv.weights) ret.push_back(std::ref(w));
+        for (auto& b : conv.bias) ret.push_back(std::ref(b));
+        for (auto& q : conv.weights_quantization_factors) ret.push_back(std::ref(q));
+        for (auto& q : conv.output_calibration_factors) ret.push_back(std::ref(q));
 
         if (!eltw.output_calibration_factors.empty())
             ret.push_back(eltw.output_calibration_factors);
 
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.conv.weights = _conv_weights.ref();
-        dto.conv.bias = _conv_bias.ref();
-        dto.conv.weights_quantization_factors = _conv_weights_quantization_factors.ref();
-        dto.conv.output_calibration_factors = _conv_output_calibration_factors.ref();
-        dto.conv.input_quantization_factor = conv.input_quantization_factor;
-        dto.conv.output_quantization_factor = conv.output_quantization_factor;
-        dto.conv.input_offset = conv.input_offset;
-        dto.conv.stride = conv.stride;
-        dto.conv.split = split();
-        dto.conv.with_activation = conv.with_activation;
-        dto.conv.activation_negative_slope = conv.activation_negative_slope;
-        dto.conv.dilation = conv.dilation;
-        dto.conv.with_output_size = conv.with_output_size;
-        dto.conv.output_size = conv.output_size;
-
-        dto.eltw.output_calibration_factors = eltw.output_calibration_factors.c_str();
-        dto.eltw.output_quantization_factor = eltw.output_quantization_factor;
-        dto.eltw.mode = static_cast<cldnn_eltwise_mode>(eltw.mode);
-        dto.eltw.with_activation = eltw.with_activation;
-        dto.eltw.activation_negative_slope = eltw.activation_negative_slope;
-        dto.eltw.stride = tensor_vector_to_arr(_eltw_stride);
-
-        dto.non_conv_scale = non_conv_scale;
-        dto.second_input_in_output = second_input_in_output;
-    }
 };
 /// @}
 /// @}
@@ -16,8 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/lstm_dynamic_input.h"
-#include "api/CPP/primitive.hpp"
+#include "api/primitive.hpp"
 #include <vector>
 
 namespace cldnn {
@@ -37,7 +36,7 @@ namespace cldnn {
 ///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
 ///   Ht = ot (.) h(Ct)
 /// Where f = Sigmoid, g = Tanh, and h = Tanh.
-struct lstm_dynamic_input : public primitive_base<lstm_dynamic_input, CLDNN_PRIMITIVE_DESC(lstm_dynamic_input)> {
+struct lstm_dynamic_input : public primitive_base<lstm_dynamic_input> {
     CLDNN_DECLARE_PRIMITIVE(lstm_dynamic_input)
 
     /// @brief Constructs lstm_dynamic layer.
@@ -55,10 +54,6 @@ struct lstm_dynamic_input : public primitive_base<lstm_dynamic_input, CLDNN_PRIM
                        const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), dyn_length(dyn_length), weights(weights), bias(bias) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{lstm_dynamic}
-    lstm_dynamic_input(const dto* dto)
-        : primitive_base(dto), dyn_length(dto->dyn_length), weights(dto->weights), bias(dto->bias) {}
-
     /// @brief Primitive id containing the dynamic sequence lengths.
     primitive_id dyn_length;
     /// @brief Primitive id containing weights data.
@@ -77,12 +72,6 @@ protected:
         }
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.dyn_length = dyn_length.c_str();
-        dto.weights = weights.c_str();
-        dto.bias = bias.c_str();
-    }
 };
 /// @}
 /// @}
@@ -16,8 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "../C/lstm_dynamic_timeloop.h"
-#include "api/CPP/primitive.hpp"
+#include "api/primitive.hpp"
 #include <vector>
 
 namespace cldnn {
@@ -38,7 +37,7 @@ namespace cldnn {
 ///   Ht = ot (.) h(Ct)
 /// Where f = Sigmoid, g = Tanh, and h = Tanh.
 struct lstm_dynamic_timeloop
-    : public primitive_base<lstm_dynamic_timeloop, CLDNN_PRIMITIVE_DESC(lstm_dynamic_timeloop)> {
+    : public primitive_base<lstm_dynamic_timeloop> {
     CLDNN_DECLARE_PRIMITIVE(lstm_dynamic_timeloop)
 
     /// @brief Constructs lstm_dynamic layer.
@@ -71,18 +70,6 @@ struct lstm_dynamic_timeloop
           clip(clip),
           input_forget(input_forget) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{lstm_dynamic}
-    lstm_dynamic_timeloop(const dto* dto)
-        : primitive_base(dto),
-          dyn_length(dto->dyn_length),
-          recurrent(dto->recurrent),
-          last_hidden_state(dto->last_hidden_state),
-          last_cell_state(dto->last_cell_state),
-          initial_hidden(dto->initial_hidden),
-          initial_cell(dto->initial_cell),
-          clip(dto->clip),
-          input_forget(dto->input_forget) {}
-
     /// @brief Primitive id containing the dynamic sequence lengths.
     primitive_id dyn_length;
     /// @brief Primitive id containing recurrent data.
@@ -120,17 +107,6 @@ protected:
         }
         return ret;
     }
-
-    void update_dto(dto& dto) const override {
-        dto.dyn_length = dyn_length.c_str();
-        dto.recurrent = recurrent.c_str();
-        dto.last_hidden_state = last_hidden_state.c_str();
-        dto.last_cell_state = last_cell_state.c_str();
-        dto.initial_hidden = initial_hidden.c_str();
-        dto.initial_cell = initial_cell.c_str();
-        dto.clip = clip;
-        dto.input_forget = input_forget;
-    }
 };
 /// @}
 /// @}
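
A scalar sketch of one time step of the gate equations quoted in the comments above (f = Sigmoid, g = h = Tanh); the real primitives operate on whole GEMM outputs, so this only illustrates the recurrence and all names are illustrative:

#include <cmath>

struct lstm_state { float h; float c; };

// xW_* are Xt*W^T plus bias per gate, hR_* are Ht-1*R per gate.
lstm_state lstm_step_sketch(float xW_i, float xW_f, float xW_c, float xW_o,
                            float hR_i, float hR_f, float hR_c, float hR_o,
                            lstm_state prev) {
    auto sigmoid = [](float v) { return 1.0f / (1.0f + std::exp(-v)); };
    float it = sigmoid(xW_i + hR_i);
    float ft = sigmoid(xW_f + hR_f);
    float ct = std::tanh(xW_c + hR_c);
    float ot = sigmoid(xW_o + hR_o);
    lstm_state next;
    next.c = ft * prev.c + it * ct;      // Ct = ft (.) Ct-1 + it (.) ct
    next.h = ot * std::tanh(next.c);     // Ht = ot (.) h(Ct)
    return next;
}
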
index b3c8170..34f9ae3 100644 (file)
@@ -42,32 +42,21 @@ intel_config_flag_apply_settings(CompilerOptions CMAKE_C_FLAGS ALL_PATTERN ""
 
 set(__CLDNN_Label__api                  "api")
 file(GLOB __CLDNN_Headers__api
-    "${CLDNN__API_DIR}/*.h"
     "${CLDNN__API_DIR}/*.hpp"
   )
 
 set(__CLDNN_Directory__api__cpp         "${CLDNN__API_DIR}/CPP")
 set(__CLDNN_Label__api__cpp             "${__CLDNN_Label__api}\\CPP")
 file(GLOB __CLDNN_Headers__api__cpp
-    "${__CLDNN_Directory__api__cpp}/*.h"
     "${__CLDNN_Directory__api__cpp}/*.hpp"
   )
 
-set(__CLDNN_Directory__api__c           "${CLDNN__API_DIR}/C")
-set(__CLDNN_Label__api__c               "${__CLDNN_Label__api}\\C")
-file(GLOB __CLDNN_Headers__api__c
-    "${__CLDNN_Directory__api__c}/*.h"
-    "${__CLDNN_Directory__api__c}/*.hpp"
-  )
-
 set(__CLDNN_Directory__test_builds      "${CLDNN__CODEGEN_DIR}/test_builds")
 set(__CLDNN_CGDirectory__test_builds    "${CLDNN__CODEGEN_BASEDIR}/test_builds")
 set(__CLDNN_Label__test_builds          "test builds\\codegen")
 set(__CLDNN_File__test_builds__api__cpp "api_cpp_test.cpp")
-set(__CLDNN_File__test_builds__api__c   "api_c_test.c")
 set(__CLDNN_Sources__test_builds
     "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__cpp}"
-    "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__c}"
   )
 
 set(__CLDNN_AllSources
@@ -93,20 +82,10 @@ foreach(__CLDNN_Header ${__CLDNN_Headers__api} ${__CLDNN_Headers__api__cpp})
 endforeach()
 file(APPEND "${__CLDNN_CGFile__api__cpp}" "\n")
 
-# C API testing (C99 compatibility).
-set(__CLDNN_CGFile__api__c "${__CLDNN_CGDirectory__test_builds}/${__CLDNN_File__test_builds__api__c}")
-file(WRITE "${__CLDNN_CGFile__api__c}" "// This file is auto-generated. Please, do not modify it directly.\n\n")
-foreach(__CLDNN_Header ${__CLDNN_Headers__api} ${__CLDNN_Headers__api__c})
-  string(REPLACE ";" "\;" __CLDNN_Header "${__CLDNN_Header}") # [WA#1] Must escape ; again if occurred in item.
-  file(APPEND "${__CLDNN_CGFile__api__c}" "#include \"${__CLDNN_Header}\"\n")
-endforeach()
-file(APPEND "${__CLDNN_CGFile__api__c}" "\n")
-
 # =============================================== Filters ==============================================
 
 source_group("${__CLDNN_Label__api}"         FILES ${__CLDNN_Headers__api})
 source_group("${__CLDNN_Label__api__cpp}"    FILES ${__CLDNN_Headers__api__cpp})
-source_group("${__CLDNN_Label__api__c}"      FILES ${__CLDNN_Headers__api__c})
 source_group("${__CLDNN_Label__test_builds}" FILES ${__CLDNN_Sources__test_builds})
 
 # ===================================== Include/Link directories =======================================
@@ -139,10 +118,5 @@ add_custom_command(OUTPUT "${__CLDNN_Directory__test_builds}/${__CLDNN_File__tes
     DEPENDS "${__CLDNN_CGFile__api__cpp}" ${__CLDNN_Headers__api} ${__CLDNN_Headers__api__cpp}
     COMMENT "Updating file if the file changed (${__CLDNN_File__test_builds__api__cpp}) ..."
   )
-add_custom_command(OUTPUT "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__c}"
-    COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__CLDNN_CGFile__api__c}" "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__c}"
-    DEPENDS "${__CLDNN_CGFile__api__c}" ${__CLDNN_Headers__api} ${__CLDNN_Headers__api__c}
-    COMMENT "Updating file if the file changed (${__CLDNN_File__test_builds__api__c}) ..."
-  )
 
 # ======================================================================================================
index ca2e1f5..46774fc 100644 (file)
@@ -154,7 +154,7 @@ set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME   "${CLDNN_BUILD
 
 
 target_link_libraries("${CLDNN_BUILD__PROJ}"
-    OpenCL
+    clDNN_OpenCL
   )
 
 if(WIN32)
index 7fec673..aca0319 100644 (file)
@@ -76,7 +76,8 @@ enum class KernelType {
     QUANTIZE,
     LSTM_DYNAMIC_INPUT,
     LSTM_DYNAMIC_TIMELOOP,
-    REDUCE
+    REDUCE,
+    GATHER_TREE
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
index 4f51308..dc9f680 100644 (file)
@@ -47,6 +47,7 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{
     { DataLayout::fs_b_yx_fsv32,        {  0,  1, -1, -1,  3,  2 } },
     { DataLayout::b_fs_yx_32fp,         {  0,  1, -1, -1,  2,  3 } },
     { DataLayout::bfwzyx,               {  0,  1,  2,  3,  4,  5 } },
+    { DataLayout::bfzyx_f16,            {  0,  1,  2, -1,  3,  4 } },
 }};
 
 WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
@@ -76,6 +77,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
     { WeightsLayout::winograd_6x3_s1_fused_weights,          {  0,  1, -1,   2,   3, -1, -1 } },
     { WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb, {  0,  1, -1,   2,   3, -1, -1 } },
     { WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb, {  0,  1, -1,   2,   3, -1, -1 } },
+    { WeightsLayout::dlstm_dir_io,                           {  1,  0, -1,   2,   3, -1, -1 } },
     { WeightsLayout::os_is_yx_isa8_osv8_isv4,                {  0,  1, -1,   2,   3, -1, -1 } },
     { WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4,  {  0,  1, -1,   2,   3, -1, -1 } },
     { WeightsLayout::is_o_yx_isv32,                          {  1,  2, -1,   0,   3, -1, -1 } },
@@ -86,6 +88,8 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
     { WeightsLayout::os_is_yx_osv16_isv4,                    {  0,  1, -1,   2,   3, -1, -1 } },
     { WeightsLayout::oizyx,                                  {  0,  1,  2,   3,   4, -1, -1 } },
     { WeightsLayout::os_is_yx_osv32_isv32p,                  {  0,  1, -1,   2,   3, -1, -1 } },
+    { WeightsLayout::o_i_zyx_i16_o16,                        {  0,  1,  2,   3,   4, -1, -1 } },
+    { WeightsLayout::i_o_zyx_o16_i16,                        {  0,  1,  2,   4,   3, -1, -1 } },
 }};
 
 NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l) {
@@ -134,6 +138,11 @@ NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l) {
         case fs_b_yx_fsv32:
             assert(newDims.size() == 4);
             newDims[3] = RoundUp(newDims[3], 32);
+            break;
+        case bfzyx_f16:
+            assert(newDims.size() == 5);
+            newDims[3] = RoundUp(newDims[3], 16);
+            break;
         default:
             break;
     }
@@ -434,6 +443,16 @@ NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l
             assert(newDims.size() == 4);
             newDims[3] = RoundUp(newDims[3], 16);
             break;
+        case o_i_zyx_i16_o16:
+            assert(newDims.size() == 5);
+            newDims[3] = RoundUp(newDims[3], 16);
+            newDims[4] = RoundUp(newDims[4], 16);
+            break;
+        case i_o_zyx_o16_i16:
+            assert(newDims.size() == 5);
+            newDims[3] = RoundUp(newDims[3], 16);
+            newDims[4] = RoundUp(newDims[4], 16);
+            break;
         default:
             break;
     }
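
The new blocked layouts above pad selected dimensions with RoundUp; a minimal sketch of that helper's assumed round-to-multiple semantics:

#include <cstddef>

// Smallest multiple of 'multiple' that is >= value.
inline std::size_t round_up_sketch(std::size_t value, std::size_t multiple) {
    return ((value + multiple - 1) / multiple) * multiple;
}
// e.g. bfzyx_f16 and o_i_zyx_i16_o16 pad the blocked dimensions to multiples of 16:
// round_up_sketch(20, 16) == 32, round_up_sketch(16, 16) == 16
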
index 849074d..71361f7 100644 (file)
@@ -41,6 +41,7 @@ enum DataLayout {
     yxfb,                  // 3D+batch
     byxf,                  // 3D+batch
     fyxb,                  // 3D+batch
+    bfxy,                  // 3D+batch
     bfyx_f16,              // 3D+batch
     bs_f_bsv8__af8,        // for optimized FC
     bs_f_bsv16__af8,       // for optimized FC
@@ -54,6 +55,7 @@ enum DataLayout {
     fs_b_yx_fsv32,         // for FP16 kernels, 32 features to avoid partial writes
     b_fs_yx_32fp,          // bfyx with blocks of 16 packed binary input channels
     bfwzyx,                // batch, feature, 4D spatial
+    bfzyx_f16,             // batch, feature, 3D spatial. Blocks of 16 input channels
     DataLayoutCount        // NUMBER OF ELEMENTS IN ENUM
 };
 
@@ -88,6 +90,7 @@ enum WeightsLayout {
                                              // 3x3 with stride 1
     image_2d_weights_winograd_6x3_s1_xfbyb,  // image 2d winograd convolution weights for fused kernel, F(2, 3) --filter
                                              // 3x3 with stride 1
+    dlstm_dir_io,                            // dlstm weights layout: direction, input_size, 4 * hidden_size
     os_is_yx_isa8_osv8_isv4,                 // for MMAD convolution
     os_is_yx_isa8_osv8_isv4_swizzled_by_4,   // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28,
                                              // 1,5...
@@ -99,7 +102,9 @@ enum WeightsLayout {
     bf_lyx_yx,                           // local convolution
     os_is_yx_osv16_isv4,                 // swizzled weights for convolution using IMAD
     oizyx,
-    os_is_yx_osv32_isv32p,  // 2 blocks: 16 packed binary in channels and 16 output channels
+    os_is_yx_osv32_isv32p,  // 2 blocks: 32 packed binary in channels and 32 output channels
+    o_i_zyx_i16_o16,
+    i_o_zyx_o16_i16,
     WeightsLayoutCount      // NUMBER OF ELEMENTS IN ENUM
 };
 
@@ -142,6 +147,7 @@ inline bool SimpleLayout(WeightsLayout l) {
         case WeightsLayout::iyxo:
         case WeightsLayout::yxio:
         case WeightsLayout::oizyx:
+        case WeightsLayout::dlstm_dir_io:
             return true;
         default:
             return false;
@@ -176,6 +182,15 @@ inline bool IsImageType(WeightsLayout l) {
     }
 }
 
+inline bool IsDynamicLSTMType(WeightsLayout l) {
+    switch (l) {
+    case WeightsLayout::dlstm_dir_io:
+        return true;
+    default:
+        return false;
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Tensor Explanation
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
index a881bf1..4058675 100644 (file)
@@ -59,7 +59,7 @@ JitConstants ActivationKernelBase::GetJitConstants(const activation_params& para
     const auto& inputNlParams = params.inputActivationParams;
 
     jit.AddConstants({
-        MakeJitConstant("PARAMS_NUM", GetActivationAdditionalParamsNumber(params.activation.function)),
+        MakeJitConstant("PARAMS_NUM", GetActivationAdditionalParamsNumber(params.activations[0].function)),
     });
 
     if (!inputNlParams.empty()) {
index 632d4c3..43c6c20 100644 (file)
 #include "activation_kernel_selector.h"
 #include "activation_kernel_opt.h"
 #include "activation_kernel_ref.h"
-#include "activation_kernel_tutorial.h"
 
 namespace kernel_selector {
 activation_kernel_selector::activation_kernel_selector() {
     Attach<ActivationKernelRef>();
     Attach<ActivationKernelOpt>();
-    Attach<ActivationKernel_Tutorial>();
 }
 
 KernelsData activation_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.cpp
deleted file mode 100644 (file)
index 290d39d..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "activation_kernel_tutorial.h"
-#include "kernel_selector_utils.h"
-
-namespace kernel_selector {
-
-// Step 0:
-//
-// take a look on activaion_kernel_tutorial.h
-
-ParamsKey ActivationKernel_Tutorial::GetSupportedKey() const {
-    // Step 1:
-    // - Update the features supported by the kernel below
-
-    ParamsKey k;
-
-    // Supported data type
-    k.EnableInputDataType(Datatype::F16);
-    k.EnableInputDataType(Datatype::F32);
-    k.EnableOutputDataType(Datatype::F16);
-    k.EnableOutputDataType(Datatype::F32);
-
-    // Supported layout
-    k.EnableAllInputLayout();
-    k.EnableAllOutputLayout();
-
-    // Supported tensor offset/pitch/padding
-    k.EnableTensorOffset();
-    k.EnableTensorPitches();
-    k.EnableBatching();
-
-    // Supported activation extra data
-    k.EnableActivationAdditionalParamsAsInput();
-
-    return k;
-}
-
-#ifdef BASIC_TUTORIAL
-
-KernelsData ActivationKernel_Tutorial::GetKernelsData(const Params& /*params*/, const optional_params& /*options*/) const {
-    return {};
-
-    // Step 2:
-    // - Uncomment and update the following lines
-
-    // assert(params.GetType() == KernelType::ACTIVATION && options.GetType() == KernelType::ACTIVATION);
-    //
-    // const uint32_t numOfkernels = 1;
-    // KernelData kd = KernelData::Default<activation_params>(params, numOfkernels);
-    // activation_params& newParams = *static_cast<activation_params*>(kd.params.get());
-    // const activation_optional_params& optParams = static_cast<const activation_optional_params&>(options);
-    // auto& kernel = kd.kernels[0];
-
-    // Step 3:
-    // - fill "kernel.kernelString"
-    //   - fill "kernel.kernelString->str"                  - the source of the kernel.
-    //     please use "db.get(kernelName)" in case you use "*.cl" file which located under "kernel_selector\core\cl_kernels\".
-    //   - fill "kernel.kernelString->jit"                  - Dynamic jit of this params.
-    //   - fill "kernel.kernelString->options"              - options which pass to cl program build functions (like "-cl-no-subgroup-ifp")
-    //   - fill "kernel.kernelString->entry_point"          - kernel entry point
-    //   - fill "kernel.kernelString->batch_compilation"    - A flag that allow clDNN kernel to compile this kernel as a part of a program
-    //                                                        NOTE: this can only be used if you prevent symbol conflicts with other kernels
-    //                                                        (#undef is done automatically by clDNN)
-
-    // Step 4:
-    // - fill "kernel.WorkGroupSizes" - local/global work group sizes for OpenCL kernel
-
-    // Step 5:
-    // - fill "kernel.arguments" - which describe the argument of the kernel.
-    //   in this tutorial you can use:
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); // "0" mean index of the input in case of multiple inputs.
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
-    //
-    //   in case that you have more than one kernel, you probably need an intermediate buffers.
-    //   in order to support that you have to describe the buffer size in kd.internalBufferSizes and add a kernel argument like:
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, <index to kd.internalBufferSize> });
-
-    // Step 6:
-    // - estimate the kernel's execution time. currently it's under development so please use FORCE_PRIORITY_<X> - lower is better.
-
-    // return{ kd };
-}
-
-#else
-
-ActivationKernel_Tutorial::Parent::DispatchData ActivationKernel_Tutorial::SetDefault(const activation_params& params) const {
-    auto runInfo = Parent::SetDefault(params);
-
-    // Step 2:
-    //
-    // Init DispatchData and set the kernel efficiency
-    runInfo.effiency = TUTORIAL_PRIORITY;
-
-    return runInfo;
-}
-
-bool ActivationKernel_Tutorial::Validate(const Params& p, const optional_params& o) const {
-    if (!Parent::Validate(p, o)) {
-        return false;
-    }
-
-    // Step 3:
-    //
-    // Validate that this kernel supports the given params and optional params. Use:
-    // const activation_params& params = static_cast<const activation_params&>(p);
-    // const activation_optional_params& options = static_cast<const activation_optional_params&>(o);
-
-    return true;
-}
-
-JitConstants ActivationKernel_Tutorial::GetJitConstants(const activation_params& params, DispatchData runInfo) const {
-    auto jit = Parent::GetJitConstants(params, runInfo);
-    jit.AddConstant(MakeJitConstant("ADVANCED_TUTORIAL", ""));
-
-    // Step 4:
-    //
-    // Add your own jit constants. For example:
-    // jit.AddConstant(MakeJitConstant("<MY_CONST>", <my val>));
-    // - "my val" can be most common KernelSelector/C++ types
-
-    return jit;
-}
-
-KernelsData ActivationKernel_Tutorial::GetKernelsData(const Params& params, const optional_params& options) const {
-    return GetCommonKernelsData(params, options);
-}
-
-#endif
-}  // namespace kernel_selector
\ No newline at end of file
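For reference, the removed tutorial above leaves Steps 2-6 as comments only. Below is a minimal sketch of the filled-in BASIC_TUTORIAL path, assuming the member and helper names mentioned in those comments (KernelData::Default, kernel.kernelString, kernel.arguments, db.get, FORCE_PRIORITY_<X>); the build option, priority and work-group values are placeholders rather than anything defined in this diff.

// Hypothetical completion of the tutorial's Steps 2-6 (illustration only).
KernelsData ActivationKernel_Tutorial::GetKernelsData(const Params& params,
                                                      const optional_params& options) const {
    assert(params.GetType() == KernelType::ACTIVATION && options.GetType() == KernelType::ACTIVATION);

    // Step 2: create the KernelData and get the typed params.
    const uint32_t numOfkernels = 1;
    KernelData kd = KernelData::Default<activation_params>(params, numOfkernels);
    activation_params& newParams = *static_cast<activation_params*>(kd.params.get());
    auto& kernel = kd.kernels[0];

    // Step 3: kernel source, jit, build options and entry point
    // (allocate kernel.kernelString first if the base class does not do it).
    kernel.kernelString->str = db.get(kernelName);          // *.cl source under cl_kernels/
    kernel.kernelString->jit = "";                          // dynamic jit built from newParams
    kernel.kernelString->options = "-cl-no-subgroup-ifp";   // example build option from the comments
    kernel.kernelString->entry_point = kernelName;
    kernel.kernelString->batch_compilation = true;

    // Step 4: local/global work group sizes, typically derived from newParams.output.

    // Step 5: kernel arguments.
    kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
    kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});

    // Step 6: execution-time estimate via FORCE_PRIORITY_<X> (lower is better).

    return {kd};
}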
index 7ced10b..1119b59 100644 (file)
@@ -114,6 +114,9 @@ JitConstants BinaryConvolutionKernel1x1::GetFusedPrimitivesJitConstants(const bi
                                                                         const DispatchData& /*kd*/) const {
     JitConstants jit = {};
 
+    FusedOpsConfiguration conf = {"", {"b", "(f_block*16 + oc)", "y", "x"}, "res", 1, false, false, true, false };
+    jit.Merge(MakeFusedOpsDeclsJitConstants(params, {conf}));
+
     size_t op_id = 0;
     std::string input_decls = "";
     std::string eltwise_fused_ops = "";
@@ -131,98 +134,92 @@ JitConstants BinaryConvolutionKernel1x1::GetFusedPrimitivesJitConstants(const bi
             return "(intel_sub_group_shuffle(" + var + ", " + lid + "))";
         };
 
-        std::string op_type = "";
-        std::string op_prefix = "FUSED_OP_" + std::to_string(op_id) + "_INPUT";
+        std::string data_type = fused_dep.GetInputTypeName(0, 1);
+        std::string vec_data_type = fused_dep.GetInputTypeName(0, 2);
+        std::string sc = "sc" + std::to_string(op_id);
+        std::string sh = "sh" + std::to_string(op_id);
         switch (fused_dep.type) {
             case binary_convolution_params::fused_operation_desc::Type::SCALE: {
-                op_type = "scale";
-                std::string data_type = op_prefix + "0_TYPE";
-                std::string vec_data_type = "MAKE_VECTOR_TYPE(" + data_type + ", 2)";
                 std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
-
                 if (fused_dep.tensors.size() == 1) {
-                    std::string var_name = op_type + std::to_string(op_id) + "_scales";
-                    prepare_data += vec_data_type + var_name + cast_type +
-                                    get_aligned_load2(op_type + "_input0", "f_block*OC_BLOCK_SIZE") + ";";
-                    eltwise_fused_ops += data_type + " sc = (oc < 16) ? " + get_shuffle(var_name + ".s0", "oc") +
-                                         " : " + get_shuffle(var_name + ".s1", "oc") + ";";
-                    eltwise_fused_ops += "res = res*sc;";
+                    std::string var_name = fused_dep.GetInputVarName(0);
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var_name + " = " + cast_type +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " " + sc + " = (oc < 16) ? " +
+                                    get_shuffle(var_name + ".s0", "oc") + " : " + get_shuffle(var_name + ".s1", "oc") + ";";
+                    eltwise_fused_ops += "\\\n\tres = res*" + sc + ";";
                 } else {
-                    std::string var0_name = op_type + std::to_string(op_id) + "_scales";
-                    std::string var1_name = op_type + std::to_string(op_id) + "_shifts";
-                    prepare_data += vec_data_type + " " + var0_name + " = " + cast_type +
-                                    get_aligned_load2(op_type + "_input0", "f_block*OC_BLOCK_SIZE") + ";";
-                    prepare_data += vec_data_type + " " + var1_name + " = " + cast_type +
-                                    get_aligned_load2(op_type + "_input1", "f_block*OC_BLOCK_SIZE") + ";";
-                    eltwise_fused_ops += data_type + " sc = (oc < 16) ? " + get_shuffle(var0_name + ".s0", "oc") +
-                                         " : " + get_shuffle(var0_name + ".s1", "oc") + ";";
-                    eltwise_fused_ops += data_type + " sh = (oc < 16) ? " + get_shuffle(var1_name + ".s0", "oc") +
-                                         " : " + get_shuffle(var1_name + ".s1", "oc") + ";";
-                    eltwise_fused_ops += "res = res*sc + sh;";
+                    std::string var0_name = fused_dep.GetInputVarName(0);
+                    std::string var1_name = fused_dep.GetInputVarName(1);
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var0_name + " = " + cast_type +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var1_name + " = " + cast_type +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(1), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " " + sc +" = (oc < 16) ? " +
+                                    get_shuffle(var0_name + ".s0", "oc") + " : " + get_shuffle(var0_name + ".s1", "oc") + ";";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " " + sh + " = (oc < 16) ? " +
+                                    get_shuffle(var1_name + ".s0", "oc") + " : " + get_shuffle(var1_name + ".s1", "oc") + ";";
+                    eltwise_fused_ops += "\\\n\tres = res*" + sc + " + " + sh + ";";
                 }
 
                 break;
             }
             case binary_convolution_params::fused_operation_desc::Type::QUANTIZE: {
-                op_type = "quantize";
-                std::string data_type = op_prefix + "0_TYPE";
-                std::string vec_data_type = "MAKE_VECTOR_TYPE(" + data_type + ", 2)";
-                std::string cast_type_in = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
-
-                std::string var_name_in = op_type + std::to_string(op_id) + "_threshold";
-                std::string var_name_out = op_type + std::to_string(op_id) + "_out";
-                prepare_data += vec_data_type + " " + var_name_in + " = " + cast_type_in +
-                                get_aligned_load2(op_type + "_input0", "f_block*OC_BLOCK_SIZE") + ";";
-                prepare_data += "int packed_res = 0;";
+                std::string var_name_in = fused_dep.GetInputVarName(0);
+                std::string var_name_out = fused_dep.GetInputVarName(3);
+                std::string cast_type_vec = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
+                std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float" : "as_half";
+
+                prepare_data += "\\\n\tint packed_res = 0;";
+                if (fused_dep.tensors[0].Feature().v == params.output.Feature().v) {
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var_name_in + " = " + cast_type_vec +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " thresh = (oc < 16) ? " + get_shuffle(var_name_in + ".s0", "oc") +
+                                         " : " + get_shuffle(var_name_in + ".s1", "oc") + ";";
+                } else {
+                    prepare_data += "\\\n\t" + data_type + " " + var_name_in + " = " + cast_type +
+                                 + "(" + fused_dep.GetInputPtrName(0) + "[0]);";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " thresh = " + var_name_in + ";";
+                }
 
-                eltwise_fused_ops += data_type + " thresh = (oc < 16) ? " + get_shuffle(var_name_in + ".s0", "oc") +
-                                     " : " + get_shuffle(var_name_in + ".s1", "oc") + ";";
 
                 if (fused_dep.tensors[2].Feature().v == params.output.Feature().v) {
                     // Per-channel output value
-                    std::string cast_type_out = (fused_dep.tensors[3].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
-                    prepare_data += vec_data_type + " " + var_name_out + " = " + cast_type_out +
-                                    get_aligned_load2(op_type + "_input3", "f_block*OC_BLOCK_SIZE") + ";";
-                    eltwise_fused_ops += data_type + " out_val = (oc < 16) ? " + get_shuffle(var_name_out + ".s0", "oc") +
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var_name_out + " = " + cast_type_vec +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(3), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops +="\\\n\t" + data_type + " out_val = (oc < 16) ? " + get_shuffle(var_name_out + ".s0", "oc") +
                                          " : " + get_shuffle(var_name_out + ".s1", "oc") + ";";
                 } else {
                     // Per-tensor output value
-                    std::string cast_type_out = (fused_dep.tensors[3].GetDType() == Datatype::F32) ? "as_float" : "as_half";
-                    prepare_data += data_type + " " + var_name_out + " = " + cast_type_out +
-                                    +"(" + op_type + "_input3[0]);";
-                    eltwise_fused_ops += data_type + " out_val = " + var_name_out + ";";
+                    prepare_data += "\\\n\t" + data_type + " " + var_name_out + " = " + cast_type +
+                                    + "(" + fused_dep.GetInputPtrName(3) + "[0]);";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " out_val = " + var_name_out + ";";
                 }
-                eltwise_fused_ops += "if (out_val == 1) ";
-                eltwise_fused_ops += "packed_res |= (res > thresh) << oc;";
-                eltwise_fused_ops += "else ";
-                eltwise_fused_ops += "packed_res |= (res <= thresh) << oc;";
+                eltwise_fused_ops += "\\\n\tif (out_val == 1) ";
+                eltwise_fused_ops += "\\\n\t\tpacked_res |= (res > thresh) << oc;";
+                eltwise_fused_ops += "\\\n\telse ";
+                eltwise_fused_ops += "\\\n\t\tpacked_res |= (res <= thresh) << oc;";
 
                 break;
             }
+            case binary_convolution_params::fused_operation_desc::Type::ACTIVATION: {
+                break;
+            }
             default:
                 throw std::invalid_argument("Invalid fused op in binary_convolution kernel: " + params.layerID);
         }
 
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = op_prefix + std::to_string(op_input_id);
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) + "* " + op_type +
-                           "_input" + std::to_string(op_input_id) + ",";
-        }
-
         if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type) ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
+            auto suffix = "_FUSED_OP" + std::to_string(op_id);
 
             jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops += "res = ACTIVATION" + suffix + "(res, ACTIVATION_PARAMS" + suffix + ");";
+            eltwise_fused_ops += "\\\n\tres = ACTIVATION" + suffix + "((OUTPUT_TYPE)res, ACTIVATION_PARAMS" + suffix + ");";
         }
         op_id++;
     }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
     jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
     jit.AddConstant(MakeJitConstant("FUSED_OPS_PREPARE_DATA", prepare_data));
+    jit.AddConstant(MakeJitConstant("CUSTOM_FUSED_OPS", true));
 
     return jit;
 }
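To illustrate what the string building above generates: for a single fused SCALE op with one F16 scales tensor (op_id == 0), the loop yields roughly the OpenCL fragments sketched below. The names in0/in0_ptr stand in for whatever GetInputVarName(0)/GetInputPtrName(0) actually return, and the block-read form assumes the F16 branch of the get_aligned_load2 helper used in this function.

#include <string>

// Illustration of the generated text only.
const std::string prepare_data_example =
    "\\\n\thalf2 in0 = as_half2(intel_sub_group_block_read_us2("
    "(const __global ushort*)(in0_ptr) + (f_block*OC_BLOCK_SIZE)));";
const std::string eltwise_fused_ops_example =
    "\\\n\thalf sc0 = (oc < 16) ? (intel_sub_group_shuffle(in0.s0, oc))"
    " : (intel_sub_group_shuffle(in0.s1, oc));"
    "\\\n\tres = res*sc0;";
// These strings become the FUSED_OPS_PREPARE_DATA and DO_ELTWISE_FUSED_OPS jit constants
// and are pasted into the generated OpenCL kernel as the bodies of multi-line macros.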
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp
new file mode 100644 (file)
index 0000000..40f3582
--- /dev/null
@@ -0,0 +1,190 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include <iostream>
+#include "binary_convolution_kernel_1x1_b_fs_yx_fsv16.h"
+#include <string>
+
+namespace kernel_selector {
+
+static const int sub_group_size = 16;
+static const int ic_pack_size = 32;
+static const int xy_block_size = 16;
+
+ParamsKey BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::BINARY);
+    k.EnableInputWeightsType(WeightsType::BINARY);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::INT32);
+    k.EnableInputLayout(DataLayout::b_fs_yx_32fp);
+    k.EnableOutputLayout(DataLayout::bfyx_f16);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableDifferentTypes();
+    return k;
+}
+
+BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1_b_fs_yx_fsv16::SetDefault(
+    const binary_convolution_params& params,
+    int) const {
+    DispatchData kd = BinaryConvolutionKernelBase::SetDefault(params);
+
+    const auto& out = params.output;
+
+    auto x = out.X().v;
+    auto y = out.Y().v;
+    auto f = out.Feature().v;
+    auto b = out.Batch().v;
+
+    kd.gws0 = Align(x * y, sub_group_size);
+    kd.gws1 = CeilDiv(f, sub_group_size);  // one work-item computes 16 output channels
+    kd.gws2 = b;
+
+    kd.lws0 = sub_group_size;
+    kd.lws1 = 1;
+    kd.lws2 = 1;
+
+    kd.effiency = FORCE_PRIORITY_1;
+
+    return kd;
+}
+
+bool BinaryConvolutionKernel1x1_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
+    if (!BinaryConvolutionKernelBase::Validate(p, o) || !CovolutionBinaryCheckInput(p, o))
+        return false;
+
+    const auto& params = static_cast<const binary_convolution_params&>(p);
+
+    const auto& input = params.inputs[0];
+    const auto& output = params.output;
+
+    const bool bOutputSizes = output.X().v != input.X().v || output.Y().v != input.Y().v;
+    const bool bFilterSize = params.filterSize.x != 1 || params.filterSize.y != 1;
+    const bool bStride = params.stride.x != 1 || params.stride.y != 1;
+    const bool bGroups = params.split > 1 || params.groups > 1 || params.depthwise_separable_opt;
+
+    if (bOutputSizes || bFilterSize || bStride || bGroups)
+        return false;
+
+    return true;
+}
+
+JitConstants BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetJitConstants(const binary_convolution_params& params,
+                                                         const DispatchData& runInfo) const {
+    auto jit = Parent::GetJitConstants(params, runInfo);
+
+    jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
+    jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size)));
+    jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.output.Feature().v, ic_pack_size)));
+    jit.AddConstant(MakeJitConstant("PADDED_INPUT", params.inputs[0].X().pad.Total() != 0));
+    jit.AddConstant(MakeJitConstant("PADDED_OUTPUT", params.output.X().pad.Total() != 0));
+    jit.AddConstant(MakeJitConstant("XY_BLOCK_SIZE", xy_block_size));
+    if (params.inputs[0].Feature().v % ic_pack_size) {
+        jit.AddConstant(MakeJitConstant("LEFTOVERS_IC", params.inputs[0].Feature().v % ic_pack_size));
+        jit.AddConstant(MakeJitConstant("FILTER_MASK",
+                                        (0xFFFFFFFF >> (ic_pack_size - params.inputs[0].Feature().v % ic_pack_size))));
+    }
+
+    if (params.output.Feature().v % 32 != 0) {
+        jit.AddConstant(MakeJitConstant("LEFTOVERS_OC", true));
+    }
+
+    if (params.output.GetDType() == Datatype::BINARY) {
+        jit.AddConstant(MakeJitConstant("BINARY_PACKED_OUTPUT", 1));
+    }
+
+    return jit;
+}
+
+JitConstants BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
+                                                                                      const DispatchData& /*kd*/) const {
+    JitConstants jit = {};
+
+    FusedOpsConfiguration conf = {"", {"b", "(f_block*16 + oc)", "y", "x"}, "res", 1, false, false, true, false };
+    jit.Merge(MakeFusedOpsDeclsJitConstants(params, {conf}));
+
+    size_t op_id = 0;
+    std::string input_decls = "";
+    std::string eltwise_fused_ops = "";
+    std::string prepare_data = "";
+    for (auto& fused_dep : params.fused_ops) {
+        auto get_aligned_load = [&](std::string ptr, std::string byte_offset) -> std::string {
+            if (fused_dep.tensors[0].GetDType() == Datatype::F32)
+                return "(intel_sub_group_block_read((const __global uint*)(" + ptr + ") + (" + byte_offset + ")))";
+            else
+                return "(intel_sub_group_block_read_us((const __global ushort*)(" + ptr + ") + (" + byte_offset +
+                       ")))";
+        };
+
+        auto get_shuffle = [&](std::string var, std::string lid) -> std::string {
+            return "(intel_sub_group_shuffle(" + var + ", " + lid + "))";
+        };
+
+        std::string data_type = fused_dep.GetInputTypeName(0, 1);
+        std::string vec_data_type = fused_dep.GetInputTypeName(0, 1);
+        std::string sc = "sc" + std::to_string(op_id);
+        switch (fused_dep.type) {
+            case binary_convolution_params::fused_operation_desc::Type::SCALE: {
+                std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float" : "as_half";
+                if (fused_dep.tensors.size() == 1) {
+                    std::string var_name = fused_dep.GetInputVarName(0);
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var_name + " = " + cast_type +
+                                    get_aligned_load(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops += "\\\n\t" + data_type + " " + sc + " = " + get_shuffle(var_name, "oc") + ";";
+                    eltwise_fused_ops += "\\\n\tres = res*" + var_name + ";";
+                } else {
+                    std::string var0_name = fused_dep.GetInputVarName(0);
+                    std::string var1_name = fused_dep.GetInputVarName(1);
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var0_name + " = " + cast_type +
+                                    get_aligned_load(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                    prepare_data += "\\\n\t" + vec_data_type + " " + var1_name + " = " + cast_type +
+                                    get_aligned_load(fused_dep.GetInputPtrName(1), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops += "\\\n\tres = res*" + var0_name + " + " + var1_name + ";";
+                }
+
+                break;
+            }
+            case binary_convolution_params::fused_operation_desc::Type::ACTIVATION: {
+                break;
+            }
+            default:
+                throw std::invalid_argument("Invalid fused op in binary_convolution kernel: " + params.layerID);
+        }
+
+        if (fused_dep.activation.function != ActivationFunction::NONE) {
+            auto suffix = "_FUSED_OP" + std::to_string(op_id);
+
+            jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
+            eltwise_fused_ops += "\\\n\tres = ACTIVATION" + suffix + "((OUTPUT_TYPE)res, ACTIVATION_PARAMS" + suffix + ");";
+        }
+        op_id++;
+    }
+    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
+    jit.AddConstant(MakeJitConstant("FUSED_OPS_PREPARE_DATA", prepare_data));
+    jit.AddConstant(MakeJitConstant("CUSTOM_FUSED_OPS", true));
+
+    return jit;
+}
+
+KernelsData BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+}  // namespace kernel_selector
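A quick illustration of the dispatch logic in the new kernel above, for an assumed output shape of B=1, F=64, Y=28, X=28 that passes Validate (1x1 filter, stride 1, matching spatial sizes, no groups):

// Illustrative dispatch computation for binary_convolution_gpu_1x1_b_fs_yx_fsv16
// (the shape is assumed; the arithmetic matches Align/CeilDiv as used above).
const size_t sub_group_size = 16;
const size_t x = 28, y = 28, f = 64, b = 1;
const size_t gws0 = ((x * y + sub_group_size - 1) / sub_group_size) * sub_group_size;  // Align(784, 16)  -> 784
const size_t gws1 = (f + sub_group_size - 1) / sub_group_size;                         // CeilDiv(64, 16) -> 4
const size_t gws2 = b;                                                                 // 1
// lws = {16, 1, 1}: one sub-group of 16 work-items per work-group along the fused x*y dimension;
// each work-item produces results for 16 output channels, so gws1 counts channel blocks.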
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h
new file mode 100644 (file)
index 0000000..2306960
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include "binary_convolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+class BinaryConvolutionKernel1x1_b_fs_yx_fsv16 : public BinaryConvolutionKernelBase {
+public:
+    using Parent = BinaryConvolutionKernelBase;
+
+    BinaryConvolutionKernel1x1_b_fs_yx_fsv16() : BinaryConvolutionKernelBase("binary_convolution_gpu_1x1_b_fs_yx_fsv16") {}
+    virtual ~BinaryConvolutionKernel1x1_b_fs_yx_fsv16() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    std::vector<WeightsLayout> GetSupportedWeightLayouts(const binary_convolution_params&) const override {
+        return {WeightsLayout::os_is_yx_osv32_isv32p};
+    }
+    JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params,
+                                                const DispatchData& kd) const override;
+    bool Validate(const Params& p, const optional_params& o) const override;
+    DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override;
+    JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& kd) const override;
+};
+}  // namespace kernel_selector
index feaebfb..f0704a3 100644 (file)
@@ -48,39 +48,18 @@ bool BinaryConvolutionKernelBase::Validate(const Params& p, const optional_param
 
 JitConstants BinaryConvolutionKernelBase::GetJitConstants(const binary_convolution_params& params,
                                                           const DispatchData& kd) const {
-    JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
-    mem_consts.Merge(GetFusedPrimitivesJitConstants(params, kd));
-    const auto& padding = params.padding;
-    const auto& input = params.inputs[0];
+    JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
+    jit.Merge(GetFusedPrimitivesJitConstants(params, kd));
 
-    int64_t input_offset_with_padding =
-        (int64_t)input.GetFirstElementOffset() - padding.x * input.X().pitch - input.Y().pitch * padding.y;
-    input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
-
-    mem_consts.AddConstants({
+    jit.AddConstants({
         MakeJitConstant("STRIDE", params.stride),
         MakeJitConstant("PADDING", params.padding),
         MakeJitConstant("DILATION", params.dilation),
-        MakeJitConstant("FILTER_ARRAY_NUM", params.split),
-        MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
-        MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", params.depthwise_separable_opt),
     });
 
-    std::vector<uint32_t> unrollLoopParams{params.filterSize.x,
-                                           params.filterSize.y,
-                                           (uint32_t)kd.gemmStyle.globalWorkSizeDX,
-                                           (uint32_t)kd.gemmStyle.globalWorkSizeDY,
-                                           (uint32_t)kd.gemmStyle.globalWorkSizeDZ,
-                                           (uint32_t)kd.gemmStyle.subBlockDimM,
-                                           (uint32_t)kd.gemmStyle.subBlockDimK,
-                                           (uint32_t)kd.gemmStyle.subBlockDimN};
-
-    auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end());
-
-    JitConstants mem_consts_loop = MakeLoopUnrollParamsJitConstants(loopCount);
-    mem_consts.Merge(mem_consts_loop);
+    jit.Merge(MakeTypeJitConstants(params.out_dt, "CONV_RESULT"));
 
-    return mem_consts;
+    return jit;
 }
 
 JitConstants BinaryConvolutionKernelBase::GetFusedPrimitivesJitConstants(const binary_convolution_params& /*params*/,
@@ -100,36 +79,6 @@ bool BinaryConvolutionKernelBase::CheckWorkGroups(const BinaryConvolutionKernelB
     return true;
 }
 
-namespace {
-bool CheckTensorForSplit(const DataTensor& t, uint32_t split) {
-    if (t.PitchesDifferFromLogicalDims()) {
-        auto feature = t.Feature();
-        auto featureIndex = DataTensor::Channelndex(t.GetLayout(), Tensor::DataChannelName::FEATURE);
-        if (featureIndex >= 0 && featureIndex + 1 < static_cast<int>(DataTensor::ChannelsCount(t.GetLayout()))) {
-            if (feature.v * split <= t.GetDims()[featureIndex + 1].pitch) {
-                Tensor::NDims newDims = t.GetDims();
-                newDims[featureIndex].v = feature.v * split;
-
-                DataTensor newTensor{newDims,
-                                     t.GetDType(),
-                                     t.GetLayout(),
-                                     t.GetViewOffset(),
-                                     t.PhysicalSize(),
-                                     t.GetPaddedVal()};
-
-                if (newTensor.PitchesDifferFromLogicalDims() == false) {
-                    return true;
-                }
-            }
-        }
-
-        return false;
-    }
-
-    return true;
-}
-}  // namespace
-
 BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefault(
     const binary_convolution_params& params,
     int) const {
index 012bfc6..ed7b79f 100644 (file)
@@ -113,8 +113,10 @@ JitConstants BinaryConvolutionKernelGeneric::GetFusedPrimitivesJitConstants(cons
                                                                             const DispatchData& /*kd*/) const {
     JitConstants jit = {};
 
+    FusedOpsConfiguration conf = {"", {"b", "(f_block*16 + i)", "y", "x"}, "res", 1, false, false, true, false };
+    jit.Merge(MakeFusedOpsDeclsJitConstants(params, {conf}));
+
     size_t op_id = 0;
-    std::string input_decls = "";
     std::string eltwise_fused_ops = "";
     std::string channel_pack_fused_ops = "";
     std::string prepare_data = "";
@@ -126,111 +128,111 @@ JitConstants BinaryConvolutionKernelGeneric::GetFusedPrimitivesJitConstants(cons
                 return "(intel_sub_group_block_read_us2((const __global ushort*)(" + ptr + ") + (" + byte_offset +
                        ")))";
         };
-
-        std::string op_type = "";
-        std::string op_prefix = "FUSED_OP_" + std::to_string(op_id) + "_INPUT";
+        std::string data_type = fused_dep.GetInputTypeName(0, 1);
+        std::string vec_data_type = fused_dep.GetInputTypeName(0, 2);
+        std::string sc = "sc" + std::to_string(op_id);
+        std::string sh = "sh" + std::to_string(op_id);
         switch (fused_dep.type) {
             case binary_convolution_params::fused_operation_desc::Type::SCALE: {
-                op_type = "scale";
-                std::string data_type = op_prefix + "0_TYPE";
-                std::string vec_data_type = "MAKE_VECTOR_TYPE(" + data_type + ", 2)";
                 std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
-
                 if (fused_dep.tensors.size() == 1) {
-                    std::string var_name = op_type + std::to_string(op_id) + "_scales";
-                    prepare_data += vec_data_type + var_name + cast_type +
-                                    get_aligned_load2(op_type + "_input0", "f_block*OC_BLOCK_SIZE") + ";";
-                    eltwise_fused_ops += data_type + " sc = (i < 16) ? " + var_name + ".s0" + " : " + var_name + ".s1;";
-                    eltwise_fused_ops += "res = res*sc;";
+                    std::string var_name = fused_dep.GetInputVarName(0);
+                    prepare_data += vec_data_type + " " + var_name + " = " + cast_type +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                    eltwise_fused_ops += data_type + " " + sc + " = (i < 16) ? " + var_name + ".s0" + " : " + var_name + ".s1;";
+                    eltwise_fused_ops += "res = res*" + sc +";";
                 } else {
-                    std::string var0_name = op_type + std::to_string(op_id) + "_scales";
-                    std::string var1_name = op_type + std::to_string(op_id) + "_shifts";
+                    std::string var0_name = fused_dep.GetInputVarName(0);
+                    std::string var1_name = fused_dep.GetInputVarName(1);
                     prepare_data += vec_data_type + " " + var0_name + " = " + cast_type +
-                                    get_aligned_load2(op_type + "_input0", "f_block*OC_BLOCK_SIZE") + ";";
+                                    get_aligned_load2(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
                     prepare_data += vec_data_type + " " + var1_name + " = " + cast_type +
-                                    get_aligned_load2(op_type + "_input1", "f_block*OC_BLOCK_SIZE") + ";";
+                                    get_aligned_load2(fused_dep.GetInputPtrName(1), "f_block*OC_BLOCK_SIZE") + ";";
                     eltwise_fused_ops +=
-                        data_type + " sc = (i < 16) ? " + var0_name + ".s0" + " : " + var0_name + ".s1;";
+                        data_type + " " + sc + " = (i < 16) ? " + var0_name + ".s0" + " : " + var0_name + ".s1;";
                     eltwise_fused_ops +=
-                        data_type + " sh = (i < 16) ? " + var1_name + ".s0" + " : " + var1_name + ".s1;";
-                    eltwise_fused_ops += "res = res*sc + sh;";
+                        data_type + " " + sh + " = (i < 16) ? " + var1_name + ".s0" + " : " + var1_name + ".s1;";
+                    eltwise_fused_ops += "res = res*" + sc + " + " + sh + ";";
                 }
                 break;
             }
             case binary_convolution_params::fused_operation_desc::Type::QUANTIZE: {
-                op_type = "quantize";
-                std::string data_type = op_prefix + "0_TYPE";
-                std::string vec_data_type = "MAKE_VECTOR_TYPE(" + data_type + ", 2)";
-                std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
+                std::string var_name_in = fused_dep.GetInputVarName(0);
+                std::string var_name_out = fused_dep.GetInputVarName(3);
+                std::string cast_type_vec = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
+                std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float" : "as_half";
+
+                if (fused_dep.tensors[0].Feature().v == params.output.Feature().v) {
+                    prepare_data += vec_data_type + " " + var_name_in + " = " + cast_type_vec +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";";
+                } else {
+                    prepare_data += data_type + " " + var_name_in + " = " + cast_type +
+                                 + "(" + fused_dep.GetInputPtrName(0) + "[0]);";
+                }
 
-                std::string var_name_in = op_type + std::to_string(op_id) + "_threshold";
-                std::string var_name_out = op_type + std::to_string(op_id) + "_out_val";
-                prepare_data += vec_data_type + " " + var_name_in + " = " + cast_type +
-                                get_aligned_load2(op_type + "_input0", "f_block*OC_BLOCK_SIZE") + ";";
                 if (fused_dep.tensors[2].Feature().v == params.output.Feature().v) {
-                    std::string cast_type_out = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2";
-                    prepare_data += vec_data_type + " " + var_name_out + " = " + cast_type_out +
-                                    get_aligned_load2(op_type + "_input3", "f_block*OC_BLOCK_SIZE") + ";";
+                    prepare_data += vec_data_type + " " + var_name_out + " = " + cast_type_vec +
+                                    get_aligned_load2(fused_dep.GetInputPtrName(3), "f_block*OC_BLOCK_SIZE") + ";";
                 } else {
-                    std::string cast_type_out = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float" : "as_half";
-                    prepare_data += vec_data_type + " " + var_name_out + " = " + cast_type_out +
-                                    "(" + op_type + "_input3[0]);";
+                    prepare_data += data_type + " " + var_name_out + " = " + cast_type +
+                                    "(" + fused_dep.GetInputPtrName(3)+"[0]);";
                 }
 
-                channel_pack_fused_ops += "for (int i = 0; i < 16; i++) {";
+                std::string var_in_s0 = fused_dep.tensors[0].Feature().v == params.output.Feature().v ? var_name_in + ".s0" : var_name_in;
+                std::string var_in_s1 = fused_dep.tensors[0].Feature().v == params.output.Feature().v ? var_name_in + ".s1" : var_name_in;
+
+                std::string var_out_s0 = fused_dep.tensors[3].Feature().v == params.output.Feature().v ? var_name_out + ".s0" : var_name_out;
+                std::string var_out_s1 = fused_dep.tensors[3].Feature().v == params.output.Feature().v ? var_name_out + ".s1" : var_name_out;
+
+                channel_pack_fused_ops += "\\\n\tfor (int i = 0; i < 16; i++) {";
+                channel_pack_fused_ops += "\\\n\tint ch0, ch1;";
                 if (fused_dep.tensors[2].Feature().v == params.output.Feature().v) {
-                    channel_pack_fused_ops += "if ("+ var_name_out+ ".s0 == UNIT_VAL_ONE) ";
-                    channel_pack_fused_ops += "int ch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_name_in + ".s0 ? (1 << lid) : 0;";
-                    channel_pack_fused_ops += "else ";
-                    channel_pack_fused_ops += "int ch0 = dst[0*SUB_GROUP_SIZE + i] <= " + var_name_in + ".s0 ? (1 << lid) : 0;";
-                    channel_pack_fused_ops += "if ("+ var_name_out+ ".s1 == UNIT_VAL_ONE) ";
-                    channel_pack_fused_ops += "int ch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_name_in + ".s1 ? "
+                    channel_pack_fused_ops += "\\\n\tif ("+ var_out_s0 + " == UNIT_VAL_ONE) ";
+                    channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_in_s0 + " ? (1 << lid) : 0;";
+                    channel_pack_fused_ops += "\\\n\telse ";
+                    channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] <= " + var_in_s0 + " ? (1 << lid) : 0;";
+                    channel_pack_fused_ops += "\\\n\tif ("+ var_out_s1 + " == UNIT_VAL_ONE) ";
+                    channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_in_s1 + " ? "
                                                          "(1 << (SUB_GROUP_SIZE + lid)) : 0;";
-                    channel_pack_fused_ops += "else ";
-                    channel_pack_fused_ops += "int ch1 = dst[1*SUB_GROUP_SIZE + i] <= " + var_name_in + ".s1 ? "
+                    channel_pack_fused_ops += "\\\n\telse ";
+                    channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] <= " + var_in_s1 + " ? "
                                                          "(1 << (SUB_GROUP_SIZE + lid)) : 0;";
                 } else {
-                    channel_pack_fused_ops += "if ("+ var_name_out+ " == UNIT_VAL_ONE) {";
-                    channel_pack_fused_ops += "int ch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_name_in + ".s0 ? (1 << lid) : 0;";
-                    channel_pack_fused_ops += "int ch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_name_in + ".s1 ? "
+                    channel_pack_fused_ops += "\\\n\tif ("+ var_out_s0 + " == UNIT_VAL_ONE) {";
+                    channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_in_s0 + " ? (1 << lid) : 0;";
+                    channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_in_s1 + " ? "
                                                          "(1 << (SUB_GROUP_SIZE + lid)) : 0;";
-                    channel_pack_fused_ops += "} else {";
-                    channel_pack_fused_ops += "int ch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_name_in + ".s0 ? (1 << lid) : 0;";
-                    channel_pack_fused_ops += "int ch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_name_in + ".s1 ? "
+                    channel_pack_fused_ops += "\\\n\t} else {";
+                    channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] <= " + var_in_s0 + " ? (1 << lid) : 0;";
+                    channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] <= " + var_in_s1 + " ? "
                                                                                                        "(1 << (SUB_GROUP_SIZE + lid)) : 0;";
-                    channel_pack_fused_ops += "}";
+                    channel_pack_fused_ops += "\\\n\t}";
                 }
-                channel_pack_fused_ops += "int packed = ch0 + ch1;";
-                channel_pack_fused_ops += "packed_out[i] = sub_group_reduce_add(packed);";
-                channel_pack_fused_ops += "}";
+                channel_pack_fused_ops += "\\\n\tint packed = ch0 + ch1;";
+                channel_pack_fused_ops += "\\\n\tpacked_out[i] = sub_group_reduce_add(packed);";
+                channel_pack_fused_ops += "\\\n\t}";
 
                 break;
             }
+            case binary_convolution_params::fused_operation_desc::Type::ACTIVATION: {
+                break;
+            }
             default:
                 throw std::invalid_argument("Invalid fused op in binary_convolution kernel: " + params.layerID);
         }
 
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = op_prefix + std::to_string(op_input_id);
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) + "* " + op_type +
-                           "_input" + std::to_string(op_input_id) + ",";
-        }
-
         if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type) ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
+            auto suffix = "_FUSED_OP" + std::to_string(op_id);
 
             jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops += "res = ACTIVATION" + suffix + "(res, ACTIVATION_PARAMS" + suffix + ");";
+            eltwise_fused_ops += "\\\n\tres = ACTIVATION" + suffix + "((OUTPUT_TYPE)res, ACTIVATION_PARAMS" + suffix + ");";
         }
         op_id++;
     }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
     jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
     jit.AddConstant(MakeJitConstant("DO_CHANNEL_PACK_OPS", channel_pack_fused_ops));
     jit.AddConstant(MakeJitConstant("FUSED_OPS_PREPARE_DATA", prepare_data));
+    jit.AddConstant(MakeJitConstant("CUSTOM_FUSED_OPS", true));
 
     return jit;
 }
index be1c2ae..1427a07 100644 (file)
@@ -100,47 +100,8 @@ JitConstants BinaryConvolutionKernelRef::GetFusedPrimitivesJitConstants(const bi
                                                                         const DispatchData& /*kd*/) const {
     JitConstants jit = {};
 
-    size_t op_id = 0;
-    std::string input_decls = "";
-    std::string eltwise_fused_ops = "";
-    for (auto& fused_dep : params.fused_ops) {
-        std::string op_type = "";
-        switch (fused_dep.type) {
-            case binary_convolution_params::fused_operation_desc::Type::SCALE: {
-                op_type = "scale";
-                // Variables that are supposed to be defined:
-                // f (int) - index of output feature channel
-                // res (float, half) - results of layer without any fusions
-                if (fused_dep.tensors.size() == 1) {
-                    eltwise_fused_ops += "res = (res*" + op_type + "_input0[f]);";
-                } else {
-                    eltwise_fused_ops += "res = (res*" + op_type + "_input0[f] + " + op_type + "_input1[f]);";
-                }
-                break;
-            }
-            default:
-                throw std::invalid_argument("Invalid fused op in binary_convolution kernel: " + params.layerID);
-        }
-
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = "FUSED_OP_" + std::to_string(op_id) + "_INPUT" + std::to_string(op_input_id);
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) + "* " + op_type +
-                           "_input" + std::to_string(op_input_id) + ",";
-        }
-
-        if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type) ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
-
-            jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops += "res = ACTIVATION" + suffix + "(res, ACTIVATION_PARAMS" + suffix + ");";
-        }
-        op_id++;
-    }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
+    FusedOpsConfiguration conf = {"", {"b", "f", "y", "x"}, "res", 1, false, false, true, false };
+    jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
 
     return jit;
 }
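The FusedOpsConfiguration initializer used here (and in the other binary convolution kernels in this diff) is positional; the annotated restatement below reflects how the fields appear to be used across the diff, and the trailing flags are copied verbatim because their meaning is not visible here.

// Annotated restatement of the configuration above (interpretation inferred from usage).
FusedOpsConfiguration conf = {
    "",                          // macro suffix ("" here; "_VEC"/"_SCALAR" in the bfyx_f16 kernels)
    {"b", "f", "y", "x"},        // index expressions used to address the fused-op inputs
    "res",                       // variable that the fused ops read and write
    1,                           // vector width of that variable (scalar here)
    false, false, true, false};  // remaining flags kept exactly as in the code above
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));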
index b8f065b..2232212 100644 (file)
 #include "binary_convolution_kernel_ref.h"
 #include "binary_convolution_kernel_generic.h"
 #include "binary_convolution_kernel_1x1.h"
+#include "binary_convolution_kernel_1x1_b_fs_yx_fsv16.h"
 
 namespace kernel_selector {
 binary_convolution_kernel_selector::binary_convolution_kernel_selector() {
     Attach<BinaryConvolutionKernel1x1>();
+    Attach<BinaryConvolutionKernel1x1_b_fs_yx_fsv16>();
     Attach<BinaryConvolutionKernelGeneric>();
     Attach<BinaryConvolutionKernelRef>();
 }
index 3ca2e59..4042e74 100644 (file)
@@ -26,32 +26,18 @@ namespace kernel_selector {
 // binary_convolution_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct binary_convolution_params : public weight_bias_params {
-    struct fused_operation_desc {
-        enum class Type : uint8_t {
-            SCALE = 0,
-            QUANTIZE,
-            UNDEFINED
-        };
-
-        Type type;
-        size_t dep_idx_start;
-        size_t dep_size;
-        MultiDataTensor tensors;
-        kernel_selector::base_activation_params activation;
-    };
-
     binary_convolution_params() : weight_bias_params(KernelType::BINARY_CONVOLUTION) {}
 
     uSize filterSize;
     uSize stride;
     uSize dilation;
     uSize padding;
+    Datatype out_dt = Datatype::UNSUPPORTED;
     uint32_t split = 1;
     bool depthwise_separable_opt = false;
     float pad_value = 0.0f;
     uint32_t groups = 1;
 
-    std::vector<fused_operation_desc> fused_ops = {};
     std::string to_string() const override;
     ParamsKey GetParamsKey() const override;
 };
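With the per-kernel fused_operation_desc removed and the out_dt field added, a caller filling binary_convolution_params might look roughly like the sketch below. Field names come from the struct above and from uses elsewhere in this diff (params.stride.x, MakeTypeJitConstants(params.out_dt, "CONV_RESULT")); the concrete values are placeholders, and fused operations are presumably supplied through the common base params, since the kernels still iterate params.fused_ops.

// Hypothetical population of binary_convolution_params (values are placeholders).
binary_convolution_params p;
p.filterSize.x = 1;  p.filterSize.y = 1;
p.stride.x     = 1;  p.stride.y     = 1;
p.dilation.x   = 1;  p.dilation.y   = 1;
p.padding.x    = 0;  p.padding.y    = 0;
p.out_dt       = Datatype::F16;   // feeds the CONV_RESULT type jit constant in the kernel base
p.pad_value    = 0.0f;
p.groups       = 1;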
index 78d9933..631ad66 100644 (file)
@@ -35,7 +35,7 @@ BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params&
 
     kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
 
-    std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v};
+    std::vector<size_t> global{output.X().v * output.Z().v, output.Y().v * output.W().v, output.Batch().v * output.Feature().v};
     const auto& local = GetOptimalLocalWorkGroupSizes(global);
 
     kd.gws0 = global[0];
index 2c46e8e..7880139 100644 (file)
@@ -31,10 +31,14 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::yxfb);
     k.EnableInputLayout(DataLayout::byxf);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfwzyx);
 
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::byxf);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfwzyx);
 
     k.EnableTensorOffset();
     k.EnableTensorPitches();
index bdd02a5..c9d1c1e 100644 (file)
@@ -43,6 +43,8 @@ ParamsKey ConcatenationKernel_simple_Ref::GetSupportedKey() const {
     k.EnableOutputLayout(DataLayout::bfzyx);
     k.EnableInputLayout(DataLayout::bfwzyx);
     k.EnableOutputLayout(DataLayout::bfwzyx);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
@@ -63,10 +65,14 @@ bool ConcatenationKernel_simple_Ref::Validate(const Params& p, const optional_pa
 
     const concatenation_params& params = static_cast<const concatenation_params&>(p);
 
-    // all inputs have to have same layout
+    // all inputs have to have the same layout (except that bfzyx and bfzyx_f16 may be mixed)
     auto same_layout = params.inputs[0].GetLayout();
     for (const auto& lt : params.inputs) {
-        if (lt.GetLayout() != same_layout) {
+        auto cur_layout = lt.GetLayout();
+        if ((cur_layout == DataLayout::bfzyx && same_layout == DataLayout::bfzyx_f16) ||
+            (cur_layout == DataLayout::bfzyx_f16 && same_layout == DataLayout::bfzyx)) {
+            continue;
+        } else if (cur_layout != same_layout) {
             return false;
         }
     }
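Restated as a standalone predicate (sketch only), the relaxed layout check above accepts any mix of bfzyx and bfzyx_f16 while still rejecting other mismatches:

// Minimal restatement of the relaxed check, with inputs[0] providing the reference layout.
static bool LayoutsCompatible(DataLayout reference, DataLayout candidate) {
    const bool zyx_pair =
        (candidate == DataLayout::bfzyx     && reference == DataLayout::bfzyx_f16) ||
        (candidate == DataLayout::bfzyx_f16 && reference == DataLayout::bfzyx);
    return zyx_pair || candidate == reference;
}
// e.g. LayoutsCompatible(bfzyx, bfzyx_f16) is true, LayoutsCompatible(bfyx, bfzyx) is false.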
index ad3f1d0..e5bb4e8 100644 (file)
@@ -23,7 +23,7 @@ namespace kernel_selector {
 // contract_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct contract_params : public base_params {
-    contract_params() : base_params(KernelType::CONTRACT) {}
+    contract_params() : base_params(KernelType::CONTRACT), mode(ContractMode::ANY) {}
     ContractMode mode;
     std::vector<uint16_t> reduction_axes;
 };
index 6e54d64..e56bfc9 100644 (file)
@@ -293,21 +293,26 @@ bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const Dat
 
 static DataTensor GetConvolutionBFYXPaddedTensor(const convolution_params& cp) {
     assert(cp.inputs.size() >= 1);
-    assert(cp.inputs[0].GetDims().size() == 4U);
+    auto ndims = cp.inputs[0].GetDims().size();
 
     DataTensor t = cp.inputs[0];
-    std::vector<Tensor::Pad> pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}};
+    std::vector<Tensor::Pad> pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} };
 
     pad[0].before = cp.padding.x;
     pad[1].before = cp.padding.y;
+    pad[2].before = cp.padding.z;
+
 
     const auto inputLimitX = (cp.output.X().v - 1) * cp.stride.x + (cp.filterSize.x - 1) * cp.dilation.x + 1;
     const auto inputLimitY = (cp.output.Y().v - 1) * cp.stride.y + (cp.filterSize.y - 1) * cp.dilation.y + 1;
+    const auto inputLimitZ = (cp.output.Z().v - 1) * cp.stride.z + (cp.filterSize.z - 1) * cp.dilation.z + 1;
+
 
     pad[0].after = (size_t)std::max(static_cast<int>(inputLimitX) - static_cast<int>(t.X().v) - static_cast<int>(pad[0].before), static_cast<int>(0));
     pad[1].after = (size_t)std::max(static_cast<int>(inputLimitY) - static_cast<int>(t.Y().v) - static_cast<int>(pad[1].before), static_cast<int>(0));
+    pad[2].after = (size_t)std::max(static_cast<int>(inputLimitZ) - static_cast<int>(t.Z().v) - static_cast<int>(pad[2].before), static_cast<int>(0));
 
-    Tensor::NDims dims(4);
+    Tensor::NDims dims(ndims);
     const Tensor::NDims& orgDims = cp.inputs[0].GetDims();
     size_t pitch = 1;
     for (size_t i = 0; i < dims.size(); i++) {
@@ -379,10 +384,9 @@ KernelsData ConvolutionKernelBase::GetKernelsDataForAutoTune(const Params& param
     return res;
 }
 
-JitConstants ConvolutionKernelBase::GetFusedPrimitivesJitConstants(const convolution_params& params,
+JitConstants ConvolutionKernelBase::GetFusedPrimitivesJitConstants(const convolution_params& /*params*/,
                                                                    const DispatchData& /*kd*/) const {
-    JitConstants jit = {};
-    return jit;
+    return {};
 }
 
 }  // namespace kernel_selector
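A small worked instance of the inputLimit / pad.after arithmetic in GetConvolutionBFYXPaddedTensor above, with illustrative values for the X axis:

#include <algorithm>
#include <cstddef>

// Worked instance of the padding computation (illustrative values only).
const size_t outX = 56, strideX = 1, filterX = 3, dilationX = 1;
const size_t inX  = 56, padBeforeX = 1;
const auto inputLimitX = (outX - 1) * strideX + (filterX - 1) * dilationX + 1;   // 58
const auto padAfterX   = (size_t)std::max(static_cast<int>(inputLimitX) -
                                          static_cast<int>(inX) -
                                          static_cast<int>(padBeforeX),
                                          static_cast<int>(0));                  // 1
// i.e. a 3x3 filter with stride 1 needs one extra padded column on each side of a 56-wide input.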
index a3f91e4..3132273 100644 (file)
@@ -100,4 +100,4 @@ JitConstants ConvolutionKernel_bfyx_1x1::GetJitConstants(const convolution_param
 KernelsData ConvolutionKernel_bfyx_1x1::GetKernelsData(const Params& params, const optional_params& options) const {
     return GetTunedKernelsDataByIndex(params, options);
 }
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
index 61bbb2f..5a60d58 100644 (file)
@@ -128,6 +128,13 @@ JitConstants ConvolutionKernel_bfyx_f16::GetJitConstants(const convolution_param
     auto jit = Parent::GetJitConstants(params, runInfo);
 
     auto blockWidth = runInfo.cldnnStyle.blockWidth;
+    if (params.fused_ops.size() > 0) {
+        FusedOpsConfiguration conf_vec = {"_VEC", {"b", "(f_block*16)", "y", "x"}, "dst", blockWidth, true, false, true, false };
+        FusedOpsConfiguration conf_scalar = {"_SCALAR", {"b", "(f_block*16)", "y", "(x+i)"}, "dst[i]", 1, true, false, true, false };
+        jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec, conf_scalar}));
+        jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
+        jit.Merge(MakeTypeJitConstants(Datatype::F16, "half"));
+    }
 
     size_t input_line_size = std::min(params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1)*params.dilation.x + 1,
                                       input.X().v + input.X().pad.Total());
@@ -176,84 +183,4 @@ KernelsData ConvolutionKernel_bfyx_f16::GetKernelsDataForAutoTune(const Params&
     return res;
 }
 
-JitConstants ConvolutionKernel_bfyx_f16::GetFusedPrimitivesJitConstants(const convolution_params& params,
-                                                                        const DispatchData& kd) const {
-    JitConstants jit = {};
-
-    size_t op_id = 0;
-    std::string input_decls = "";
-    std::string load_decls_vec = "";
-    std::string load_decls = "";
-    std::string eltwise_fused_ops_vec = "";
-    std::string eltwise_fused_ops = "";
-
-    auto make_jit_vector_type = [](std::string tensor_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-        if (vec_size > 1)
-            return "MAKE_VECTOR_TYPE(" + tensor_name + "_TYPE," + std::to_string(vec_size) + ")";
-        else
-            return tensor_name + "_TYPE";
-    };
-
-    auto make_jit_load = [](std::string tensor_name, std::string ptr_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-
-        std::string index_func_call_vec = tensor_name + "_GET_INDEX(b, f_block*16, y, x)";
-        std::string index_func_call = tensor_name + "_GET_INDEX(b, f_block*16, y, x+i)";
-        if (vec_size > 1)
-            return " UNIT_BLOCK_READ" + std::to_string(vec_size) + "(" + ptr_name + ", " + index_func_call_vec + ")";
-        else
-            return " UNIT_BLOCK_READ(" + ptr_name + ", " + index_func_call + ")";
-    };
-
-    for (auto& fused_dep : params.fused_ops) {
-        std::string op_type = "";
-        switch (fused_dep.type) {
-            case convolution_params::fused_operation_desc::Type::ELTWISE: {
-                op_type = "eltwise";
-                eltwise_fused_ops_vec += "dst = (dst + " + op_type + "_data);";
-                eltwise_fused_ops += "dst[i] = (dst[i] + " + op_type + "_data);";
-                break;
-            }
-            default:
-                throw std::invalid_argument("Invalid fused op in binary_convolution kernel: " + params.layerID);
-        }
-
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = "FUSED_OP_" + std::to_string(op_id) + "_INPUT" + std::to_string(op_input_id);
-            std::string ptr_name = op_type + "_input" + std::to_string(op_input_id);
-
-            std::string var_name = op_type + "_data";
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) +
-                           "* " + ptr_name + ",";
-            load_decls_vec += make_jit_vector_type(name, kd.cldnnStyle.blockWidth) + " " + var_name + " = " +
-                              make_jit_load(name, ptr_name, kd.cldnnStyle.blockWidth) + ";";
-            load_decls += make_jit_vector_type(name, 1) + " " + var_name + " = " +
-                          make_jit_load(name, ptr_name, 1) + ";";
-        }
-
-        if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type)
-                ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
-
-            jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops_vec += "dst = ACTIVATION"+suffix+"(dst, ACTIVATION_PARAMS"+suffix+");";
-            eltwise_fused_ops += "dst[i] = ACTIVATION"+suffix+"(dst[i], ACTIVATION_PARAMS"+suffix+");";
-        }
-        op_id++;
-    }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA_VEC", load_decls_vec));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA", load_decls));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS_VEC", eltwise_fused_ops_vec));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
-
-    return jit;
-}
-
 }  // namespace kernel_selector
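Note: the GetFusedPrimitivesJitConstants override removed above (and its near-identical copies removed further down) assembled the FUSED_OPS_DECLS / FUSED_OPS_LOAD_DATA(_VEC) / DO_ELTWISE_FUSED_OPS(_VEC) jit strings by hand. A minimal standalone sketch of the two string builders, copied from the removed code with an illustrative tensor name, shows what that generator emitted for the vectorized path versus the scalar tail; the replacement hunks below delegate the same job to MakeFusedOpsJitConstants with per-kernel FusedOpsConfiguration entries.

    // Standalone sketch (not part of the change): reproduces the string builders
    // from the removed GetFusedPrimitivesJitConstants to show the generated jit text.
    // The tensor name "FUSED_OP_0_INPUT0" and the vector size 8 are illustrative values.
    #include <cstddef>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    static std::string make_jit_vector_type(const std::string& tensor_name, size_t vec_size) {
        if (vec_size == 0 || vec_size > 8)
            throw std::invalid_argument("Invalid vector size in jit definitions");
        return vec_size > 1
            ? "MAKE_VECTOR_TYPE(" + tensor_name + "_TYPE," + std::to_string(vec_size) + ")"
            : tensor_name + "_TYPE";
    }

    static std::string make_jit_load(const std::string& tensor_name, const std::string& ptr_name, size_t vec_size) {
        if (vec_size == 0 || vec_size > 8)
            throw std::invalid_argument("Invalid vector size in jit definitions");
        const std::string idx_vec = tensor_name + "_GET_INDEX(b, f_block*16, y, x)";
        const std::string idx_scalar = tensor_name + "_GET_INDEX(b, f_block*16, y, x+i)";
        return vec_size > 1
            ? " UNIT_BLOCK_READ" + std::to_string(vec_size) + "(" + ptr_name + ", " + idx_vec + ")"
            : " UNIT_BLOCK_READ(" + ptr_name + ", " + idx_scalar + ")";
    }

    int main() {
        const std::string name = "FUSED_OP_0_INPUT0";  // hypothetical fused eltwise input
        const std::string ptr = "eltwise_input0";
        // Vectorized body (block width 8) vs. scalar tail (width 1):
        std::cout << make_jit_vector_type(name, 8) << " eltwise_data =" << make_jit_load(name, ptr, 8) << ";\n";
        std::cout << make_jit_vector_type(name, 1) << " eltwise_data =" << make_jit_load(name, ptr, 1) << ";\n";
    }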
index 62fc584..f4ce971 100644 (file)
@@ -45,7 +45,6 @@ protected:
     bool Validate(const Params& p, const optional_params& o) const override;
     DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
-    JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& kd) const override;
 
 private:
     struct AutoTuneOption {
index bbd5365..061941a 100644 (file)
@@ -120,6 +120,13 @@ JitConstants ConvolutionKernel_bfyx_f16_1x1::GetJitConstants(const convolution_p
     auto jit = Parent::GetJitConstants(params, runInfo);
 
     auto blockWidth = runInfo.cldnnStyle.blockWidth;
+    if (params.fused_ops.size() > 0) {
+        FusedOpsConfiguration conf_vec = {"_VEC", {"b", "(f_block*16)", "y", "x"}, "dst", blockWidth, true, false, true, false };
+        FusedOpsConfiguration conf_scalar = {"_SCALAR", {"b", "(f_block*16)", "yi", "xi"}, "dst[i]", 1, true, false, true, false };
+        jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec, conf_scalar}));
+        jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
+        jit.Merge(MakeTypeJitConstants(Datatype::F16, "half"));
+    }
 
     jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
     jit.AddConstant(MakeJitConstant("PADDED_INPUT", params.inputs[0].X().pad.Total() != 0));
@@ -138,86 +145,6 @@ JitConstants ConvolutionKernel_bfyx_f16_1x1::GetJitConstants(const convolution_p
     return jit;
 }
 
-JitConstants ConvolutionKernel_bfyx_f16_1x1::GetFusedPrimitivesJitConstants(const convolution_params& params,
-                                                                            const DispatchData& kd) const {
-    JitConstants jit = {};
-
-    size_t op_id = 0;
-    std::string input_decls = "";
-    std::string load_decls_vec = "";
-    std::string load_decls = "";
-    std::string eltwise_fused_ops_vec = "";
-    std::string eltwise_fused_ops = "";
-
-    auto make_jit_vector_type = [](std::string tensor_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-        if (vec_size > 1)
-            return "MAKE_VECTOR_TYPE(" + tensor_name + "_TYPE," + std::to_string(vec_size) + ")";
-        else
-            return tensor_name + "_TYPE";
-    };
-
-    auto make_jit_load = [](std::string tensor_name, std::string ptr_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-
-        std::string index_func_call_vec = tensor_name + "_GET_INDEX(b, f_block*16, y, x)";
-        std::string index_func_call = tensor_name + "_GET_INDEX(b, f_block*16, yi, xi)";
-        if (vec_size > 1)
-            return " UNIT_BLOCK_READ" + std::to_string(vec_size) + "(" + ptr_name + ", " + index_func_call_vec + ")";
-        else
-            return " UNIT_BLOCK_READ(" + ptr_name + ", " + index_func_call + ")";
-    };
-
-    for (auto& fused_dep : params.fused_ops) {
-        std::string op_type = "";
-        switch (fused_dep.type) {
-            case convolution_params::fused_operation_desc::Type::ELTWISE: {
-                op_type = "eltwise" + std::to_string(op_id);
-                eltwise_fused_ops_vec += "dst = (dst + " + op_type + "_data);";
-                eltwise_fused_ops += "dst[i] = (dst[i] + " + op_type + "_data);";
-                break;
-            }
-            default:
-                throw std::invalid_argument("Invalid fused op in convolution kernel: " + params.layerID);
-        }
-
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = "FUSED_OP_" + std::to_string(op_id) + "_INPUT" + std::to_string(op_input_id);
-            std::string ptr_name = op_type + "_input" + std::to_string(op_input_id);
-
-            std::string var_name = op_type + "_data";
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) +
-                           "* " + ptr_name + ",";
-            load_decls_vec += make_jit_vector_type(name, kd.cldnnStyle.blockWidth) + " " + var_name + " = " +
-                              make_jit_load(name, ptr_name, kd.cldnnStyle.blockWidth) + ";";
-            load_decls += make_jit_vector_type(name, 1) + " " + var_name + " = " +
-                          make_jit_load(name, ptr_name, 1) + ";";
-        }
-
-        if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type)
-                ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
-
-            jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops_vec += "dst = ACTIVATION"+suffix+"(dst, ACTIVATION_PARAMS"+suffix+");";
-            eltwise_fused_ops += "dst[i] = ACTIVATION"+suffix+"(dst[i], ACTIVATION_PARAMS"+suffix+");";
-        }
-        op_id++;
-    }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA_VEC", load_decls_vec));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA", load_decls));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS_VEC", eltwise_fused_ops_vec));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
-
-    return jit;
-}
-
 KernelsData ConvolutionKernel_bfyx_f16_1x1::GetKernelsData(const Params& params, const optional_params& options) const {
     return GetCommonKernelsData(params, options, DEFAULT, -1);
 }
index 0ed826a..0c220a0 100644 (file)
@@ -38,7 +38,6 @@ protected:
     bool Validate(const Params& p, const optional_params& o) const override;
     DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
-    JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& kd) const override;
 
     struct AutoTuneOption {
         size_t blockWidth;
index 6bb08e7..ec25cb3 100644 (file)
@@ -49,7 +49,7 @@ bool ConvolutionKernel_bfyx_f16_depthwise::Validate(const Params& p, const optio
     if (!cp.depthwise_separable_opt || (cp.inputs[0].Feature().v != cp.split && cp.inputs[0].Feature().v != cp.groups))
         return false;
 
-    if (cp.filterSize.x != 3 || cp.filterSize.y != 3 || cp.inputs[0].Batch().v != 1)
+    if (cp.filterSize.x != 3 || cp.filterSize.y != 3)
         return false;
 
     if (cp.stride.x != 1 && cp.stride.x != 2)
@@ -82,8 +82,18 @@ JitConstants ConvolutionKernel_bfyx_f16_depthwise::GetJitConstants(const convolu
                                                                    const DispatchData& kd) const {
     auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
 
+    const auto block_width = 8;
+
+    if (params.fused_ops.size() > 0) {
+        FusedOpsConfiguration conf_vec = {"_VEC", {"b", "(f_block*16)", "y", "x"}, "dst", block_width, true, false, true, false };
+        FusedOpsConfiguration conf_scalar = {"_SCALAR", {"b", "(f_block*16)", "y", "(x+i)"}, "dst[i]", 1, true, false, true, false };
+        jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec, conf_scalar}));
+        jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
+        jit.Merge(MakeTypeJitConstants(Datatype::F16, "half"));
+    }
+
     jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
-    jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.output.X().v, 8)));
+    jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.output.X().v, block_width)));
     jit.AddConstant(MakeJitConstant("IC_BLOCK", feature_block_size));
     if (params.output.Feature().v % feature_block_size != 0) {
         jit.AddConstant(MakeJitConstant("OUTPUT_LEFTOVERS", 1));
@@ -97,83 +107,4 @@ KernelsData ConvolutionKernel_bfyx_f16_depthwise::GetKernelsData(const Params& p
     return GetCommonKernelsData(params, options);
 }
 
-JitConstants ConvolutionKernel_bfyx_f16_depthwise::GetFusedPrimitivesJitConstants(const convolution_params& params,
-                                                                                  const DispatchData& /*kd*/) const {
-    JitConstants jit = {};
-
-    size_t op_id = 0;
-    std::string input_decls = "";
-    std::string load_decls_vec = "";
-    std::string load_decls = "";
-    std::string eltwise_fused_ops_vec = "";
-    std::string eltwise_fused_ops = "";
-
-    auto make_jit_vector_type = [](std::string tensor_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-        if (vec_size > 1)
-            return "MAKE_VECTOR_TYPE(" + tensor_name + "_TYPE," + std::to_string(vec_size) + ")";
-        else
-            return tensor_name + "_TYPE";
-    };
-
-    auto make_jit_load = [](std::string tensor_name, std::string ptr_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-
-        std::string index_func_call_vec = tensor_name + "_GET_INDEX(b, f_block*16, y, x)";
-        std::string index_func_call = tensor_name + "_GET_INDEX(b, f_block*16, y, (x+i))";
-        if (vec_size > 1)
-            return " UNIT_BLOCK_READ" + std::to_string(vec_size) + "(" + ptr_name + ", " + index_func_call_vec + ")";
-        else
-            return " UNIT_BLOCK_READ(" + ptr_name + ", " + index_func_call + ")";
-    };
-
-    for (auto& fused_dep : params.fused_ops) {
-        std::string op_type = "";
-        switch (fused_dep.type) {
-            case convolution_params::fused_operation_desc::Type::ELTWISE: {
-                op_type = "eltwise";
-                eltwise_fused_ops_vec += "dst = (dst + " + op_type + "_data);";
-                eltwise_fused_ops += "dst[i] = (dst[i] + " + op_type + "_data);";
-                break;
-            }
-            default:
-                throw std::invalid_argument("Invalid fused op in convolution kernel: " + params.layerID);
-        }
-
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = "FUSED_OP_" + std::to_string(op_id) + "_INPUT" + std::to_string(op_input_id);
-            std::string ptr_name = op_type + "_input" + std::to_string(op_input_id);
-
-            std::string var_name = op_type + "_data";
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) +
-                           "* " + ptr_name + ",";
-            load_decls_vec += make_jit_vector_type(name, 8) + " " + var_name + " = " +
-                              make_jit_load(name, ptr_name, 8) + ";";
-            load_decls += make_jit_vector_type(name, 1) + " " + var_name + " = " +
-                          make_jit_load(name, ptr_name, 1) + ";";
-        }
-
-        if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type)
-                ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
-
-            jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops_vec += "dst = ACTIVATION"+suffix+"(dst, ACTIVATION_PARAMS"+suffix+");";
-            eltwise_fused_ops += "dst[i] = ACTIVATION"+suffix+"(dst[i], ACTIVATION_PARAMS"+suffix+");";
-        }
-        op_id++;
-    }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA_VEC", load_decls_vec));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA", load_decls));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS_VEC", eltwise_fused_ops_vec));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
-
-    return jit;
-}
 }  // namespace kernel_selector
index d98f159..c7a7eff 100644 (file)
@@ -38,6 +38,5 @@ protected:
     bool NeedPaddedInput() const override { return true; }
     JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
     DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
-    JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& kd) const override;
 };
 }  // namespace kernel_selector
index 523c9f4..7da4391 100644 (file)
@@ -131,6 +131,14 @@ JitConstants ConvolutionKernel_bfyx_to_bfyx_f16::GetJitConstants(const convoluti
 
     auto blockWidth = runInfo.cldnnStyle.blockWidth;
 
+    if (params.fused_ops.size() > 0) {
+        FusedOpsConfiguration conf_vec = {"_VEC", {"b", "(f_block*16)", "y", "x"}, "dst", blockWidth, true, false, true, false };
+        FusedOpsConfiguration conf_scalar = {"_SCALAR", {"b", "(f_block*16)", "y", "(x+i)"}, "dst[i]", 1, true, false, true, false };
+        jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec, conf_scalar}));
+        jit.Merge(MakeTypeJitConstants(Datatype::F32, "float"));
+        jit.Merge(MakeTypeJitConstants(Datatype::F16, "half"));
+    }
+
     size_t input_line_size = std::min(params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1)*params.dilation.x + 1,
                                       input.X().v + input.X().pad.Total());
     size_t input_block_size = CeilDiv(input_line_size * params.filterSize.y, sub_group_size);
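For reference, the input slicing above follows input_line_size = min(stride_x * (blockWidth - 1) + (filter_x - 1) * dilation_x + 1, padded input width) and input_block_size = ceil(input_line_size * filter_y / sub_group_size). A small standalone sketch with illustrative numbers (a 3x3, stride-1, dilation-1 convolution and an assumed sub-group size of 16; none of the values are taken from the change):

    // Sketch (illustrative values only) of the input slicing math used above.
    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    static size_t ceil_div(size_t a, size_t b) { return (a + b - 1) / b; }  // local stand-in for CeilDiv

    int main() {
        const size_t block_width = 8, sub_group_size = 16;                      // assumed typical values
        const size_t stride_x = 1, dilation_x = 1, filter_x = 3, filter_y = 3;  // illustrative 3x3, stride 1
        const size_t padded_input_x = 34;                                       // illustrative padded input width

        const size_t input_line_size = std::min(stride_x * (block_width - 1) + (filter_x - 1) * dilation_x + 1,
                                                padded_input_x);                               // -> 10
        const size_t input_block_size = ceil_div(input_line_size * filter_y, sub_group_size);  // -> 2
        std::cout << input_line_size << " " << input_block_size << "\n";
    }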
@@ -162,86 +170,6 @@ KernelsData ConvolutionKernel_bfyx_to_bfyx_f16::GetKernelsData(const Params& par
     return GetTunedKernelsDataByIndex(params, options);
 }
 
-JitConstants ConvolutionKernel_bfyx_to_bfyx_f16::GetFusedPrimitivesJitConstants(const convolution_params& params,
-                                                                                const DispatchData& kd) const {
-    JitConstants jit = {};
-
-    size_t op_id = 0;
-    std::string input_decls = "";
-    std::string load_decls_vec = "";
-    std::string load_decls = "";
-    std::string eltwise_fused_ops_vec = "";
-    std::string eltwise_fused_ops = "";
-
-    auto make_jit_vector_type = [](std::string tensor_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-        if (vec_size > 1)
-            return "MAKE_VECTOR_TYPE(" + tensor_name + "_TYPE," + std::to_string(vec_size) + ")";
-        else
-            return tensor_name + "_TYPE";
-    };
-
-    auto make_jit_load = [](std::string tensor_name, std::string ptr_name, size_t vec_size) -> std::string {
-        if (vec_size == 0 || vec_size > 8)
-            throw std::invalid_argument("Invalid vector size in jit definitions");
-
-        std::string index_func_call_vec = tensor_name + "_GET_INDEX(b, f_block*16, y, x)";
-        std::string index_func_call = tensor_name + "_GET_INDEX(b, f_block*16, y, x+i)";
-        if (vec_size > 1)
-            return " UNIT_BLOCK_READ" + std::to_string(vec_size) + "(" + ptr_name + ", " + index_func_call_vec + ")";
-        else
-            return " UNIT_BLOCK_READ(" + ptr_name + ", " + index_func_call + ")";
-    };
-
-    for (auto& fused_dep : params.fused_ops) {
-        std::string op_type = "";
-        switch (fused_dep.type) {
-            case convolution_params::fused_operation_desc::Type::ELTWISE: {
-                op_type = "eltwise";
-                eltwise_fused_ops_vec += "dst = (dst + " + op_type + "_data);";
-                eltwise_fused_ops += "dst[i] = (dst[i] + " + op_type + "_data);";
-                break;
-            }
-            default:
-                throw std::invalid_argument("Invalid fused op in binary_convolution kernel: " + params.layerID);
-        }
-
-        for (size_t op_input_id = 0; op_input_id < fused_dep.tensors.size(); op_input_id++) {
-            std::string name = "FUSED_OP_" + std::to_string(op_id) + "_INPUT" + std::to_string(op_input_id);
-            std::string ptr_name = op_type + "_input" + std::to_string(op_input_id);
-
-            std::string var_name = op_type + "_data";
-            jit.AddConstant(MakeJitConstant(name, fused_dep.tensors[op_input_id]));
-            input_decls += "const __global " + toCLType(fused_dep.tensors[op_input_id].GetDType()) +
-                           "* " + ptr_name + ",";
-            load_decls_vec += make_jit_vector_type(name, kd.cldnnStyle.blockWidth) + " " + var_name + " = " +
-                              make_jit_load(name, ptr_name, kd.cldnnStyle.blockWidth) + ";";
-            load_decls += make_jit_vector_type(name, 1) + " " + var_name + " = " +
-                          make_jit_load(name, ptr_name, 1) + ";";
-        }
-
-        if (fused_dep.activation.function != ActivationFunction::NONE) {
-            std::string temp_op_type = op_type;
-            for (auto& ch : temp_op_type)
-                ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
-            std::string suffix = "_" + temp_op_type;
-
-            jit.Merge(MakeActivationJitConstants(fused_dep.activation, suffix));
-            eltwise_fused_ops_vec += "dst = ACTIVATION"+suffix+"(dst, ACTIVATION_PARAMS"+suffix+");";
-            eltwise_fused_ops += "dst[i] = ACTIVATION"+suffix+"(dst[i], ACTIVATION_PARAMS"+suffix+");";
-        }
-        op_id++;
-    }
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA_VEC", load_decls_vec));
-    jit.AddConstant(MakeJitConstant("FUSED_OPS_LOAD_DATA", load_decls));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS_VEC", eltwise_fused_ops_vec));
-    jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops));
-
-    return jit;
-}
-
 KernelsData ConvolutionKernel_bfyx_to_bfyx_f16::GetKernelsDataForAutoTune(const Params& params,
                                                                           const optional_params& options) const {
     if (!Validate(params, options)) {
index b0d815e..76818ec 100644 (file)
@@ -44,7 +44,6 @@ protected:
     bool Validate(const Params& p, const optional_params& o) const override;
     DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
-    JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& kd) const override;
 
 private:
     struct AutoTuneOption {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_f16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_f16.cpp
new file mode 100644 (file)
index 0000000..efda858
--- /dev/null
@@ -0,0 +1,194 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "convolution_kernel_bfzyx_f16.h"
+#include "kernel_selector_utils.h"
+#include <algorithm>
+
+namespace kernel_selector {
+
+static const size_t sub_group_size = 16;
+static const size_t feature_block_size = 16;
+
+ParamsKey ConvolutionKernel_bfzyx_f16::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableInputWeightsType(WeightsType::F32);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableSplitSupport();
+    k.EnableBatching();
+    k.EnableSubGroup();
+    k.EnableSubGroupShort();
+    return k;
+}
+
+ConvolutionKernelBase::DispatchData ConvolutionKernel_bfzyx_f16::SetDefault(const convolution_params& params,
+                                                                           int autoTuneIndex) const {
+    DispatchData kd = ConvolutionKernelBase::SetDefault(params, autoTuneIndex);
+
+    const auto& out = params.output;
+
+    auto x = out.X().v;
+    auto y = out.Y().v;
+    auto z = out.Z().v;
+    auto f = out.Feature().v;
+    auto b = out.Batch().v;
+
+    auto oh_block = 1;
+
+    auto div = 16;
+    while (div > 1) {
+        if (x % div == 0)
+            break;
+        div--;
+    }
+    auto ow_block = std::max(8, div);
+
+    auto ocb = 128;
+    while (ocb > 16) {
+        if (f % ocb == 0)
+            break;
+        else
+            ocb /= 2;
+    }
+
+    kd.cldnnStyle.blockWidth = ow_block;
+
+    kd.gws0 = ocb;
+    kd.gws1 = CeilDiv(y, oh_block) * CeilDiv(x, ow_block) * z;
+    kd.gws2 = b * (f / ocb);
+
+    kd.lws0 = sub_group_size;
+    kd.lws1 = 1;
+    kd.lws2 = 1;
+
+    if (b == 1)
+        kd.effiency = FORCE_PRIORITY_2;
+    else
+        kd.effiency = FORCE_PRIORITY_7;
+
+    return kd;
+}
+
+bool ConvolutionKernel_bfzyx_f16::Validate(const Params& p, const optional_params& o) const {
+    if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
+        return false;
+    }
+
+    const auto& params = static_cast<const convolution_params&>(p);
+
+    const auto& input = params.inputs[0];
+    const auto& output = params.output;
+
+    if (output.GetDType() != use_data_type)
+        return false;
+
+    if (output.Feature().v % feature_block_size != 0)
+        return false;
+
+    if (input.Feature().v % feature_block_size != 0)
+        return false;
+
+    // Check that padding before features doesn't misalign the blocks
+    if (input.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
+        return false;
+    }
+
+    return true;
+}
+
+JitConstants ConvolutionKernel_bfzyx_f16::GetJitConstants(const convolution_params& params,
+                                                         const DispatchData& runInfo) const {
+    auto input = params.inputs[0];
+    auto output = params.output;
+    auto jit = Parent::GetJitConstants(params, runInfo);
+
+    jit.AddConstant(MakeJitConstant("VER_8OW16C", 1));
+    jit.AddConstant(MakeJitConstant("OC_BLOCK", 16));
+    jit.AddConstant(MakeJitConstant("NCHW", 1));
+    jit.AddConstant(MakeJitConstant("CASE_3D", 1));
+
+    jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0));
+    jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1));
+    jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2));
+
+    jit.AddConstant(MakeJitConstant("OCB", runInfo.gws0));
+
+    jit.AddConstant(MakeJitConstant("SUM_SCALE", 1));
+
+    auto blockWidth = runInfo.cldnnStyle.blockWidth;
+    // the conditional code below was replaced to fix a security issue
+    // auto is_1stconv = false;
+    // auto mb_block =(is_1stconv && output.Batch().v % 16 == 0) ? 16 : 1;
+    // auto ic_block = (is_1stconv) ? 1 : 16;
+    auto mb_block = 1;
+    auto ic_block = 16;
+
+    jit.AddConstant(MakeJitConstant("MB_BLOCK", mb_block));
+    jit.AddConstant(MakeJitConstant("MB_LAST", (output.Batch().v / 16) * 16));
+    jit.AddConstant(MakeJitConstant("IC_BLOCK", ic_block));
+    jit.AddConstant(MakeJitConstant("OH_BLOCK", 1));
+    jit.AddConstant(MakeJitConstant("OW_BLOCK", blockWidth));
+    jit.AddConstant(MakeJitConstant("OW_LAST", (output.X().v / blockWidth) * blockWidth));
+    jit.AddConstant(MakeJitConstant("OWB", CeilDiv(output.X().v, blockWidth)));
+    jit.AddConstant(MakeJitConstant("OHB", CeilDiv(output.Y().v, 1)));
+    jit.AddConstant(MakeJitConstant("G", params.split));
+    jit.AddConstant(MakeJitConstant("DD", params.dilation.z - 1));
+    jit.AddConstant(MakeJitConstant("DH", params.dilation.y - 1));
+    jit.AddConstant(MakeJitConstant("DW", params.dilation.x - 1));
+    jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
+    jit.AddConstant(MakeJitConstant("FWD_DATA", 1));
+    jit.AddConstant(MakeJitConstant("IS_DW", "DEPTHWISE_SEPARABLE_OPT"));
+    jit.AddConstant(MakeJitConstant("WITH_BIAS", "BIAS_TERM"));
+
+    jit.AddConstant(MakeJitConstant("MB", "OUTPUT_BATCH_NUM"));
+    jit.AddConstant(MakeJitConstant("OC", "OUTPUT_FEATURE_NUM"));
+    jit.AddConstant(MakeJitConstant("OD", "OUTPUT_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("OH", "OUTPUT_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("OW", "OUTPUT_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("IC", "INPUT0_FEATURE_NUM"));
+    jit.AddConstant(MakeJitConstant("ID", "INPUT0_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("IH", "INPUT0_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("IW", "INPUT0_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("KD", "FILTER_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("KH", "FILTER_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("KW", "(FILTER_SIZE_X)"));
+    jit.AddConstant(MakeJitConstant("SD", "STRIDE_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("SH", "STRIDE_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("SW", "STRIDE_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("PD", "PADDING_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("PH", "PADDING_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("PW", "PADDING_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("PD_R", "PADDING_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("PH_R", "PADDING_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("PW_R", "PADDING_SIZE_X"));
+
+    return jit;
+}
+
+KernelsData ConvolutionKernel_bfzyx_f16::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+}  // namespace kernel_selector
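The block-size selection in ConvolutionKernel_bfzyx_f16::SetDefault above is easiest to follow with numbers. The sketch below replays the two loops for a few illustrative output widths and feature counts (the sample sizes are made up):

    // Replays the ow_block / ocb selection loops from ConvolutionKernel_bfzyx_f16::SetDefault.
    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    static size_t pick_ow_block(size_t x) {
        size_t div = 16;
        while (div > 1 && x % div != 0) div--;      // largest divisor of x that is <= 16
        return std::max<size_t>(8, div);            // but never narrower than 8
    }

    static size_t pick_ocb(size_t f) {
        size_t ocb = 128;
        while (ocb > 16 && f % ocb != 0) ocb /= 2;  // halve from 128 until it divides f, stopping at 16
        return ocb;
    }

    int main() {
        for (size_t x : {56, 28, 17})
            std::cout << "x=" << x << " -> ow_block=" << pick_ow_block(x) << "\n";  // 14, 14, 8
        for (size_t f : {256, 96, 48})
            std::cout << "f=" << f << " -> ocb=" << pick_ocb(f) << "\n";            // 128, 32, 16
    }

The selected ow_block then feeds the OW_BLOCK / OW_LAST / OWB constants in GetJitConstants, while ocb sets the first global work-group dimension.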
@@ -1,4 +1,5 @@
-// Copyright (c) 2016 Intel Corporation
+//
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+//
 
 #pragma once
 
 #include "convolution_kernel_base.h"
 #include <vector>
 
-// Step 0:
-//
-// 1. choose a tutorial mode
-// 2. modify convolution_tutorial.cl as well
-
-#define ADVANCED_TUTORIAL  // simple runnable example with explanations
-
 namespace kernel_selector {
 
-class ConvolutionKernel_Tutorial : public ConvolutionKernelBase {
+class ConvolutionKernel_bfzyx_f16 : public ConvolutionKernelBase {
 public:
     using Parent = ConvolutionKernelBase;
-    ConvolutionKernel_Tutorial() : Parent("convolution_tutorial") {}
-    virtual ~ConvolutionKernel_Tutorial() {}
+
+    explicit ConvolutionKernel_bfzyx_f16(Datatype use_data_type) :
+        ConvolutionKernelBase(use_data_type == Datatype::F32 ? "gen9_common_conv_fwd_data_f32" : "gen9_common_conv_fwd_data_f16"),
+        use_data_type(use_data_type) {}
+
+    virtual ~ConvolutionKernel_bfzyx_f16() {}
 
     KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
     ParamsKey GetSupportedKey() const override;
@@ -39,17 +37,14 @@ public:
 protected:
     std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override {
         return {
-            WeightsLayout::oiyx,
-            WeightsLayout::yxio,
-            WeightsLayout::iyxo,
-            WeightsLayout::oyxi,
+            WeightsLayout::o_i_zyx_i16_o16,
         };
     }
-
-#ifdef ADVANCED_TUTORIAL
     bool Validate(const Params& p, const optional_params& o) const override;
-    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
     DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
-#endif
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+
+    // This class is the common base for the FP16 and FP32 kernel classes
+    Datatype use_data_type;
 };
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
@@ -1,4 +1,5 @@
-// Copyright (c) 2018 Intel Corporation
+//
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+//
 
 #pragma once
 
-#include "cldnn.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "convolution_kernel_bfzyx_f16.h"
 
-CLDNN_BEGIN_PRIMITIVE_DESC(pyramid_roi_align)
+namespace kernel_selector {
 
-CLDNN_END_PRIMITIVE_DESC(pyramid_roi_align)
+class ConvolutionKernel_bfzyx_f16_fp16 : public ConvolutionKernel_bfzyx_f16 {
+public:
+    using Parent = ConvolutionKernel_bfzyx_f16;
 
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(pyramid_roi_align);
+    ConvolutionKernel_bfzyx_f16_fp16() : ConvolutionKernel_bfzyx_f16(Datatype::F16) {}
 
-#ifdef __cplusplus
-}
-#endif
+    virtual ~ConvolutionKernel_bfzyx_f16_fp16() {}
+};
+}  // namespace kernel_selector
@@ -1,4 +1,4 @@
-/*
+//
 // Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-*/
+//
 
-///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "cldnn.h"
-/// @addtogroup c_api C API
-/// @{
-/// @addtogroup c_topology Network Topology
-/// @{
-/// @addtogroup c_primitives Primitives
-/// @{
-
-#ifdef __cplusplus
-extern "C" {
-#endif
 
-CLDNN_BEGIN_PRIMITIVE_DESC(quantize)
-/// @brief levels The number of quantization levels.
-int levels;
-CLDNN_END_PRIMITIVE_DESC(quantize)
+#include "convolution_kernel_bfzyx_f16.h"
 
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(quantize);
+namespace kernel_selector {
 
-#ifdef __cplusplus
-}
-#endif
+class ConvolutionKernel_bfzyx_f16_fp32 : public ConvolutionKernel_bfzyx_f16 {
+public:
+    using Parent = ConvolutionKernel_bfzyx_f16;
 
-/// @}
-/// @}
-/// @}
+    ConvolutionKernel_bfzyx_f16_fp32() : ConvolutionKernel_bfzyx_f16(Datatype::F32) {}
 
+    virtual ~ConvolutionKernel_bfzyx_f16_fp32() {}
+};
+}  // namespace kernel_selector
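The two headers above only bind a concrete Datatype to the shared ConvolutionKernel_bfzyx_f16 implementation, whose constructor picks the gen9_common_conv_fwd_data_f32 or gen9_common_conv_fwd_data_f16 kernel source and whose Validate rejects outputs of the other type. A minimal standalone analogue of that pattern (all names in the sketch are made up; this is not clDNN API):

    // Illustrative analogue of the base-plus-thin-wrapper pattern used by
    // ConvolutionKernel_bfzyx_f16 / _fp16 / _fp32 (names here are invented for the sketch).
    #include <iostream>
    #include <string>

    enum class Dtype { F16, F32 };

    class BaseKernel {
    public:
        explicit BaseKernel(Dtype dt)
            : kernel_name_(dt == Dtype::F32 ? "conv_fwd_f32" : "conv_fwd_f16"), dtype_(dt) {}
        virtual ~BaseKernel() = default;

        bool Validate(Dtype output_dtype) const { return output_dtype == dtype_; }  // mirrors the use_data_type check
        const std::string& name() const { return kernel_name_; }

    private:
        std::string kernel_name_;
        Dtype dtype_;
    };

    // Each wrapper only fixes the data type; all real logic stays in the base.
    struct Fp16Kernel : BaseKernel { Fp16Kernel() : BaseKernel(Dtype::F16) {} };
    struct Fp32Kernel : BaseKernel { Fp32Kernel() : BaseKernel(Dtype::F32) {} };

    int main() {
        Fp16Kernel k16;
        std::cout << k16.name() << " accepts F32 output? " << k16.Validate(Dtype::F32) << "\n";  // 0
    }

Registering both thin wrappers in the kernel selector (see the convolution_kernel_selector hunk below) lets the output data type select the kernel source without duplicating the implementation.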
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfzyx_ref.cpp
deleted file mode 100644 (file)
index f103809..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "convolution_kernel_bfzyx_ref.h"
-
-namespace kernel_selector {
-
-ParamsKey ConvolutionKernel_bfzyx_Ref::GetSupportedKey() const {
-    ParamsKey k;
-    k.EnableInputDataType(Datatype::F16);
-    k.EnableInputDataType(Datatype::F32);
-    k.EnableInputDataType(Datatype::INT8);
-    k.EnableOutputDataType(Datatype::F16);
-    k.EnableOutputDataType(Datatype::F32);
-    k.EnableOutputDataType(Datatype::INT8);
-    k.EnableInputWeightsType(WeightsType::F16);
-    k.EnableInputWeightsType(WeightsType::F32);
-    k.EnableInputWeightsType(WeightsType::INT8);
-    k.EnableInputLayout(DataLayout::bfzyx);
-    k.EnableOutputLayout(DataLayout::bfzyx);
-    k.EnableTensorOffset();
-    k.EnableTensorPitches();
-    k.EnableDilation();
-    k.EnableBiasPerFeature();
-    k.EnableBiasPerOutput();
-    k.EnableNonBiasTerm();
-    k.EnableBatching();
-    k.EnableSplitSupport();
-    k.EnableDepthwiseSeparableOpt();
-    k.EnableInt8Quantization();
-    k.EnableOutputCalibration();
-    k.DisableTuning();
-    k.EnableLocalConvolution();
-    k.EnableGroupedConvolution();
-    return k;
-}
-
-KernelsData ConvolutionKernel_bfzyx_Ref::GetKernelsData(const Params& params, const optional_params& options) const {
-    return GetTunedKernelsDataByIndex(params, options);
-}
-}  // namespace kernel_selector
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.cpp
new file mode 100644 (file)
index 0000000..2f3b21e
--- /dev/null
@@ -0,0 +1,192 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "convolution_kernel_fs_byx_fsv32_depthwise.h"
+#include <vector>
+
+namespace kernel_selector {
+
+static constexpr size_t subGroupSize = 16;
+static constexpr size_t fsv = 32;
+static constexpr size_t fsvPerThread = fsv / subGroupSize;
+
+ConvolutionKernel_fs_byx_fsv32_depthwise::ConvolutionKernel_fs_byx_fsv32_depthwise()
+    : ConvolutionKernelBase("convolution_gpu_fs_byx_fsv32_depthwise") {
+    std::vector<size_t> blockWidths = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+    std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
+
+    for (auto w : blockWidths) {
+        for (auto exeMode : executionModes) {
+            autoTuneOptions.emplace_back(AutoTuneOption{w, exeMode});
+        }
+    }
+}
+
+ParamsKey ConvolutionKernel_fs_byx_fsv32_depthwise::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableInputLayout(DataLayout::fs_b_yx_fsv32);
+    k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32);
+    k.EnableBiasPerFeature();
+    k.EnableBiasPerOutput();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableDilation();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableDepthwiseSeparableOpt();
+    k.EnableGroupedConvolution();
+    return k;
+}
+
+size_t ConvolutionKernel_fs_byx_fsv32_depthwise::getInputWidth(const convolution_params& arg, size_t blockWidth) const {
+    return (blockWidth - 1) * arg.stride.x + (arg.filterSize.x - 1) * arg.dilation.x + 1;
+}
+
+size_t ConvolutionKernel_fs_byx_fsv32_depthwise::getMinRegisterUsage(const convolution_params& arg, size_t blockWidth) const {
+    size_t weightsRegisters = 2;
+    size_t outputRegisters = blockWidth * 2;
+    size_t inputRegisters = getInputWidth(arg, blockWidth) * 2;
+
+    return weightsRegisters + outputRegisters + inputRegisters;
+}
+
+ConvolutionKernel_fs_byx_fsv32_depthwise::AutoTuneOption ConvolutionKernel_fs_byx_fsv32_depthwise::GetAutoTuneOptions(
+    const Params& arg,
+    int autoTuneIndex) const {
+    if (autoTuneIndex >= 0 && autoTuneIndex < static_cast<int>(autoTuneOptions.size()))
+        return autoTuneOptions[autoTuneIndex];
+
+    const convolution_params& cp = static_cast<const convolution_params&>(arg);
+
+    const size_t regThreshold = 64;
+
+    std::vector<size_t> nonOptBlockWidths = {3, 2, 1};  // This will most likely be memory bound
+    std::vector<size_t> optBlockWidths = {8, 7, 6, 5, 4};
+
+    // Check if output can be evenly divided into large blocks
+    for (auto w : optBlockWidths) {
+        if (cp.output.X().v % w == 0 && getMinRegisterUsage(cp, w) < regThreshold)
+            return {w, AGE_BASED};
+    }
+
+    // Try to find the large block width with the smallest leftover
+    size_t minLeftover = static_cast<size_t>(-1);
+    size_t foundWidth = 0;
+    for (auto w : optBlockWidths) {
+        if (getMinRegisterUsage(cp, w) < regThreshold && Pad(cp.output.X().v, w) < minLeftover) {
+            minLeftover = Pad(cp.output.X().v, w);
+            foundWidth = w;
+        }
+    }
+
+    if (foundWidth != 0)
+        return {foundWidth, AGE_BASED};
+
+    // Check small and memory bound block sizes
+    for (auto w : nonOptBlockWidths) {
+        if (cp.output.X().v % w == 0 && getMinRegisterUsage(cp, w) < regThreshold)
+            return {w, AGE_BASED};
+    }
+
+    // This means all previous block sizes consumed too many registers; fall back to block width = 1
+    return {1, AGE_BASED};
+}
+
+ConvolutionKernelBase::DispatchData ConvolutionKernel_fs_byx_fsv32_depthwise::SetDefault(const convolution_params& arg,
+                                                                               int autoTuneIndex) const {
+    DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+    AutoTuneOption option = GetAutoTuneOptions(arg, autoTuneIndex);
+
+    runInfo.effiency = FORCE_PRIORITY_3;
+
+    runInfo.cldnnStyle.blockHeight = 1;
+    runInfo.cldnnStyle.blockWidth = option.blockWidth;
+    runInfo.cldnnStyle.inputBlockWidth = getInputWidth(arg, option.blockWidth);
+
+    runInfo.lws0 = 1;
+    runInfo.lws1 = 1;
+    runInfo.lws2 = 16;
+
+    runInfo.gws0 = CeilDiv(arg.output.X().v, option.blockWidth);
+    runInfo.gws1 = arg.output.Y().v;
+    runInfo.gws2 = CeilDiv(arg.output.Feature().v, 32) * 16 * arg.output.Batch().v;
+
+    return runInfo;
+}
+
+bool ConvolutionKernel_fs_byx_fsv32_depthwise::Validate(const Params& p, const optional_params& o) const {
+    if (!ConvolutionKernelBase::Validate(p, o))
+        return false;
+
+    auto cp = static_cast<const convolution_params&>(p);
+    if (cp.groups < 16)
+        return false;
+
+    if (cp.inputs[0].Feature().v != cp.groups || cp.output.Feature().v != cp.groups)
+        return false;
+
+    // Output feature padding must be a multiple of fsv to keep block alignment
+    if (cp.output.Feature().pad.before % fsv != 0)
+        return false;
+
+    return true;
+}
+
+JitConstants ConvolutionKernel_fs_byx_fsv32_depthwise::GetJitConstants(const convolution_params& params,
+                                                             const DispatchData& kd) const {
+    auto jit = ConvolutionKernelBase::GetJitConstants(params, kd);
+
+    jit.AddConstant(MakeJitConstant("INPUT_BLOCK_WIDTH", kd.cldnnStyle.inputBlockWidth));
+    jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth));
+    jit.AddConstant(MakeJitConstant("FSV", fsv));
+    jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subGroupSize));
+    jit.AddConstant(MakeJitConstant("FSV_PER_THREAD", fsvPerThread));
+
+    return jit;
+}
+
+KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetTunedKernelsDataByIndex(const Params& params,
+                                                                       const optional_params& options,
+                                                                       const int autoTuneIndex) const {
+    auto tuneOptions = GetAutoTuneOptions(params, autoTuneIndex);
+    return GetCommonKernelsData(params, options, tuneOptions.exeMode, autoTuneIndex);
+}
+
+KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+
+KernelsData ConvolutionKernel_fs_byx_fsv32_depthwise::GetKernelsDataForAutoTune(const Params& params,
+                                                                      const optional_params& options) const {
+    if (!Validate(params, options)) {
+        return {};
+    }
+
+    KernelsData res = {};
+
+    for (size_t i = 0; i < autoTuneOptions.size(); i++) {
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options, static_cast<int>(i));
+        if (!kd.empty()) {
+            res.emplace_back(kd[0]);
+        }
+    }
+
+    return res;
+}
+
+}  // namespace kernel_selector
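The auto-tune path above bounds an estimated register footprint per candidate block width before accepting it. A small sketch replaying getInputWidth / getMinRegisterUsage for an illustrative 3x3, stride-1, dilation-1 depthwise filter (the filter parameters are assumptions, not taken from the change):

    // Replays getInputWidth / getMinRegisterUsage from ConvolutionKernel_fs_byx_fsv32_depthwise
    // for an illustrative 3x3, stride-1, dilation-1 filter.
    #include <cstddef>
    #include <iostream>

    static size_t input_width(size_t block_w, size_t stride_x, size_t filter_x, size_t dilation_x) {
        return (block_w - 1) * stride_x + (filter_x - 1) * dilation_x + 1;
    }

    static size_t min_register_usage(size_t block_w, size_t stride_x, size_t filter_x, size_t dilation_x) {
        const size_t weights_regs = 2;
        const size_t output_regs = block_w * 2;
        const size_t input_regs = input_width(block_w, stride_x, filter_x, dilation_x) * 2;
        return weights_regs + output_regs + input_regs;
    }

    int main() {
        const size_t reg_threshold = 64;  // same threshold as in GetAutoTuneOptions
        for (size_t w : {8, 7, 6, 5, 4}) {
            const size_t regs = min_register_usage(w, /*stride*/ 1, /*filter*/ 3, /*dilation*/ 1);
            std::cout << "block width " << w << ": ~" << regs << " registers"
                      << (regs < reg_threshold ? " (ok)" : " (too many)") << "\n";
        }
        // For w = 8: 2 + 16 + 2 * (7 + 2 + 1) = 38 < 64, so an output width divisible
        // by 8 would be tuned to block width 8 with the AGE_BASED execution mode.
    }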
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_fs_byx_fsv32_depthwise.h
new file mode 100644 (file)
index 0000000..29a4da7
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "convolution_kernel_base.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+
+class ConvolutionKernel_fs_byx_fsv32_depthwise : public ConvolutionKernelBase {
+public:
+    ConvolutionKernel_fs_byx_fsv32_depthwise();
+    virtual ~ConvolutionKernel_fs_byx_fsv32_depthwise() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+    KernelsData GetTunedKernelsDataByIndex(const Params& params,
+                                           const optional_params& options,
+                                           int autoTuneIndex = -1) const override;
+
+protected:
+    std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override {
+        return {WeightsLayout::os_iyx_osv32};
+    }
+
+    bool Validate(const Params& p, const optional_params& o) const override;
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+    bool NeedPaddedInput() const override { return true; }
+
+private:
+    struct AutoTuneOption {
+        size_t blockWidth;
+        std::string exeMode;
+    };
+
+    std::vector<AutoTuneOption> autoTuneOptions;
+    AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const;
+    size_t getInputWidth(const convolution_params &arg, size_t blockWidth) const;
+    size_t getMinRegisterUsage(const convolution_params &arg, size_t blockWidth) const;
+};
+
+}  // namespace kernel_selector
index 42b49bd..0e96003 100644 (file)
@@ -102,7 +102,7 @@ JitConstants ConvolutionKernel_imad_3x3::GetJitConstants(const convolution_param
     auto mem_consts = Parent::GetJitConstants(params, kd);
 
     auto activation_constants =
-        MakeActivationJitConstants(params.activation, "_CONV");
+        MakeActivationJitConstants(params.activations, "_CONV");
     mem_consts.Merge(activation_constants);
 
     const auto& input = params.inputs[0];
index c739beb..0fa9def 100644 (file)
@@ -42,6 +42,10 @@ ParamsKey ConvolutionKernel_Ref::GetSupportedKey() const {
     k.EnableOutputLayout(DataLayout::byxf);
     k.EnableInputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::yxfb);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableDilation();
@@ -71,7 +75,7 @@ JitConstants ConvolutionKernel_Ref::GetJitConstants(const convolution_params& pa
     // TODO: This gives both ACTIVATION and ACTIVATION_TYPED. Should we
     // factor that out into a virtual function to avoid creation of similar
     // yet distinct macros?
-    jit.Merge(MakeActivationJitConstants(params.activation, "_CONV_TYPED", true));
+    jit.Merge(MakeActivationJitConstants(params.activations, "_CONV_TYPED", true));
     // Needs to be done on host to get _MAX_VAL/_MIN_VAL/TO_TYPE macros
     // available (will be used in the activation).
     //
@@ -99,7 +103,8 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_Ref::SetDefault(const conv
     // Just set the correct value for a particular implementation here,
     // until the whole hierarchy is re-written.
     const auto& out = params.output;
-    std::vector<size_t> global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
+    std::vector<size_t> global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
+
     auto local = GetOptimalLocalWorkGroupSizes(global);
 
     kd.gws0 = global[0];
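With the Y * Z term added above, the reference kernel's dispatch now also covers bfzyx outputs. A quick sketch of the resulting global work size for an illustrative 5D output (all dimensions made up):

    // Sketch of the updated global work-group sizing in ConvolutionKernel_Ref::SetDefault:
    // global = { X, Y * Z, Feature * Batch }.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        // Illustrative bfzyx output: b = 1, f = 16, z = 4, y = 8, x = 8.
        const size_t b = 1, f = 16, z = 4, y = 8, x = 8;
        const std::vector<size_t> global = {x, y * z, f * b};  // {8, 32, 16}
        std::cout << global[0] << " x " << global[1] << " x " << global[2] << "\n";
    }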
index f42a0eb..0aa118d 100644 (file)
@@ -37,6 +37,8 @@ protected:
             WeightsLayout::iyxo,
             WeightsLayout::oyxi,
             WeightsLayout::bf_lyx_yx,
+            WeightsLayout::oizyx,
+            WeightsLayout::o_i_zyx_i16_o16,
         };
     }
     JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
index a33f05b..4cf0917 100644 (file)
@@ -26,7 +26,6 @@
 #include "convolution_kernel_yxfb_yxio_b8.h"
 #include "convolution_kernel_yxfb_yxio_b1_block.h"
 #include "convolution_kernel_yxfb_yxio_b1_block_multiple_x.h"
-#include "convolution_kernel_tutorial.h"
 // #include "convolution_kernel_bfyx_3x3_dw_opt.h"
 #include "convolution_kernel_winograd_2x3_s1.h"
 #include "convolution_kernel_bfyx_1x1.h"
 #include "convolution_kernel_imad_3x3.h"
 #include "convolution_kernel_imad_1x1.h"
 #include "convolution_kernel_imad_7x7.h"
-#include "convolution_kernel_bfzyx_ref.h"
 #include "convolution_kernel_fs_byx_fsv32.h"
 #include "convolution_kernel_fs_byx_fsv32_1x1.h"
 #include "convolution_kernel_bfyx_to_fs_byx_fsv32.h"
+#include "convolution_kernel_fs_byx_fsv32_depthwise.h"
 #include "convolution_kernel_bfyx_f16_depthwise.h"
 #include "convolution_kernel_bfyx_f16_1x1.h"
 #include "convolution_kernel_bfyx_f16.h"
@@ -62,6 +61,8 @@
 #include "deformable_convolution_kernel_bfyx_ref.h"
 #include "deformable_convolution_kernel_bfyx_conv.h"
 #include "deformable_convolution_kernel_bfyx_interp.h"
+#include "convolution_kernel_bfzyx_f16_fp32.h"
+#include "convolution_kernel_bfzyx_f16_fp16.h"
 
 namespace kernel_selector {
 convolution_kernel_selector::convolution_kernel_selector() {
@@ -98,20 +99,20 @@ convolution_kernel_selector::convolution_kernel_selector() {
     Attach<ConvolutionKernel_mmad_batched_block>();
     Attach<ConvolutionKernel_mmad_batched_block_1x1>();
     //        Attach<ConvolutionKernel_mmad_32x32sg_slm_int8>();
-    // Attach<ConvolutionKernel_Tutorial>(); //In order to use this implementation for tutorial purposes please
-    // uncomment this line
     Attach<ConvolutionKernel_imad_3x3>();
     Attach<ConvolutionKernel_imad_1x1>();
     Attach<ConvolutionKernel_imad_7x7>();
-    Attach<ConvolutionKernel_bfzyx_Ref>();
     Attach<ConvolutionKernel_fs_byx_fsv32>();
     Attach<ConvolutionKernel_fs_byx_fsv32_1x1>();
+    Attach<ConvolutionKernel_fs_byx_fsv32_depthwise>();
     Attach<ConvolutionKernel_bfyx_to_fs_byx_fsv32>();
     Attach<ConvolutionKernel_bfyx_f16_depthwise>();
     Attach<ConvolutionKernel_bfyx_f16_1x1>();
     Attach<ConvolutionKernel_bfyx_f16>();
     Attach<ConvolutionKernel_bfyx_to_bfyx_f16>();
     Attach<DeformableConvolutionKernel_bfyx_Ref>();
+    Attach<ConvolutionKernel_bfzyx_f16_fp32>();
+    Attach<ConvolutionKernel_bfzyx_f16_fp16>();
 }
 
 KernelsData convolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp
deleted file mode 100644 (file)
index 8f4fd76..0000000
+++ /dev/null
@@ -1,184 +0,0 @@
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "convolution_kernel_tutorial.h"
-
-namespace kernel_selector {
-
-// Step 0:
-//
-// take a look on convolution_kernel_tutorial.h
-
-ParamsKey ConvolutionKernel_Tutorial::GetSupportedKey() const {
-    // Step 1:
-    // - Update the features supported by the kernel below
-
-    ParamsKey k;
-
-    // Supported data type
-    k.EnableInputDataType(Datatype::F16);
-    k.EnableInputDataType(Datatype::F32);
-    k.EnableOutputDataType(Datatype::F16);
-    k.EnableOutputDataType(Datatype::F32);
-    k.EnableInputWeightsType(WeightsType::F16);
-    k.EnableInputWeightsType(WeightsType::F32);
-
-    // Supported layout
-    k.EnableInputLayout(DataLayout::bfyx);
-    k.EnableOutputLayout(DataLayout::bfyx);
-    k.EnableInputLayout(DataLayout::yxfb);
-    k.EnableOutputLayout(DataLayout::yxfb);
-
-    // Supported tensor offset/pitch/padding
-    k.EnableTensorOffset();
-    k.EnableTensorPitches();
-    k.EnableBatching();
-
-    // Supported convolution extra data
-    k.EnableDilation();
-    k.EnableBiasPerFeature();
-    k.EnableBiasPerOutput();
-    k.EnableNonBiasTerm();
-
-    // Supported convolution which gets a split index and uses it as a view on the input/output
-    k.EnableSplitSupport();
-
-    // Supported convolution with the depthwise separable optimization flag
-    k.EnableDepthwiseSeparableOpt();
-
-    return k;
-}
-
-#ifdef BASIC_TUTORIAL
-
-KernelsData ConvolutionKernel_Tutorial::GetKernelsData(const Params& /*params*/,
-                                                       const optional_params& /*options*/) const {
-    return {};
-
-    // Step 2:
-    // - Uncomment and update the following lines
-
-    // assert(params.GetType() == KernelType::CONVOLUTION && options.GetType() == KernelType::CONVOLUTION);
-    //
-    // const uint32_t numOfkernels = 1;
-    // KernelData kd = KernelData::Default<ConvolutionParams>(params, numOfkernels);
-    // ConvolutionParams& newParams = *static_cast<ConvolutionParams*>(kd.params.get());
-    // const ConvolutionOptionalParams& optParams = static_cast<const ConvolutionOptionalParams&>(options);
-    // auto& kernel = kd.kernels[0];
-
-    // Step 3:
-    // - make sure that the input weights tensor fits this kernel's needs.
-    //   if it does not, and the flag "optParams.allowWeightsReorder" is set to "true", please update
-    //   the member "kd.weightsReorderParams" with the right OpenCL/CPU kernel which will be used to reorder the
-    //   weights in the loading time.
-    //   you have three options:
-    //   - provide a cpu code - inherit from "CPUKernel" and implement "Execute" function.
-    //      (by default the input layout of CPU kernel is simple bfyx, and clDNN will reorder it for you before calling
-    //      to Execute function)
-    //   - provide a GPU code by filling clKernelData.
-    //   - use existing layouts which clDNN support and use the auxiliary function "UpdateWeightsParams"
-
-    // Step 4:
-    // - make sure that the input tensor fits to this kernel's needs.
-    //   make sure that you have the proper padding area with a proper padding value, and a proper alignment.
-    //   currently Convolution in clDNN doesn't allow the kernel to ask reordering
-
-    // Step 5:
-    // - fill "kernel.kernelString"
-    //   - fill "kernel.kernelString->str"                  - the source of the kernel.
-    //     please use "db.get(kernelName)" in case you use "*.cl" file which located under
-    //     "kernel_selector\core\cl_kernels\".
-    //   - fill "kernel.kernelString->jit"                  - Dynamic jit of this params.
-    //   - fill "kernel.kernelString->options"              - options which pass to cl program build functions (like
-    //   "-cl-no-subgroup-ifp")
-    //   - fill "kernel.kernelString->entry_point"          - kernel entry point
-    //   - fill "kernel.kernelString->batch_compilation"    - A flag that allow clDNN kernel to compile this kernel as a
-    //   part of a program
-    //                                                        NOTE: this can only be used if you prevent symbol
-    //                                                        conflicts with other kernels (#undef is done automatically
-    //                                                        by clDNN)
-
-    // Step 6:
-    // - fill "kernel.WorkGroupSizes" - local/global work group sizes for OpenCL kernel
-
-    // Step 7:
-    // - fill "kernel.arguments" - which describe the argument of the kernel.
-    //   in this tutorial you can use:
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); // "0" means the input index when
-    //     there are multiple inputs. kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 });
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::BIAS, 0 });
-    //
-    //   in case that you have more than one kernel, you probably need an intermediate buffers.
-    //   in order to support that you have to describe the buffer size in kd.internalBufferSizes and add a kernel
-    //   argument like:
-    //     kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, <index to kd.internalBufferSize> });
-
-    // Step 8:
-    // - estimate the kernel's execution time. currently it's under development so please use FORCE_PRIORITY_<X> - lower
-    // is better.
-
-    // return{ kd };
-}
-
-#else
-
-ConvolutionKernel_Tutorial::Parent::DispatchData ConvolutionKernel_Tutorial::SetDefault(
-    const convolution_params& params,
-    int autoTuneIndex) const {
-    DispatchData runInfo = Parent::SetDefault(params, autoTuneIndex);
-
-    // Step 2:
-    //
-    // Init runInfo, and set kernel efficiency
-    runInfo.effiency = TUTORIAL_PRIORITY;
-
-    return runInfo;
-}
-
-bool ConvolutionKernel_Tutorial::Validate(const Params& p, const optional_params& o) const {
-    if (!Parent::Validate(p, o)) {
-        return false;
-    }
-
-    // Step 3:
-    //
-    // Validate that this kernel supports the given params and optional params. use:
-    // const ConvolutionParams& params = static_cast<const ConvolutionParams&>(p);
-    // const ConvolutionOptionalParams& options = static_cast<const ConvolutionOptionalParams&>(o);
-
-    return true;
-}
-
-JitConstants ConvolutionKernel_Tutorial::GetJitConstants(const convolution_params& params,
-                                                         const DispatchData& kd) const {
-    auto jit = Parent::GetJitConstants(params, kd);
-    jit.AddConstant(MakeJitConstant("ADVANCED_TUTORIAL", ""));
-
-    // Step 4:
-    //
-    // Add your own jit constants, for example:
-    // jit.AddConstant(MakeJitConstant("<MY_CONST>", <my val>));
-    // - "my val" can be most of the common KernelSelector/C++ types
-
-    return jit;
-}
-
-KernelsData ConvolutionKernel_Tutorial::GetKernelsData(const Params& params, const optional_params& options) const {
-    return GetTunedKernelsDataByIndex(params, options);
-}
-
-#endif
-}  // namespace kernel_selector
\ No newline at end of file
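For readers of the removed tutorial above: the argument-descriptor pattern its Step 7 describes can be illustrated in isolation. The sketch below is a minimal, self-contained mock (ArgumentDescriptor, clKernelData and KernelData here are simplified stand-ins for the real clDNN types, and the buffer size is an arbitrary example) showing how per-kernel arguments and an internal buffer index would be declared.

    // Minimal stand-ins for the types the removed tutorial refers to
    // (hypothetical simplification, not the real kernel_selector headers).
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct ArgumentDescriptor {
        enum class Types { INPUT, OUTPUT, WEIGHTS, BIAS, INTERNAL_BUFFER };
        Types t;
        uint32_t index;  // which input / which internal buffer
    };

    struct clKernelData {
        std::vector<ArgumentDescriptor> arguments;
    };

    struct KernelData {
        std::vector<size_t> internalBufferSizes;  // sizes of intermediate buffers, if any
    };

    int main() {
        KernelData kd;
        clKernelData kernel;

        // One input, one output, weights and bias, as in the tutorial's example.
        kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
        kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
        kernel.arguments.push_back({ArgumentDescriptor::Types::WEIGHTS, 0});
        kernel.arguments.push_back({ArgumentDescriptor::Types::BIAS, 0});

        // An intermediate buffer shared between two kernels: describe its size
        // first, then reference it by index in the argument list.
        kd.internalBufferSizes.push_back(1024);
        kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});

        std::cout << "arguments: " << kernel.arguments.size()
                  << ", internal buffers: " << kd.internalBufferSizes.size() << "\n";
    }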
index 3b656d4..1578f1e 100644 (file)
@@ -27,21 +27,6 @@ namespace kernel_selector {
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct convolution_params : public weight_bias_params {
     convolution_params() : weight_bias_params(KernelType::CONVOLUTION) {}
-
-    struct fused_operation_desc {
-        enum class Type : uint8_t {
-            ELTWISE = 0,
-            UNDEFINED
-        };
-
-        Type type;
-        size_t dep_idx_start;
-        size_t dep_size;
-        MultiDataTensor tensors;
-        kernel_selector::base_activation_params activation;
-    };
-
-
     uSize filterSize;
     uSize stride;
     uSize dilation;
@@ -61,7 +46,6 @@ struct convolution_params : public weight_bias_params {
 
     MultiDataTensor weights_quantization_factors;
     MultiDataTensor output_calibration_factors;
-    std::vector<fused_operation_desc> fused_ops = {};
     std::string to_string() const override;
     ParamsKey GetParamsKey() const override;
 };
index a95e1e4..38181ed 100644 (file)
@@ -69,7 +69,7 @@ DeformableConvolutionKernel_bfyx_conv::DispatchData DeformableConvolutionKernel_
 }
 
 JitConstants DeformableConvolutionKernel_bfyx_conv::GetJitConstants(const convolution_params& params,
-                                                                    const DispatchData& kd) const {
+                                                                    const DispatchData& /*kd*/) const {
     JitConstants jit = WeightBiasKernelBase::GetJitConstants(params);
     jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", 16));
     jit.AddConstant(MakeJitConstant("INPUT_CHANNELS", params.inputs[0].Feature().v / params.weights.X().v / params.weights.Y().v));
index 3029f6a..6af8523 100644 (file)
@@ -39,6 +39,29 @@ std::string deconvolution_params::to_string() const {
     return s.str();
 }
 
+bool DeconvolutionKernelBase::Validate(const Params& p, const optional_params& o) const {
+    if (p.GetType() != KernelType::DECONVOLUTION || o.GetType() != KernelType::DECONVOLUTION) {
+        return false;
+    }
+
+    const deconvolution_params& params = static_cast<const deconvolution_params&>(p);
+    const deconvolution_optional_params& optParams = static_cast<const deconvolution_optional_params&>(o);
+
+    bool bSupportedWeightsLayout = false;
+
+    for (WeightsLayout l : GetSupportedWeightLayouts(params)) {
+        bSupportedWeightsLayout |= params.weights.GetLayout() == l;
+    }
+
+    const bool bWeightsOK = bSupportedWeightsLayout || optParams.allowStaticInputReordering;
+
+    if (!bWeightsOK) {
+        return false;
+    }
+
+    return true;
+}
+
 JitConstants DeconvolutionKernelBase::GetJitConstants(const deconvolution_params& dp) const {
     JitConstants jit = WeightBiasKernelBase::GetJitConstants(dp);
     const auto& padding = dp.padding;
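A note on the new DeconvolutionKernelBase::Validate above: the weights check is a linear scan over GetSupportedWeightLayouts, with allowStaticInputReordering as an escape hatch. A minimal standalone sketch of the same check, assuming a hypothetical WeightsLayout enum rather than the real clDNN one:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Hypothetical stand-in for kernel_selector::WeightsLayout (not the real enum).
    enum class WeightsLayout { oiyx, iyxo, yxio, oyxi, oizyx, other };

    // Same rule as the Validate above: accept a listed layout, or rely on
    // static input reordering when the caller allows it.
    static bool WeightsLayoutOk(WeightsLayout actual,
                                const std::vector<WeightsLayout>& supported,
                                bool allowStaticInputReordering) {
        const bool listed = std::any_of(supported.begin(), supported.end(),
                                        [&](WeightsLayout l) { return l == actual; });
        return listed || allowStaticInputReordering;
    }

    int main() {
        const std::vector<WeightsLayout> supported = {
            WeightsLayout::oiyx, WeightsLayout::iyxo, WeightsLayout::yxio,
            WeightsLayout::oyxi, WeightsLayout::oizyx};
        std::cout << std::boolalpha
                  << WeightsLayoutOk(WeightsLayout::oiyx, supported, false) << " "
                  << WeightsLayoutOk(WeightsLayout::other, supported, false) << "\n";  // true false
    }

std::any_of expresses the same scan as the |= loop above without mutating a flag; the behaviour is identical.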
@@ -86,21 +109,16 @@ DeconvolutionKernelBase::DispatchData DeconvolutionKernelBase::SetDefault(const
 KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const optional_params& options) const {
     assert(params.GetType() == KernelType::DECONVOLUTION);
 
-    const deconvolution_params& orgParams = static_cast<const deconvolution_params&>(params);
-
-    const std::vector<WeightsLayout> weightsLayouts = {
-        WeightsLayout::oiyx,
-        WeightsLayout::iyxo,
-        WeightsLayout::yxio,
-        WeightsLayout::oyxi,
-        WeightsLayout::oizyx,
-    };
+    if (!Validate(params, options)) {
+        return{};
+    }
 
+    const deconvolution_params& orgParams = static_cast<const deconvolution_params&>(params);
     DispatchData runInfo = SetDefault(orgParams);
     KernelData kd = KernelData::Default<deconvolution_params>(params);
     deconvolution_params& newParams = *static_cast<deconvolution_params*>(kd.params.get());
 
-    bool succeed = UpdateWeightsParams(newParams, options, weightsLayouts, kd.weightsReorderParams);
+    bool succeed = UpdateWeightsParams(newParams, options, GetSupportedWeightLayouts(newParams), kd.weightsReorderParams);
 
     if (!succeed) {
         return {};
index 222a15a..b25878a 100644 (file)
@@ -18,6 +18,7 @@
 #include "weight_bias_kernel_base.h"
 #include "kernel_selector_params.h"
 #include <string>
+#include <vector>
 
 namespace kernel_selector {
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -81,5 +82,15 @@ protected:
     virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const;
     virtual JitConstants GetJitConstants(const deconvolution_params& params) const;
     virtual DispatchData SetDefault(const deconvolution_params& params) const;
+    virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const deconvolution_params&) const {
+        return {
+            WeightsLayout::oiyx,
+            WeightsLayout::iyxo,
+            WeightsLayout::yxio,
+            WeightsLayout::oyxi,
+            WeightsLayout::oizyx
+        };
+    }
+    bool Validate(const Params& p, const optional_params& o) const override;
 };
 }  // namespace kernel_selector
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfzyx_f16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfzyx_f16.cpp
new file mode 100644 (file)
index 0000000..087c94f
--- /dev/null
@@ -0,0 +1,152 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "deconvolution_kernel_bfzyx_f16.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+static const size_t sub_group_size = 16;
+static const size_t feature_block_size = 16;
+
+ParamsKey DeconvolutionKernel_bfzyx_f16::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputWeightsType(WeightsType::F32);
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableSubGroup();
+    k.EnableSubGroupShort();
+    return k;
+}
+
+DeconvolutionKernelBase::DispatchData DeconvolutionKernel_bfzyx_f16::SetDefault(const deconvolution_params& params) const {
+    DispatchData kd = DeconvolutionKernelBase::SetDefault(params);
+
+    const auto& out = params.output;
+
+    auto x = out.X().v;
+    auto y = out.Y().v;
+    auto z = out.Z().v;
+    auto f = out.Feature().v;
+    auto b = out.Batch().v;
+
+    kd.gws0 = f;
+    kd.gws1 = x * y * z;
+    kd.gws2 = CeilDiv(b, 16);
+
+    kd.lws0 = sub_group_size;
+    kd.lws1 = 1;
+    kd.lws2 = 1;
+
+    if (b == 1)
+        kd.effiency = FORCE_PRIORITY_2;
+    else
+        kd.effiency = FORCE_PRIORITY_7;
+
+    return kd;
+}
+
+bool DeconvolutionKernel_bfzyx_f16::Validate(const Params& p, const optional_params& o) const {
+    if (!DeconvolutionKernelBase::Validate(p, o)) {
+        return false;
+    }
+
+    const auto& params = static_cast<const deconvolution_params&>(p);
+
+    const auto& input = params.inputs[0];
+    const auto& output = params.output;
+
+    if (output.Feature().v % feature_block_size != 0)
+        return false;
+
+    if (input.Feature().v % feature_block_size != 0)
+        return false;
+
+    // Check that padding before features doesn't misalign the blocks
+    if (input.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
+        return false;
+    }
+
+    return true;
+}
+
+JitConstants DeconvolutionKernel_bfzyx_f16::GetJitConstants(const deconvolution_params& params) const {
+    auto input = params.inputs[0];
+    auto output = params.output;
+    auto jit = Parent::GetJitConstants(params);
+
+    jit.AddConstant(MakeJitConstant("VER_8OW16C", 1));
+    jit.AddConstant(MakeJitConstant("OC_BLOCK", 16));
+    jit.AddConstant(MakeJitConstant("NCHW", 1));
+    jit.AddConstant(MakeJitConstant("CASE_3D", 1));
+
+    if (output.GetDType() == Datatype::F32)
+        jit.AddConstant(MakeJitConstant("DT_F32", 1));
+    else
+        jit.AddConstant(MakeJitConstant("DT_F16", 1));
+
+    // the conditional code below was replaced to fix a security issue
+    // auto is_1stconv = false;
+    // auto mb_block =(is_1stconv && output.Batch().v % 16 == 0) ? 16 : 1;
+    // auto ic_block = (is_1stconv) ? 1 : 16;
+    auto mb_block = 1;
+    auto ic_block = 16;
+
+    jit.AddConstant(MakeJitConstant("MB_BLOCK", mb_block));
+    jit.AddConstant(MakeJitConstant("MB_LAST", (output.Batch().v / 16) * 16));
+    jit.AddConstant(MakeJitConstant("IC_BLOCK", ic_block));
+    jit.AddConstant(MakeJitConstant("G", params.split));
+    jit.AddConstant(MakeJitConstant("DD", params.dilation.z - 1));
+    jit.AddConstant(MakeJitConstant("DH", params.dilation.y - 1));
+    jit.AddConstant(MakeJitConstant("DW", params.dilation.x - 1));
+    jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
+    jit.AddConstant(MakeJitConstant("IS_DW", "DEPTHWISE_SEPARABLE_OPT"));
+    jit.AddConstant(MakeJitConstant("BWD_DATA", 1));
+    jit.AddConstant(MakeJitConstant("WITH_BIAS", "BIAS_TERM"));
+
+    jit.AddConstant(MakeJitConstant("MB", "OUTPUT_BATCH_NUM"));
+    jit.AddConstant(MakeJitConstant("OC", "INPUT0_FEATURE_NUM"));
+    jit.AddConstant(MakeJitConstant("OD", "INPUT0_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("OH", "INPUT0_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("OW", "INPUT0_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("IC", "OUTPUT_FEATURE_NUM"));
+    jit.AddConstant(MakeJitConstant("ID", "OUTPUT_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("IH", "OUTPUT_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("IW", "OUTPUT_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("KD", "FILTER_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("KH", "FILTER_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("KW", "FILTER_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("SD", "STRIDE_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("SH", "STRIDE_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("SW", "STRIDE_SIZE_X"));
+    jit.AddConstant(MakeJitConstant("PD", "PADDING_SIZE_Z"));
+    jit.AddConstant(MakeJitConstant("PH", "PADDING_SIZE_Y"));
+    jit.AddConstant(MakeJitConstant("PW", "PADDING_SIZE_X"));
+
+    return jit;
+}
+
+}  // namespace kernel_selector
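To make the dispatch sizes in SetDefault above concrete: the output is treated as (b, f, z, y, x) and the kernel launches gws = { f, x*y*z, ceil(b/16) } with lws = { sub_group_size, 1, 1 }. A small standalone sketch of that arithmetic, using hypothetical output dimensions and plain integers instead of the DispatchData type:

    #include <cstddef>
    #include <iostream>

    // Plain re-statement of the work-group arithmetic used above
    // (hypothetical shapes, simplified types).
    static size_t CeilDiv(size_t a, size_t b) { return (a + b - 1) / b; }

    int main() {
        const size_t b = 1, f = 32, z = 2, y = 8, x = 8;
        const size_t sub_group_size = 16;

        const size_t gws[3] = { f, x * y * z, CeilDiv(b, 16) };
        const size_t lws[3] = { sub_group_size, 1, 1 };

        // Prints gws = 32 x 128 x 1, lws = 16 x 1 x 1.
        std::cout << "gws = " << gws[0] << " x " << gws[1] << " x " << gws[2]
                  << ", lws = " << lws[0] << " x " << lws[1] << " x " << lws[2] << "\n";
    }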
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfzyx_f16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfzyx_f16.h
new file mode 100644 (file)
index 0000000..1339c77
--- /dev/null
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#pragma once
+
+#include "deconvolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+class DeconvolutionKernel_bfzyx_f16 : public DeconvolutionKernelBase {
+public:
+    using Parent = DeconvolutionKernelBase;
+
+    DeconvolutionKernel_bfzyx_f16() : DeconvolutionKernelBase("gen9_common_conv_bwd_data") {}
+    virtual ~DeconvolutionKernel_bfzyx_f16() {}
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    std::vector<WeightsLayout> GetSupportedWeightLayouts(const deconvolution_params&) const override {
+        return {
+            WeightsLayout::i_o_zyx_o16_i16,
+        };
+    }
+    bool Validate(const Params& p, const optional_params& o) const override;
+    CommonDispatchData SetDefault(const deconvolution_params& arg) const override;
+    JitConstants GetJitConstants(const deconvolution_params& params) const override;
+};
+}  // namespace kernel_selector
index fa55e54..2d49f59 100644 (file)
@@ -28,10 +28,12 @@ ParamsKey DeconvolutionKernelRef::GetSupportedKey() const {
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::byxf);
     k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
     k.EnableOutputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::byxf);
     k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBiasPerFeature();
index c7bdf38..3afbca5 100644 (file)
 #include "deconvolution_kernel_selector.h"
 #include "deconvolution_kernel_ref.h"
 #include "deconvolution_kernel_bfyx_opt.h"
+#include "deconvolution_kernel_bfzyx_f16.h"
 
 namespace kernel_selector {
 deconvolution_kernel_selector::deconvolution_kernel_selector() {
     Attach<DeconvolutionKernelRef>();
     Attach<DeconvolutionKernel_bfyx_opt>();
+    Attach<DeconvolutionKernel_bfzyx_f16>();
 }
 
 KernelsData deconvolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
index 6de2eb1..50f314a 100644 (file)
@@ -23,7 +23,7 @@ namespace kernel_selector {
 // depth_to_space_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct depth_to_space_params : public base_params {
-    depth_to_space_params() : base_params(KernelType::DEPTH_TO_SPACE) {}
+    depth_to_space_params() : base_params(KernelType::DEPTH_TO_SPACE), block_size(0) {}
 
     size_t block_size;
 
index ce172f7..c4a9cd7 100644 (file)
@@ -112,14 +112,14 @@ bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) cons
 JitConstants EltwiseKernelBase::GetJitConstantsCommon(const eltwise_params& params, bool useVload8) const {
     JitConstants jit = MakeBaseParamsJitConstants(params);
 
-    auto GetIdxOrderForLayout = [&](DataLayout l, bool layoutBased, uSize stride) -> std::string {
+    auto GetIdxOrderVecForLayout = [&](DataLayout l, bool layoutBased, uSize stride) -> std::vector<std::string> {
         // TODO: Generalize this method
         std::vector<std::string> bfyx_idx_order = {};
         if (layoutBased) {
             bfyx_idx_order = { "d4", "d3", "d2", "d1" };
         } else {
             if (l == DataLayout::yxfb) {
-                bfyx_idx_order = { "d1", "d2", "d3", "d4" };
+                bfyx_idx_order = { "d1", "d2", "d4", "d3" };
             } else if (l == DataLayout::fyxb) {
                 bfyx_idx_order = { "d1", "d4", "d3", "d2" };
             } else {
@@ -132,6 +132,12 @@ JitConstants EltwiseKernelBase::GetJitConstantsCommon(const eltwise_params& para
             bfyx_idx_order[3] = "(" + bfyx_idx_order[3] + "*" + std::to_string(stride.x) + ")";
         }
 
+        return bfyx_idx_order;
+    };
+
+    auto GetIdxOrderStringForLayout = [&](DataLayout l, bool layoutBased, uSize stride) -> std::string {
+        std::vector<std::string> bfyx_idx_order = GetIdxOrderVecForLayout(l, layoutBased, stride);
+
         return bfyx_idx_order[0] + "," +
                bfyx_idx_order[1] + "," +
                bfyx_idx_order[2] + "," +
@@ -168,9 +174,9 @@ JitConstants EltwiseKernelBase::GetJitConstantsCommon(const eltwise_params& para
         } else {
             size_t out_c = DataTensor::ChannelsCount(params.output.GetLayout());
             if (out_c <= 4) {
-                jit.AddConstant(MakeJitConstant(out_idx_order, GetIdxOrderForLayout(params.output.GetLayout(),
-                                                                                    params.layoutBased || params.broadcast,
-                                                                                    out_stride)));
+                jit.AddConstant(MakeJitConstant(out_idx_order, GetIdxOrderStringForLayout(params.output.GetLayout(),
+                                                                                          params.layoutBased || params.broadcast,
+                                                                                          out_stride)));
             } else if (out_c == 5) {
                 jit.AddConstant(MakeJitConstant(out_idx_order, "d5,d4,d3,d2,d1"));
             } else {
@@ -219,9 +225,9 @@ JitConstants EltwiseKernelBase::GetJitConstantsCommon(const eltwise_params& para
                 size_t out_c = DataTensor::ChannelsCount(params.output.GetLayout());
                 auto in_stride = params.stride.empty() ? out_stride : params.stride[i];
                 if (out_c <= 4 && in_c <= 4) {
-                    jit.AddConstant(MakeJitConstant(idx_order, GetIdxOrderForLayout(params.inputs[i].GetLayout(),
-                                                                                    params.layoutBased || params.broadcast,
-                                                                                    in_stride)));
+                    jit.AddConstant(MakeJitConstant(idx_order, GetIdxOrderStringForLayout(params.inputs[i].GetLayout(),
+                                                                                          params.layoutBased || params.broadcast,
+                                                                                          in_stride)));
                 } else if (out_c == 5) {
                     if (in_c < 5) {
                         // Skip Z coord for 4d tensors
@@ -474,7 +480,7 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
         }
 
         size_t n_dims;
-        if (out.GetLayout() == DataLayout::bfzyx)
+        if ((out.GetLayout() == DataLayout::bfzyx)  || (out.GetLayout() == DataLayout::bfzyx_f16))
             n_dims = 5;
         else
             n_dims = 4;
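The eltwise refactor above splits index-order generation into GetIdxOrderVecForLayout (builds the per-dimension index names) and GetIdxOrderStringForLayout (joins them), so callers can reuse the vector form. A minimal standalone sketch of the join step, with hypothetical index names and plain std::string:

    #include <iostream>
    #include <string>
    #include <vector>

    // Joins per-dimension index names ("d4", "d3", ...) into the comma-separated
    // form used for the *_IDX_ORDER jit constants above (simplified sketch).
    static std::string JoinIdxOrder(const std::vector<std::string>& order) {
        std::string out;
        for (size_t i = 0; i < order.size(); ++i) {
            if (i) out += ",";
            out += order[i];
        }
        return out;
    }

    int main() {
        // bfyx-style order for a layout-based eltwise, as in the code above.
        std::cout << JoinIdxOrder({"d4", "d3", "d2", "d1"}) << "\n";  // d4,d3,d2,d1
    }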
index e7f1e1e..ba09bf7 100644 (file)
@@ -23,6 +23,7 @@ ParamsKey EltwiseKernelRef::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F16);
     k.EnableInputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
     k.EnableInputDataType(Datatype::INT32);
     k.EnableInputDataType(Datatype::INT64);
     k.EnableOutputDataType(Datatype::F16);
index 08b0f3d..889b748 100644 (file)
 
 
 #include "fully_connected_block_kernel_base.h"
+#include <algorithm>
 
 namespace kernel_selector {
+
+    size_t FullyConnectedBlockKernelBase::GetBatchesPerWorkItem(const fully_connected_params& params) const {
+        auto batchSize = params.output.Batch().v;
+        return std::min(batchSize, static_cast<size_t>(32U));
+    }
+
+    size_t FullyConnectedBlockKernelBase::GetLocalGroupsSize(const fully_connected_params& params) const {
+        auto batchSize = params.output.Batch().v;
+        return std::max(static_cast<size_t>(1U), batchSize / GetBatchesPerWorkItem(params));
+    }
+
 JitConstants FullyConnectedBlockKernelBase::GetJitConstants(
     const fully_connected_params& params,
     const FullyConnectedBlockKernelBase::DispatchData& data) const {
@@ -34,4 +46,4 @@ JitConstants FullyConnectedBlockKernelBase::GetJitConstants(
     return cldnnJit;
 }
 
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
index 48f3723..33097aa 100644 (file)
@@ -15,7 +15,7 @@
 #pragma once
 
 #include "fully_connected_kernel_base.h"
-#include <algorithm>
+
 
 namespace kernel_selector {
 class FullyConnectedBlockKernelBase : public FullyConnectedKernelBase {
@@ -27,15 +27,9 @@ protected:
     JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
 
     // how many batches will a single work item compute
-    static size_t GetBatchesPerWorkItem(const fully_connected_params& params) {
-        auto batchSize = params.output.Batch().v;
-        return std::min(batchSize, static_cast<size_t>(32U));
-    }
+    virtual size_t GetBatchesPerWorkItem(const fully_connected_params& params) const;
 
-    static size_t GetLocalGroupsSize(const fully_connected_params& params) {
-        auto batchSize = params.output.Batch().v;
-        return std::max(static_cast<size_t>(1U), batchSize / GetBatchesPerWorkItem(params));
-    }
+    size_t GetLocalGroupsSize(const fully_connected_params& params) const;
 
     // how many neurons for a single batch will a single work item produce
     static size_t GetNeuronsPerWorkItem(const fully_connected_params& params) {
@@ -47,4 +41,4 @@ protected:
             return 1;
     }
 };
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
index b3eed55..6f8108e 100644 (file)
@@ -30,18 +30,18 @@ public:
     virtual ~FullyConnectedKernelBase() {}
 
     struct DispatchData : public CommonDispatchData {
-        uint32_t unit_byte_size;
+        uint32_t unit_byte_size = 0;
         const char* chunk_type;
-        uint32_t chunk_byte_size;
-        uint32_t units_per_chunk;
-        uint32_t bytes_per_sg_read;
-        uint32_t units_per_sg_read;
-        uint32_t responses_per_sg_exec;
-        uint32_t in_chunk_prefetch_size;
-        uint32_t filter_chunk_prefetch_size;
+        uint32_t chunk_byte_size = 0;
+        uint32_t units_per_chunk = 0;
+        uint32_t bytes_per_sg_read = 0;
+        uint32_t units_per_sg_read = 0;
+        uint32_t responses_per_sg_exec = 0;
+        uint32_t in_chunk_prefetch_size = 0;
+        uint32_t filter_chunk_prefetch_size = 0;
 
-        uint32_t last_rg_size;
-        uint32_t rg_count;
+        uint32_t last_rg_size = 0;
+        uint32_t rg_count = 0;
     };
 
     std::string GetAutoTuneOptions(int autoTuneIndex) const;
index db61e0b..dc3aa69 100644 (file)
@@ -14,6 +14,7 @@
 
 
 #include "fully_connected_kernel_fb_io_b8_f8.h"
+#include <algorithm>
 
 namespace kernel_selector {
 ParamsKey FullyConnected_fb_io_b8_f8::GetSupportedKey() const {
@@ -33,6 +34,18 @@ ParamsKey FullyConnected_fb_io_b8_f8::GetSupportedKey() const {
     return k;
 }
 
+size_t FullyConnected_fb_io_b8_f8::GetBatchesPerWorkItem(const fully_connected_params& params) const {
+    auto batch_size = params.output.Batch().v;
+
+    if (batch_size % 32 == 0)
+        return std::min(batch_size, static_cast<size_t>(32U));
+
+    if (batch_size % 16 == 0)
+        return std::min(batch_size, static_cast<size_t>(16U));
+
+    return std::min(batch_size, static_cast<size_t>(8U));
+}
+
 FullyConnected_fb_io_b8_f8::DispatchData FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg,
                                                                                 int) const {
     auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
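The GetBatchesPerWorkItem override above picks the largest of {32, 16, 8} that divides the output batch, and the block base derives the local-group count from it. A small self-contained sketch of that selection with plain integers (hypothetical batch sizes, not the fully_connected_params type):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Mirrors the batch-tiling choice above with plain integers.
    static size_t BatchesPerWorkItem(size_t batch) {
        if (batch % 32 == 0) return std::min<size_t>(batch, 32);
        if (batch % 16 == 0) return std::min<size_t>(batch, 16);
        return std::min<size_t>(batch, 8);
    }

    static size_t LocalGroupsSize(size_t batch) {
        return std::max<size_t>(1, batch / BatchesPerWorkItem(batch));
    }

    int main() {
        for (size_t batch : {8, 16, 48, 64}) {
            // e.g. batch 48: 48 % 32 != 0, 48 % 16 == 0 -> 16 per work item, 3 local groups
            std::cout << "batch " << batch
                      << ": per-WI " << BatchesPerWorkItem(batch)
                      << ", local groups " << LocalGroupsSize(batch) << "\n";
        }
    }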
index 4970965..70428ef 100644 (file)
@@ -22,12 +22,12 @@ namespace kernel_selector {
 class FullyConnected_fb_io_b8_f8 : public FullyConnectedBlockKernelBase {
 public:
     FullyConnected_fb_io_b8_f8() : FullyConnectedBlockKernelBase("fully_connected_gpu_fb_io_b8_f8_vload") {}
-
     KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
     ParamsKey GetSupportedKey() const override;
 
 protected:
     bool Validate(const Params& p, const optional_params& o) const override;
     DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
+    size_t GetBatchesPerWorkItem(const fully_connected_params& params) const override;
 };
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp
deleted file mode 100644 (file)
index 5a17e05..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "fully_connected_kernel_image_tutorial.h"
-#include "kernel_selector_utils.h"
-#include <vector>
-
-namespace kernel_selector {
-ParamsKey FullyConnected_image_tutorial::GetSupportedKey() const {
-    ParamsKey k;
-    k.EnableInputDataType(Datatype::F16);
-    k.EnableInputDataType(Datatype::F32);
-    k.EnableOutputDataType(Datatype::F16);
-    k.EnableOutputDataType(Datatype::F32);
-    k.EnableInputWeightsType(WeightsType::F16);
-    k.EnableInputWeightsType(WeightsType::F32);
-    k.EnableAllInputLayout();
-    k.EnableInputLayout(DataLayout::bf);
-    k.EnableOutputLayout(DataLayout::bf);
-    k.EnableBiasPerOutput();
-    k.EnableBiasPerFeature();
-    k.EnableNonBiasTerm();
-    k.EnableTensorOffset();
-    k.EnableTensorPitches();
-    k.EnableBatching();
-    return k;
-}
-
-FullyConnected_image_tutorial::DispatchData FullyConnected_image_tutorial::SetDefault(
-    const fully_connected_params& params,
-    int) const {
-    auto runInfo = Parent::SetDefault(params);
-
-    std::vector<size_t> global = {params.output.Feature().v, params.output.Batch().v};
-    std::vector<size_t> local = GetOptimalLocalWorkGroupSizes(global);
-
-    runInfo.gws0 = global[0];
-    runInfo.gws1 = global[1];
-    runInfo.gws2 = 1;
-
-    runInfo.lws0 = local[0];
-    runInfo.lws1 = local[1];
-    runInfo.lws2 = 1;
-
-    runInfo.effiency = TUTORIAL_PRIORITY;
-
-    return runInfo;
-}
-
-KernelsData FullyConnected_image_tutorial::GetKernelsData(const Params& params, const optional_params& options) const {
-    KernelsData res = {};
-    for (size_t i = 0; i < autoTuneOptions.size(); i++) {
-        KernelsData kd = GetTunedKernelsDataByIndex(params,
-                                                    options,
-                                                    DataLayout::bfyx,
-                                                    {WeightsLayout::image_2d_weights_c4_fyx_b},
-                                                    DONT_USE_IF_HAVE_SOMETHING_ELSE,
-                                                    static_cast<int>(i));
-        if (!kd.empty()) {
-            res.emplace_back(kd[0]);
-        }
-    }
-    return res;
-}
-}  // namespace kernel_selector
index 77ae7eb..c4cf6a4 100644 (file)
@@ -27,7 +27,6 @@
 #include "fully_connected_kernel_fb_io_b8_f8.h"
 #include "fully_connected_kernel_fb_io_block.h"
 #include "fully_connected_kernel_bf_io_input_spatial.h"
-#include "fully_connected_kernel_image_tutorial.h"
 #include "fully_connected_kernel_mmad.h"
 #include "fully_connected_kernel_mmad_batched.h"
 #include "fully_connected_kernel_imad.h"
index 3a3c8f6..afc5a91 100644 (file)
@@ -33,10 +33,10 @@ std::string fused_conv_eltwise_params::to_string() const {
         s << "bias_" << bias[0].PhysicalSize() << "_";
     }
 
-    s << conv.filterSize.x << "_" << conv.filterSize.y << "_";
-    s << conv.stride.x << "_" << conv.stride.y << "_";
-    s << conv.dilation.x << "_" << conv.dilation.y << "_";
-    s << conv.padding.x << "_" << conv.padding.y << "_";
+    s << conv.filterSize.x << "_" << conv.filterSize.y << "_" << conv.filterSize.z << "_";
+    s << conv.stride.x << "_" << conv.stride.y << "_" << conv.stride.z << "_";
+    s << conv.dilation.x << "_" << conv.dilation.y << "_" << conv.dilation.z << "_";
+    s << conv.padding.x << "_" << conv.padding.y << "_" << conv.padding.z << "_";
     s << conv.split;
 
     return s.str();
@@ -49,7 +49,7 @@ ParamsKey fused_conv_eltwise_params::GetParamsKey() const {
         k.EnableFusedConvEltwSplitSupport();
     }
 
-    if (conv.dilation.x != 1 || conv.dilation.y != 1) {
+    if (conv.dilation.x != 1 || conv.dilation.y != 1 || conv.dilation.z != 1) {
         k.EnableFusedConvEltwDilation();
     }
 
@@ -140,24 +140,27 @@ JitConstants fused_conv_eltwise_kernel_base::GetJitConstants(const fused_conv_el
         mem_consts.AddConstants({MakeJitConstant("LOCAL_CONVOLUTION", params.conv.local_convolution)});
     }
 
-    JitConstants eltw_activations = MakeActivationJitConstants(params.activation, "_ELTW");
+    JitConstants eltw_activations = MakeActivationJitConstants(params.activations, "_ELTW");
     mem_consts.Merge(eltw_activations);
-    JitConstants conv_activations = MakeActivationJitConstants(params.conv.activation, "_CONV");
+    JitConstants conv_activations = MakeActivationJitConstants(params.conv.activations, "_CONV");
     mem_consts.Merge(conv_activations);
     mem_consts.AddConstant(MakeJitConstant("ELTW_CALIBRATION_TERM", params.eltw.output_calibration));
 
     if (!params.eltw.stride.empty()) {
         mem_consts.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
         mem_consts.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        mem_consts.AddConstant(MakeJitConstant("ELTW_STRIDE_Z", params.eltw.stride[0].z));
     } else {
         mem_consts.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
         mem_consts.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        mem_consts.AddConstant(MakeJitConstant("ELTW_STRIDE_Z", 1));
     }
 
     mem_consts.AddConstant(MakeJitConstant("IN_OUT_OPT", params.second_input_in_output ? 1 : 0));
 
     std::vector<uint32_t> unrollLoopParams{params.conv.filterSize.x,
                                            params.conv.filterSize.y,
+                                           params.conv.filterSize.z,
                                            (uint32_t)kd.gemmStyle.globalWorkSizeDX,
                                            (uint32_t)kd.gemmStyle.globalWorkSizeDY,
                                            (uint32_t)kd.gemmStyle.globalWorkSizeDZ,
@@ -228,10 +231,11 @@ fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_base::Set
     const auto& out = params.output;
     kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
     std::vector<size_t> global;
-    if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf) {
-        global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
+    if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf ||
+        params.output.GetLayout() == DataLayout::bfzyx || params.output.GetLayout() == DataLayout::bfzyx_f16) {
+        global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
     } else {
-        global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
+        global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v * out.Z().v };
     }
 
     auto local = GetOptimalLocalWorkGroupSizes(global);
@@ -358,31 +362,38 @@ KernelsData fused_conv_eltwise_kernel_base::GetKernelsDataForAutoTune(const Para
 }
 
 static DataTensor GetConvolutionBFYXPaddedTensor(const fused_conv_eltwise_params& cp) {
-    assert(cp.inputs[0].GetDims().size() == 4U);
-
     DataTensor t = cp.inputs[0];
-    std::vector<Tensor::Pad> pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}};
+    std::vector<Tensor::Pad> pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}};
 
     auto& conv = cp.conv;
 
     pad[0].before = conv.padding.x;
     pad[1].before = conv.padding.y;
+    pad[2].before = conv.padding.z;
 
     const auto inputLimitX = (cp.output.X().v - 1) * conv.stride.x + (conv.filterSize.x - 1) * conv.dilation.x + 1;
     const auto inputLimitY = (cp.output.Y().v - 1) * conv.stride.y + (conv.filterSize.y - 1) * conv.dilation.y + 1;
+    const auto inputLimitZ = (cp.output.Z().v - 1) * conv.stride.z + (conv.filterSize.z - 1) * conv.dilation.z + 1;
 
     pad[0].after = (size_t)std::max(static_cast<int>(inputLimitX) - static_cast<int>(t.X().v) - static_cast<int>(pad[0].before), static_cast<int>(0));
     pad[1].after = (size_t)std::max(static_cast<int>(inputLimitY) - static_cast<int>(t.Y().v) - static_cast<int>(pad[1].before), static_cast<int>(0));
+    pad[2].after = (size_t)std::max(static_cast<int>(inputLimitZ) - static_cast<int>(t.Z().v) - static_cast<int>(pad[2].before), static_cast<int>(0));
 
-    Tensor::NDims dims(4);
+    Tensor::NDims dims(5);
     const Tensor::NDims& orgDims = cp.inputs[0].GetDims();
     size_t pitch = 1;
-    for (size_t i = 0; i < dims.size(); i++) {
+    size_t i;
+    for (i = 0; i < orgDims.size(); i++) {
         dims[i].pad = pad[i];
         dims[i].v = orgDims[i].v;
         dims[i].pitch = pitch;
         pitch *= dims[i].LogicalDimPadded();
     }
+    for (size_t j = i; j < dims.size(); j++) {
+        dims[j].pad = { 0, 0 };
+        dims[j].v = 1;
+        dims[j].pitch = pitch;
+    }
 
     return {dims, t.GetDType(), t.GetLayout()};
 }
@@ -390,16 +401,19 @@ static DataTensor GetConvolutionBFYXPaddedTensor(const fused_conv_eltwise_params
 bool CheckConvolutionPaddedInputDesc(const fused_conv_eltwise_params& params, const DataTensor& reqDesc) {
     bool properPadding = reqDesc.X().pad.before <= params.inputs[0].X().pad.before &&
                          reqDesc.Y().pad.before <= params.inputs[0].Y().pad.before &&
+                         reqDesc.Z().pad.before <= params.inputs[0].Z().pad.before &&
                          reqDesc.Feature().pad.before <= params.inputs[0].Feature().pad.before &&
                          reqDesc.Batch().pad.before <= params.inputs[0].Batch().pad.before;
 
     properPadding &= reqDesc.X().pad.after <= params.inputs[0].X().pad.after &&
                      reqDesc.Y().pad.after <= params.inputs[0].Y().pad.after &&
+                     reqDesc.Z().pad.after <= params.inputs[0].Z().pad.after &&
                      reqDesc.Feature().pad.after <= params.inputs[0].Feature().pad.after &&
                      reqDesc.Batch().pad.after <= params.inputs[0].Batch().pad.after;
 
     properPadding &=
-        ((params.conv.padding.x == 0 && params.conv.padding.y == 0) || params.inputs[0].GetPaddedVal() == 0.f);
+        ((params.conv.padding.x == 0 && params.conv.padding.y == 0 && params.conv.padding.z == 0) ||
+            params.inputs[0].GetPaddedVal() == 0.f);
 
     return properPadding;
 }
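The padded-tensor helper above computes, per axis, the input extent the fused convolution actually reads, inputLimit = (out - 1) * stride + (filter - 1) * dilation + 1, and then pads whatever the real input plus its existing before-padding does not cover. A small worked sketch of the X-axis case with hypothetical numbers:

    #include <algorithm>
    #include <iostream>

    int main() {
        // Hypothetical 1-D example of the inputLimit / pad.after arithmetic above.
        const int out_x = 16, stride_x = 2, filter_x = 3, dilation_x = 1;
        const int in_x = 30, pad_before = 1;

        // Input extent the convolution actually reads.
        const int input_limit = (out_x - 1) * stride_x + (filter_x - 1) * dilation_x + 1;  // 33

        // Pad after = whatever the real input plus existing padding does not cover.
        const int pad_after = std::max(input_limit - in_x - pad_before, 0);  // 2

        std::cout << "input_limit = " << input_limit << ", pad_after = " << pad_after << "\n";
    }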
index ec9a9cb..e7526f2 100644 (file)
@@ -45,7 +45,7 @@ struct fused_conv_eltwise_params : public weight_bias_params {
         MultiDataTensor weights_quantization_factors;
         MultiDataTensor output_calibration_factors;
 
-        base_activation_params activation;
+        std::vector<base_activation_params> activations;
     } conv;
 
     struct eltw_data {
index 438b04c..d4f4430 100644 (file)
@@ -34,6 +34,10 @@ ParamsKey fused_conv_eltwise_kernel_ref::GetSupportedKey() const {
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::yxfb);
+    k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableNonBiasTerm();
@@ -77,8 +81,8 @@ JitConstants fused_conv_eltwise_kernel_ref::GetJitConstants(const fused_conv_elt
     // TODO: This gives both ACTIVATION and ACTIVATION_TYPED. Should we
     // factor that out into a virtual function to avoid creation of similar
     // yet distinct macros?
-    jit.Merge(MakeActivationJitConstants(params.conv.activation, "_CONV_TYPED", true));
-    jit.Merge(MakeActivationJitConstants(params.activation, "_ELTW_TYPED", true));
+    jit.Merge(MakeActivationJitConstants(params.conv.activations, "_CONV_TYPED", true));
+    jit.Merge(MakeActivationJitConstants(params.activations, "_ELTW_TYPED", true));
     // Needs to be done on host to get _MAX_VAL/_MIN_VAL/TO_TYPE macros
     // available (will be used in the activations).
     //
index 8cadbc8..64d1925 100644 (file)
@@ -41,6 +41,7 @@ protected:
             WeightsLayout::yxio,
             WeightsLayout::iyxo,
             WeightsLayout::oyxi,
+            WeightsLayout::oizyx,
             // TODO: Verify that local convolution works as expected.
             // WeightsLayout::bf_lyx_yx,
         };
index 4d06b9f..e92b6da 100644 (file)
@@ -23,7 +23,7 @@ namespace kernel_selector {
 // gather_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct gather_params : public base_params {
-    gather_params() : base_params(KernelType::GATHER) {}
+    gather_params() : base_params(KernelType::GATHER), axis(GatherAxis::BATCH) {}
 
     GatherAxis axis;
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.cpp
new file mode 100644 (file)
index 0000000..95d6d25
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gather_tree_kernel_base.h"
+#include <vector>
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+    JitConstants GatherTreeKernelBase::GetJitConstants(const gather_tree_params & params) const {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+        return jit;
+    }
+
+    GatherTreeKernelBase::DispatchData GatherTreeKernelBase::SetDefault(const gather_tree_params & params) const {
+        std::vector<size_t> global{
+                                    params.output.Y().v,  // beam
+                                    params.output.Feature().v,  // batch
+                                    1
+                                  };
+        const auto& local = GetOptimalLocalWorkGroupSizes(global);
+        /*
+            b -> time
+            f -> batch
+            y -> beam
+        */
+        DispatchData data;
+        data.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+        data.gws0 = global[0];
+        data.gws1 = global[1];
+        data.gws2 = global[2];
+        data.lws0 = local[0];
+        data.lws1 = local[1];
+        data.lws2 = local[2];
+        return data;
+    }
+
+    KernelsData GatherTreeKernelBase::GetCommonKernelsData(const Params& params,
+                                                            const optional_params& options,
+                                                            float estimated_time) const {
+        assert(params.GetType() == KernelType::GATHER_TREE);
+        const auto& gt_params = static_cast<const gather_tree_params&>(params);
+
+        auto run_info = SetDefault(gt_params);
+        auto kernel_data = KernelData::Default<gather_tree_params>(params);
+        auto cldnn_jit = GetJitConstants(gt_params);
+        auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, options);
+        auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+        FillCLKernelData(kernel_data.kernels[0], run_info, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 4);
+        kernel_data.estimatedTime = estimated_time;
+        return { kernel_data };
+    }
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_base.h
new file mode 100644 (file)
index 0000000..4e5de0f
--- /dev/null
@@ -0,0 +1,47 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+namespace kernel_selector {
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// gather_tree_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct gather_tree_params : public base_params {
+    gather_tree_params() : base_params(KernelType::GATHER_TREE) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// gather_tree_optional_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct gather_tree_optional_params : optional_params {
+    gather_tree_optional_params() : optional_params(KernelType::GATHER_TREE) {}
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// GatherTreeKernelBase
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class GatherTreeKernelBase : public common_kernel_base {
+public:
+    using common_kernel_base::common_kernel_base;
+    using DispatchData = CommonDispatchData;
+
+protected:
+    JitConstants GetJitConstants(const gather_tree_params& params) const;
+    DispatchData SetDefault(const gather_tree_params& params) const;
+    KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+};
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_ref.cpp
new file mode 100644 (file)
index 0000000..eb3e029
--- /dev/null
@@ -0,0 +1,41 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gather_tree_kernel_ref.h"
+
+namespace kernel_selector {
+KernelsData GatherTreeKernelRef::GetKernelsData(const Params & params, const optional_params & options) const {
+    return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+}
+
+ParamsKey GatherTreeKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+
+    k.EnableInputDataType(Datatype::INT32);
+    k.EnableOutputDataType(Datatype::INT32);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F32);
+
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::yxfb);
+    k.EnableOutputLayout(DataLayout::yxfb);
+    k.EnableOutputLayout(DataLayout::byxf);
+    k.EnableInputLayout(DataLayout::byxf);
+
+    k.EnableBatching();
+
+    return k;
+}
+}  // namespace kernel_selector
@@ -1,4 +1,4 @@
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #pragma once
 
-#include "fully_connected_kernel_base.h"
+#include "gather_tree_kernel_base.h"
 
 namespace kernel_selector {
-
-class FullyConnected_image_tutorial : public FullyConnectedKernelBase {
+class GatherTreeKernelRef : public GatherTreeKernelBase {
 public:
-    using Parent = FullyConnectedKernelBase;
-
-    FullyConnected_image_tutorial() : Parent("fully_connected_gpu_image_tutorial") {}
+    GatherTreeKernelRef() : GatherTreeKernelBase("gather_tree_gpu_ref") {}
 
     KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
     ParamsKey GetSupportedKey() const override;
-
-protected:
-    DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
 };
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_selector.cpp
new file mode 100644 (file)
index 0000000..adf9ae1
--- /dev/null
@@ -0,0 +1,25 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gather_tree_kernel_selector.h"
+#include "gather_tree_kernel_ref.h"
+
+namespace kernel_selector {
+    gather_tree_kernel_selector::gather_tree_kernel_selector() { Attach<GatherTreeKernelRef>(); }
+
+    KernelsData gather_tree_kernel_selector::GetBestKernels(const Params& params,
+                                                            const optional_params& options) const {
+        return GetNaiveBestKernel(params, options, KernelType::GATHER_TREE);
+    }
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather_tree/gather_tree_kernel_selector.h
new file mode 100644 (file)
index 0000000..a250640
--- /dev/null
@@ -0,0 +1,31 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+class gather_tree_kernel_selector : public kernel_selector_base {
+public:
+    static gather_tree_kernel_selector& Instance() {
+        static gather_tree_kernel_selector instance;
+        return instance;
+    }
+
+    gather_tree_kernel_selector();
+
+    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+};
+}  // namespace kernel_selector
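The selector added above follows the usual clDNN registration pattern: a function-local static singleton that attaches each kernel implementation once and then lets the base class pick the best validated candidate. The sketch below is a generic, self-contained illustration of that pattern; the types and the PickFirstValid helper are hypothetical simplifications, not the real kernel_selector_base API:

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical, simplified stand-ins for the selector/registration pattern above.
    struct Kernel {
        std::string name;
        std::function<bool()> validate;  // would take Params in the real code
    };

    class selector_base {
    public:
        template <class K>
        void Attach() { kernels_.push_back(K{}.Describe()); }
        const Kernel* PickFirstValid() const {
            for (const auto& k : kernels_)
                if (k.validate()) return &k;
            return nullptr;
        }
    private:
        std::vector<Kernel> kernels_;
    };

    struct RefKernel {
        Kernel Describe() const { return {"gather_tree_gpu_ref", [] { return true; }}; }
    };

    class my_selector : public selector_base {
    public:
        static my_selector& Instance() {  // function-local static, as in the header above
            static my_selector instance;
            return instance;
        }
    private:
        my_selector() { Attach<RefKernel>(); }
    };

    int main() {
        const Kernel* k = my_selector::Instance().PickFirstValid();
        std::cout << (k ? k->name : "none") << "\n";
    }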
index b01e6ac..a221f41 100644 (file)
@@ -27,7 +27,7 @@ struct lookup_table_params : public base_params {
     lookup_table_params() : base_params(KernelType::LOOKUP_TABLE) {}
 
     LookUpTableAxis lookUpTableAxis = LookUpTableAxis::XYF;
-    uint32_t numberOfValues;
+    uint32_t numberOfValues = 0;
     DataTensor inputIndices;
 
     virtual ParamsKey GetParamsKey() const {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp
new file mode 100644 (file)
index 0000000..a2245ad
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "lstm_dynamic/lstm_dynamic_input_bfyx_opt.h"
+#include "kernel_selector_utils.h"
+
+#include <vector>
+
+namespace kernel_selector {
+
+ParamsKey LSTM_DynamicInputKernelBfyxOpt::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputWeightsType(WeightsType::F32);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableDifferentTypes();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBatching();
+    k.EnableLSTMGEMMBias();
+    k.EnableNonBiasTerm();
+    k.EnableBiasPerFeature();
+    k.EnableBiasPerOutput();
+    k.EnableSubGroup();
+    k.EnableSubGroupShort();
+    return k;
+}
+
+bool kernel_selector::LSTM_DynamicInputKernelBfyxOpt::Validate(const Params & p, const optional_params & o) const {
+    if (!LSTM_DynamicInputKernelBase::Validate(p, o)) {
+        return false;
+    }
+
+    const auto& params = static_cast<const lstm_dynamic_input_params&>(p);
+
+    const auto& weights  = params.weights;
+    const auto weights_x = weights.X().v;
+    const auto weights_y = weights.Y().v;
+    const auto& input = params.inputs[0];
+    const auto& out   = params.output;
+
+    bool input_X_div_by_8 = input.X().v % 8 == 0;
+    bool weights_X_div_by_8 = weights_x % 8 == 0;
+    bool weights_Y_div_by_8_x_simd_size = weights_y % (8 * simd_size) == 0;
+    bool gws0_size = out.X().v / simd_size <= 512;  // ToDo remove condition and update .cl code for bigger gws0
+
+    if (!input_X_div_by_8 ||
+        !weights_X_div_by_8 ||
+        !weights_Y_div_by_8_x_simd_size ||
+        !gws0_size)
+        return false;
+    return true;
+}
+
+KernelsData LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params, const optional_params& options) const {
+    if (!Validate(params, options)) {
+        return {};
+    }
+
+    DispatchData run_info;
+
+    KernelData kd = KernelData::Default<lstm_dynamic_input_params>(params);
+    lstm_dynamic_input_params& dlstm_params = *static_cast<lstm_dynamic_input_params*>(kd.params.get());
+
+    const auto& out = dlstm_params.output;
+    auto hidden_size = out.X().v;
+
+    std::vector<size_t> global = { hidden_size / simd_size, out.Batch().v * out.Y().v, out.Feature().v };
+    const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+    run_info.gws0 = global[0];
+    run_info.gws1 = global[1];
+    run_info.gws2 = global[2];
+
+    run_info.lws0 = local[0];
+    run_info.lws1 = local[1];
+    run_info.lws2 = local[2];
+
+    run_info.fp16UnitUsed = dlstm_params.inputs[0].GetDType() == Datatype::F16;
+
+    bool succeed = UpdateWeightsParams(dlstm_params,
+        options,
+        { WeightsLayout::dlstm_dir_io },
+        kd.weightsReorderParams,
+        GetSupportedKey());
+
+    if (!succeed) {
+        return {};
+    }
+
+    auto cldnn_jit = GetJitConstants(dlstm_params);
+    auto entry_point = GetEntryPoint(kernelName, dlstm_params.layerID, options);
+    auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+    auto& kernel = kd.kernels[0];
+    kernel.workGroups.global = { run_info.gws0, run_info.gws1, run_info.gws2 };
+    kernel.workGroups.local = { run_info.lws0, run_info.lws1, run_info.lws2 };
+    kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
+    SetKernelArguments(dlstm_params, kernel);
+
+    kd.estimatedTime = FORCE_PRIORITY_5;
+    return { kd };
+}
+}  // namespace kernel_selector
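The bfyx-opt Validate above gates the optimized dLSTM input kernel on a few divisibility constraints: input X and weights X divisible by 8, weights Y divisible by 8 * simd_size, and output X / simd_size no larger than 512. A plain restatement of those checks with hypothetical shapes and simd_size = 8, matching the constant declared in the accompanying header:

    #include <cstddef>
    #include <iostream>

    // Plain restatement of the divisibility checks above (hypothetical shapes).
    static bool FitsBfyxOpt(size_t input_x, size_t weights_x, size_t weights_y,
                            size_t out_x, size_t simd_size) {
        const bool input_x_div_by_8          = input_x % 8 == 0;
        const bool weights_x_div_by_8        = weights_x % 8 == 0;
        const bool weights_y_div_by_8_x_simd = weights_y % (8 * simd_size) == 0;
        const bool gws0_small_enough         = out_x / simd_size <= 512;
        return input_x_div_by_8 && weights_x_div_by_8 &&
               weights_y_div_by_8_x_simd && gws0_small_enough;
    }

    int main() {
        // e.g. input X = 64, weights X = 256, weights Y = 128, output X = 256, simd 8
        std::cout << std::boolalpha
                  << FitsBfyxOpt(64, 256, 128, 256, 8) << "\n";  // true: 128 % 64 == 0, 256 / 8 <= 512
    }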
@@ -1,4 +1,5 @@
-// Copyright (c) 2019 Intel Corporation
+/*
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+*/
 
 #pragma once
 
-#include "convolution_kernel_base.h"
-#include <vector>
+#include "lstm_dynamic_input_kernel_base.h"
 
 namespace kernel_selector {
-
-class ConvolutionKernel_bfzyx_Ref : public ConvolutionKernelBase {
+class LSTM_DynamicInputKernelBfyxOpt : public LSTM_DynamicInputKernelBase {
 public:
-    ConvolutionKernel_bfzyx_Ref() : ConvolutionKernelBase("convolution_gpu_bfzyx_ref") {}
-    virtual ~ConvolutionKernel_bfzyx_Ref() {}
+    LSTM_DynamicInputKernelBfyxOpt() : LSTM_DynamicInputKernelBase("lstm_dynamic_input_bfyx_opt") {}
 
+    virtual ~LSTM_DynamicInputKernelBfyxOpt() {}
     KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-    ParamsKey GetSupportedKey() const override;
 
 protected:
-    std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override {
-        return {
-            WeightsLayout::oizyx,
-        };
-    }
+    ParamsKey GetSupportedKey() const override;
+    bool Validate(const Params& p, const optional_params& o) const;
+
+private:
+    const uint32_t simd_size = 8;
 };
 }  // namespace kernel_selector
index a731c1d..f1d3a57 100644 (file)
@@ -23,33 +23,13 @@ namespace kernel_selector {
 JitConstants LSTM_DynamicInputKernelBase::GetJitConstants(const lstm_dynamic_input_params& params) const {
     JitConstants jit = MakeBaseParamsJitConstants(params);
 
-    const auto& out = params.output;
-    size_t hidden_size = out.X().v / 4;
-
-    // [1] Certainties
-    jit.AddConstants({
-        // IE default: fizo
-        MakeJitConstant("GEMM_OFFSET_I", 1 * hidden_size),
-        MakeJitConstant("GEMM_OFFSET_O", 3 * hidden_size),
-        MakeJitConstant("GEMM_OFFSET_F", 0 * hidden_size),
-        MakeJitConstant("GEMM_OFFSET_Z", 2 * hidden_size),
-    });
-
     jit.AddConstants({MakeJitConstant("WEIGHTS", params.weights),
                       MakeJitConstant("DYN_LENGTH", params.inputs.at(1)),
-                      MakeJitConstant("HIDDEN_SIZE", hidden_size),
                       MakeJitConstant("MAX_SEQUENCE_LENGTH", params.inputs.at(0).Feature().v)});
 
     // [2] Optionals
-    if (params.has_hidden) {
-        const auto& hidden = params.hidden;
-        jit.AddConstants({
-            MakeJitConstant("INIT_HIDDEN_TERM", true),
-            MakeJitConstant("INIT_HIDDEN", hidden),
-        });
-    }
-    if (params.has_bias) {
-        jit.AddConstants({MakeJitConstant("BIAS", params.bias), MakeJitConstant("BIAS_TERM", true)});
+    if (!params.bias.empty()) {
+        jit.AddConstants({MakeJitConstant("BIAS", params.bias[0]), MakeJitConstant("BIAS_TERM", true)});
     }
 
     return jit;
@@ -76,6 +56,16 @@ LSTM_DynamicInputKernelBase::DispatchData LSTM_DynamicInputKernelBase::SetDefaul
     return kd;
 }
 
+void kernel_selector::LSTM_DynamicInputKernelBase::SetKernelArguments(const lstm_dynamic_input_params& params, clKernelData& kernel) const {
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 });
+    if (!params.bias.empty()) {
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::BIAS, 0 });
+    }
+}
+
 KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& params,
                                                               const optional_params& options,
                                                               float estimated_time) const {
@@ -95,17 +85,7 @@ KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& para
     auto& kernel = k_data.kernels[0];
     kernel.workGroups.global = {run_info.gws0, run_info.gws1, run_info.gws2};
     kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
-    uint32_t input_idx = 0;
-    kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, input_idx++});
-    kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, input_idx++});
-    kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
-    kernel.arguments.push_back({ArgumentDescriptor::Types::WEIGHTS, 0});
-    if (orgParams.has_hidden) {
-        kernel.arguments.push_back({ArgumentDescriptor::Types::HIDDEN, 0});
-    }
-    if (orgParams.has_bias) {
-        kernel.arguments.push_back({ArgumentDescriptor::Types::BIAS, 0});
-    }
+    SetKernelArguments(orgParams, kernel);
 
     k_data.estimatedTime = estimated_time;
     return {k_data};
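
Aside (not from the patch): the refactor above reduces the dLSTM input kernel's argument list to a fixed prefix plus an optional trailing BIAS slot. A self-contained sketch with hypothetical types:

#include <cstdint>
#include <iostream>
#include <vector>

enum class ArgType { INPUT, OUTPUT, WEIGHTS, BIAS };

struct ArgDesc {
    ArgType  type;
    uint32_t index;
};

// Same ordering as SetKernelArguments above: data, dynamic lengths, output,
// weights, then BIAS only when a bias tensor is present.
std::vector<ArgDesc> make_dlstm_input_args(bool has_bias) {
    std::vector<ArgDesc> args = {
        {ArgType::INPUT, 0},   // input data
        {ArgType::INPUT, 1},   // dynamic sequence lengths
        {ArgType::OUTPUT, 0},
        {ArgType::WEIGHTS, 0},
    };
    if (has_bias)
        args.push_back({ArgType::BIAS, 0});
    return args;
}

int main() {
    std::cout << make_dlstm_input_args(true).size() << '\n';   // 5
    std::cout << make_dlstm_input_args(false).size() << '\n';  // 4
}

Moving this into a single SetKernelArguments helper lets the reference kernel and the new bfyx-optimized kernel share one argument layout instead of duplicating the push_back sequence.
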
index fa21ab0..41ce4ea 100644 (file)
@@ -15,7 +15,7 @@
 */
 
 #pragma once
-
+#include "weight_bias_params.h"
 #include "common_kernel_base.h"
 #include "kernel_selector_params.h"
 
@@ -23,43 +23,17 @@ namespace kernel_selector {
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // lstm_dynamic_input_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct lstm_dynamic_input_params : public base_params {
-    lstm_dynamic_input_params() : base_params(KernelType::LSTM_DYNAMIC_INPUT) {}
-
-    DataTensor weights;
-    DataTensor bias;
-    DataTensor hidden;
+struct lstm_dynamic_input_params : public weight_bias_params {
+    lstm_dynamic_input_params() : weight_bias_params(KernelType::LSTM_DYNAMIC_INPUT) {}
 
-    bool has_bias = false;
-    bool has_hidden = false;
     int32_t direction = 1;
-
-    void set_bias(const DataTensor& v) {
-        bias = v;
-        has_bias = true;
-    }
-
-    void set_hidden(const DataTensor& v) {
-        hidden = v;
-        has_hidden = true;
-    }
-
-    ParamsKey GetParamsKey() const override {
-        ParamsKey k = base_params::GetParamsKey();
-
-        if (has_bias) {
-            k.EnableLSTMGEMMBias();
-        }
-
-        return k;
-    }
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // lstm_dynamic_input_optional_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct lstm_dynamic_input_optional_params : optional_params {
-    lstm_dynamic_input_optional_params() : optional_params(KernelType::LSTM_DYNAMIC_INPUT) {}
+struct lstm_dynamic_input_optional_params : weight_bias_optional_params {
+    lstm_dynamic_input_optional_params() : weight_bias_optional_params(KernelType::LSTM_DYNAMIC_INPUT) {}
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -78,6 +52,7 @@ protected:
     KernelsData GetCommonKernelsData(const Params& params,
                                      const optional_params& optParams,
                                      float estimated_time) const;
+    void SetKernelArguments(const lstm_dynamic_input_params& params, clKernelData& k_data) const;
 
     bool Validate(const Params& p, const optional_params&) const override {
         if (p.GetType() != KernelType::LSTM_DYNAMIC_INPUT) {
index 1c4de69..b8cf2b2 100644 (file)
 
 #include "lstm_dynamic_input_kernel_selector.h"
 #include "lstm_dynamic_input_ref_kernel.h"
+#include "lstm_dynamic_input_bfyx_opt.h"
 
 namespace kernel_selector {
-lstm_dynamic_input_kernel_selector::lstm_dynamic_input_kernel_selector() { Attach<LSTM_DynamicInputKernelRef>(); }
+lstm_dynamic_input_kernel_selector::lstm_dynamic_input_kernel_selector() {
+    Attach<LSTM_DynamicInputKernelRef>();
+    Attach<LSTM_DynamicInputKernelBfyxOpt>();
+}
 
 KernelsData lstm_dynamic_input_kernel_selector::GetBestKernels(const Params& params,
                                                                const optional_params& options) const {
index 390faa2..94458b5 100644 (file)
@@ -25,6 +25,8 @@ ParamsKey LSTM_DynamicInputKernelRef::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableInputWeightsType(WeightsType::F32);
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableDifferentTypes();
@@ -32,6 +34,9 @@ ParamsKey LSTM_DynamicInputKernelRef::GetSupportedKey() const {
     k.EnableTensorPitches();
     k.EnableBatching();
     k.EnableLSTMGEMMBias();
+    k.EnableNonBiasTerm();
+    k.EnableBiasPerFeature();
+    k.EnableBiasPerOutput();
     return k;
 }
 
index 611c383..190d13f 100644 (file)
@@ -106,6 +106,27 @@ LSTM_DynamicTimeloopKernelBase::DispatchData LSTM_DynamicTimeloopKernelBase::Set
     return kd;
 }
 
+void kernel_selector::LSTM_DynamicTimeloopKernelBase::SetKernelArguments(const lstm_dynamic_timeloop_params& params, clKernelData& kernel) const {
+    uint32_t input_idx = 0;
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ });
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ });
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
+    kernel.arguments.push_back({ ArgumentDescriptor::Types::RECURRENT, 0 });
+    if (params.has_hidden) {
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::HIDDEN, 0 });
+    }
+    if (params.has_cell) {
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::CELL, 0 });
+    }
+    if (params.has_last_hidden_output) {
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ });
+    }
+    if (params.has_last_cell_output) {
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ });
+    }
+}
+
+
 KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& params,
                                                                  const optional_params& options,
                                                                  float estimated_time) const {
@@ -126,23 +147,7 @@ KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& p
     kernel.workGroups.global = {run_info.gws0, run_info.gws1, run_info.gws2};
     kernel.workGroups.local  = {run_info.lws0, run_info.lws1, run_info.lws2};
     kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
-    uint32_t input_idx = 0;
-    kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, input_idx++});
-    kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, input_idx++});
-    kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
-    kernel.arguments.push_back({ArgumentDescriptor::Types::RECURRENT, 0});
-    if (org_params.has_hidden) {
-        kernel.arguments.push_back({ArgumentDescriptor::Types::HIDDEN, 0});
-    }
-    if (org_params.has_cell) {
-        kernel.arguments.push_back({ArgumentDescriptor::Types::CELL, 0});
-    }
-    if (org_params.has_last_hidden_output) {
-        kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, input_idx++});
-    }
-    if (org_params.has_last_cell_output) {
-        kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, input_idx++});
-    }
+    SetKernelArguments(org_params, kernel);
     k_data.estimatedTime = estimated_time;
     return {k_data};
 }
index b7f958d..cc13b64 100644 (file)
@@ -106,7 +106,7 @@ protected:
     KernelsData GetCommonKernelsData(const Params& params,
                                      const optional_params& optParams,
                                      float estimated_time) const;
-
+    void SetKernelArguments(const lstm_dynamic_timeloop_params& params, clKernelData& k_data) const;
     bool Validate(const Params& p, const optional_params&) const override {
         if (p.GetType() != KernelType::LSTM_DYNAMIC_TIMELOOP) {
             return false;
index 2daa0f0..0e96adf 100644 (file)
@@ -31,6 +31,8 @@ ParamsKey MVNKernelRef::GetSupportedKey() const {
     k.EnableOutputLayout(DataLayout::byxf);
     k.EnableInputLayout(DataLayout::bfzyx);
     k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
index cfede9c..1f93750 100644 (file)
@@ -22,7 +22,8 @@ namespace kernel_selector {
 // one_hot_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct one_hot_params : public base_params {
-    one_hot_params() : base_params(KernelType::ONE_HOT) {}
+    one_hot_params() : base_params(KernelType::ONE_HOT),
+    one_hot_axis(0), one_hot_limit(0), on_value(1.0), off_value(1.0) {}
     uint16_t one_hot_axis;
     int32_t one_hot_limit;
     float on_value;
index 560e78a..3926fc5 100644 (file)
@@ -74,7 +74,7 @@ PoolingKernelBase::DispatchData PoolingKernelBase::SetDefault(const pooling_para
     kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
 
     if (output.GetLayout() == DataLayout::bfyx || output.GetLayout() == DataLayout::byxf ||
-        output.GetLayout() == DataLayout::bfzyx) {
+        output.GetLayout() == DataLayout::bfzyx || output.GetLayout() == DataLayout::bfzyx_f16) {
         // Determine global work sizes.
         kd.gws2 = output.Batch().v * output.Feature().v;  // B, F
         kd.gws0 = Align(output.X().v, 32);                // X
index 80a2e65..ef90e97 100644 (file)
@@ -36,7 +36,7 @@ bool PoolingKernelGPUAverageOpt::Validate(const Params& p, const optional_params
 
     const pooling_params& params = static_cast<const pooling_params&>(p);
 
-    if (params.activation.function != ActivationFunction::NONE) {
+    if (!params.activations.empty()) {
         return {};
     }
 
index 083f70a..6b68575 100644 (file)
@@ -26,10 +26,12 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const {
     k.EnableInputLayout(DataLayout::yxfb);
     k.EnableInputLayout(DataLayout::byxf);
     k.EnableInputLayout(DataLayout::bfzyx);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::byxf);
     k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp
new file mode 100644 (file)
index 0000000..077a733
--- /dev/null
@@ -0,0 +1,87 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include <iostream>
+#include "quantize_kernel_base.h"
+#include "kernel_selector_utils.h"
+#include <string>
+
+namespace kernel_selector {
+
+bool QuantizeKernelBase::Validate(const Params& p, const optional_params&) const {
+    const quantize_params& params = static_cast<const quantize_params&>(p);
+    if (params.inputs.size() != 5)
+        return false;
+
+    // Binary packed output is possible only with bfyx input and b_fs_yx_32fp output
+    if (params.output.GetDType() == Datatype::BINARY &&
+        (params.output.GetLayout() != DataLayout::b_fs_yx_32fp || params.inputs[0].GetLayout() != DataLayout::bfyx))
+        return false;
+
+    return true;
+}
+
+JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params) const {
+    JitConstants jit = MakeBaseParamsJitConstants(params);
+
+    if (params.packed_binary_output) {
+        jit.AddConstant(MakeJitConstant("PACKED_BINARY_OUTPUT", params.packed_binary_output));
+        jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.output.Feature().v, 32)));
+        jit.AddConstant(MakeJitConstant("OC_BLOCK_SIZE", 32));
+        if ((params.inputs[3].LogicalSize() == 1 && params.inputs[4].LogicalSize() == 1) ||
+            (params.inputs[3].LogicalSize() == params.inputs[3].Batch().v &&
+             params.inputs[4].LogicalSize() == params.inputs[4].Batch().v)) {
+            jit.AddConstant(MakeJitConstant("SINGLE_OUT_VAL", 1));
+
+        } else if (params.inputs[3].LogicalSize() == params.output.Feature().v &&
+                   params.inputs[4].LogicalSize() == params.output.Feature().v) {
+            jit.AddConstant(MakeJitConstant("PER_CHANNEL_OUT_VAL", 1));
+        } else {
+            throw std::runtime_error("Unsupported const blob shape in node " + params.layerID);
+        }
+    }
+
+    jit.AddConstant(MakeJitConstant("LEVELS", params.levels));
+
+    return jit;
+}
+
+KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optional_params& options) const {
+    assert(params.GetType() == KernelType::QUANTIZE);
+
+    KernelData kd = KernelData::Default<quantize_params>(params);
+    quantize_params& newParams = *static_cast<quantize_params*>(kd.params.get());
+
+    if (!Validate(params, options)) {
+        return {};
+    }
+
+    auto runInfo = SetDefault(newParams, options);
+    auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+    auto cldnn_jit = GetJitConstants(newParams);
+    std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+    auto& kernel = kd.kernels[0];
+
+    kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
+    kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
+    kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
+    kernel.arguments = GetArgsDesc(static_cast<int>(newParams.inputs.size()), false, false, false, false);
+
+    kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+    return {kd};
+}
+}  // namespace kernel_selector
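
A standalone sketch (hypothetical helper names, not from the patch) of the packed-binary branch in GetJitConstants above: how many 32-bit words the packed feature dimension occupies, and how the sizes of the two output-range blobs (inputs[3] and inputs[4]) select SINGLE_OUT_VAL versus PER_CHANNEL_OUT_VAL.

#include <cstddef>
#include <iostream>
#include <stdexcept>

std::size_t ceil_div(std::size_t a, std::size_t b) { return (a + b - 1) / b; }

enum class OutValMode { Single, PerChannel };

// Simplified: `blob_batch` stands in for inputs[3].Batch().v / inputs[4].Batch().v.
OutValMode pick_out_val_mode(std::size_t out_lo_size, std::size_t out_hi_size,
                             std::size_t blob_batch, std::size_t out_features) {
    if ((out_lo_size == 1 && out_hi_size == 1) ||
        (out_lo_size == blob_batch && out_hi_size == blob_batch))
        return OutValMode::Single;
    if (out_lo_size == out_features && out_hi_size == out_features)
        return OutValMode::PerChannel;
    throw std::runtime_error("Unsupported const blob shape");
}

int main() {
    std::cout << ceil_div(100, 32) << '\n';  // OUTPUT_FEATURE_NUM_PACKED for 100 features: 4
    std::cout << (pick_out_val_mode(1, 1, 1, 100) == OutValMode::Single) << '\n';          // 1
    std::cout << (pick_out_val_mode(100, 100, 1, 100) == OutValMode::PerChannel) << '\n';  // 1
}
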
@@ -1,4 +1,4 @@
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 
 #pragma once
 
-#include "activation_kernel_base.h"
-
-// Step 0:
-//
-// 1. choose a tutorial mode
-// 2. modify activation_tutorial.cl as well
-
-#define ADVANCED_TUTORIAL  // simple runnable example with explanations
+#include "common_kernel_base.h"
+#include "quantize_kernel_params.h"
 
 namespace kernel_selector {
 
-class ActivationKernel_Tutorial : public ActivationKernelBase {
+class QuantizeKernelBase : public common_kernel_base {
 public:
-    using Parent = ActivationKernelBase;
-    ActivationKernel_Tutorial() : Parent("activation_tutorial") {}
-    virtual ~ActivationKernel_Tutorial() {}
+    using common_kernel_base::common_kernel_base;
+    virtual ~QuantizeKernelBase() {}
 
-    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-    ParamsKey GetSupportedKey() const override;
-
-protected:
-#ifdef ADVANCED_TUTORIAL
-    DispatchData SetDefault(const activation_params& arg) const override;
+    virtual JitConstants GetJitConstants(const quantize_params& params) const;
+    virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0;
     bool Validate(const Params& p, const optional_params& o) const override;
-    JitConstants GetJitConstants(const activation_params& params, DispatchData) const override;
-#endif
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
 };
 }  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h
new file mode 100644 (file)
index 0000000..f86572f
--- /dev/null
@@ -0,0 +1,46 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector {
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// quantize_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct quantize_params : public base_params {
+    quantize_params() : base_params(KernelType::QUANTIZE),
+    levels(0), packed_binary_output(false) {}
+
+    int levels;
+    bool packed_binary_output;
+
+    virtual ParamsKey GetParamsKey() const {
+        auto k = base_params::GetParamsKey();
+        if (packed_binary_output)
+            k.EnableQuantizePackedBinaryOutput();
+        return k;
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// quantize_optional_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct quantize_optional_params : optional_params {
+    quantize_optional_params() : optional_params(KernelType::QUANTIZE) {}
+};
+
+}  // namespace kernel_selector
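
For reference, a sketch of the quantization math that `levels` parameterizes, written against the usual FakeQuantize definition; this is an assumption on my part, since the .cl kernels themselves are not part of this diff. `packed_binary_output` corresponds to the two-level (binary) case, which the packed path stores 32 features per word.

#include <cmath>
#include <iostream>

// Reference-style scalar quantization (assumed semantics): clamp to the input
// range, snap to one of `levels` steps, then rescale into the output range.
float fake_quantize(float x, float in_lo, float in_hi,
                    float out_lo, float out_hi, int levels) {
    if (x <= in_lo) return out_lo;
    if (x > in_hi)  return out_hi;
    const float q = std::round((x - in_lo) / (in_hi - in_lo) * (levels - 1));
    return q / (levels - 1) * (out_hi - out_lo) + out_lo;
}

int main() {
    // 256 levels, [0, 1] -> [0, 255]: behaves like uint8 quantization.
    std::cout << fake_quantize(0.5f, 0.f, 1.f, 0.f, 255.f, 256) << '\n';  // 128
    // 2 levels: a binarization step.
    std::cout << fake_quantize(0.7f, 0.f, 1.f, -1.f, 1.f, 2) << '\n';     // 1
}
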
index cf11097..01486a4 100644 (file)
@@ -25,14 +25,18 @@ ParamsKey QuantizeKernelRef::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::BINARY);
     k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableInputLayout(DataLayout::bfyx_f16);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::b_fs_yx_32fp);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
     k.EnableDifferentTypes();
+    k.EnableQuantizePackedBinaryOutput();
     return k;
 }
 
@@ -55,50 +59,22 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params,
 }
 
 JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params) const {
-    JitConstants jit = MakeBaseParamsJitConstants(params);
-
-    jit.AddConstant(MakeJitConstant("PACKED_BINARY_OUTPUT", params.packed_binary_output));
-    assert(params.inputs.size() == 5);
-    if (params.packed_binary_output) {
-        if ((params.inputs[3].LogicalSize() == 1 && params.inputs[4].LogicalSize() == 1) ||
-            (params.inputs[3].LogicalSize() == params.inputs[3].Batch().v &&
-             params.inputs[4].LogicalSize() == params.inputs[4].Batch().v)) {
-            jit.AddConstant(MakeJitConstant("SINGLE_OUT_VAL", 1));
-
-        } else if (params.inputs[3].LogicalSize() == params.output.Feature().v &&
-                   params.inputs[4].LogicalSize() == params.output.Feature().v) {
-            jit.AddConstant(MakeJitConstant("PER_CHANNEL_OUT_VAL", 1));
-        } else {
-            throw std::runtime_error("Unsupported const blob shape in node " + params.layerID);
-        }
-    }
-    jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.output.Feature().v, 32)));
-    jit.AddConstant(MakeJitConstant("OC_BLOCK_SIZE", 32));
-    jit.AddConstant(MakeJitConstant("LEVELS", params.levels));
-
+    JitConstants jit = Parent::GetJitConstants(params);
     return jit;
 }
 
-KernelsData QuantizeKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
-    assert(params.GetType() == KernelType::QUANTIZE);
-
-    KernelData kd = KernelData::Default<quantize_params>(params);
-    quantize_params& newParams = *static_cast<quantize_params*>(kd.params.get());
-
-    auto runInfo = SetDefault(newParams, options);
-    auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
-    auto cldnn_jit = GetJitConstants(newParams);
-    std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
-
-    auto& kernel = kd.kernels[0];
-
-    kernel.workGroups.global = {runInfo.gws0, runInfo.gws1, runInfo.gws2};
-    kernel.workGroups.local = {runInfo.lws0, runInfo.lws1, runInfo.lws2};
-    kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
-    kernel.arguments = GetArgsDesc(static_cast<int>(newParams.inputs.size()), false, false, false, false);
-
-    kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
-
-    return {kd};
+bool QuantizeKernelRef::Validate(const Params& p, const optional_params&) const {
+    const quantize_params& params = static_cast<const quantize_params&>(p);
+    if (params.inputs.size() != 5)
+        return false;
+
+    // Binary packed output is possible only with b_fs_yx_32fp output layout and some input layouts
+    if (params.output.GetDType() == Datatype::BINARY &&
+        (params.output.GetLayout() != DataLayout::b_fs_yx_32fp ||
+        (params.inputs[0].GetLayout() != DataLayout::bfyx &&
+         params.inputs[0].GetLayout() != DataLayout::bfyx_f16)))
+        return false;
+    return true;
 }
+
 }  // namespace kernel_selector
index de744eb..ca4287b 100644 (file)
 
 #pragma once
 
-#include "common_kernel_base.h"
+#include "quantize_kernel_base.h"
 
 namespace kernel_selector {
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// quantize_params
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct quantize_params : public base_params {
-    quantize_params() : base_params(KernelType::QUANTIZE) {}
 
-    int levels;
-    bool packed_binary_output;
-
-    virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// quantize_optional_params
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct quantize_optional_params : optional_params {
-    quantize_optional_params() : optional_params(KernelType::QUANTIZE) {}
-};
-
-class QuantizeKernelRef : public common_kernel_base {
+class QuantizeKernelRef : public QuantizeKernelBase {
 public:
-    QuantizeKernelRef() : common_kernel_base("quantize_ref") {}
+    using Parent = QuantizeKernelBase;
+
+    QuantizeKernelRef() : QuantizeKernelBase("quantize_gpu_ref") {}
     virtual ~QuantizeKernelRef() {}
 
-    virtual JitConstants GetJitConstants(const quantize_params& params) const;
-    virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const;
-    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    JitConstants GetJitConstants(const quantize_params& params) const override;
+    CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override;
+    bool Validate(const Params& p, const optional_params& o) const override;
     ParamsKey GetSupportedKey() const override;
 };
 }  // namespace kernel_selector
index 4c85ea0..aa0d14d 100644 (file)
@@ -18,7 +18,9 @@
 
 namespace kernel_selector {
 
-quantize_kernel_selector::quantize_kernel_selector() { Attach<QuantizeKernelRef>(); }
+quantize_kernel_selector::quantize_kernel_selector() {
+    Attach<QuantizeKernelRef>();
+}
 
 KernelsData quantize_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
     return GetNaiveBestKernel(params, options, KernelType::QUANTIZE);
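
Not from the patch: a simplified sketch of the Attach<> pattern the selector uses above — a list of candidate implementations queried in turn. The ranking actually performed by GetNaiveBestKernel is more involved than the first-match rule shown here.

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for the attached kernel implementations.
struct Candidate {
    std::string name;
    std::function<bool(int)> validate;  // e.g. checks input count, layouts, dtypes
};

std::string pick_kernel(const std::vector<Candidate>& candidates, int num_inputs) {
    for (const auto& c : candidates)
        if (c.validate(num_inputs))
            return c.name;  // first candidate that accepts the parameters
    return "<none>";
}

int main() {
    const std::vector<Candidate> selector = {
        {"quantize_gpu_ref", [](int inputs) { return inputs == 5; }},
    };
    std::cout << pick_kernel(selector, 5) << '\n';  // quantize_gpu_ref
    std::cout << pick_kernel(selector, 3) << '\n';  // <none>
}
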
index 76c1950..955b42b 100644 (file)
@@ -24,7 +24,7 @@ namespace kernel_selector {
 // reduce_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct reduce_params : public base_params {
-    reduce_params() : base_params(KernelType::REDUCE) {}
+    reduce_params() : base_params(KernelType::REDUCE), reduceMode(ReduceMode::MAX), keepDims(0) {}
 
     ReduceMode reduceMode;
     std::vector<uint16_t> reduceAxes;
index 42a4ffa..826535e 100644 (file)
@@ -24,7 +24,8 @@ namespace kernel_selector {
 // region_yolo_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct region_yolo_params : public base_params {
-    region_yolo_params() : base_params(KernelType::REGION_YOLO) {}
+    region_yolo_params() : base_params(KernelType::REGION_YOLO),
+    coords(0), classes(0), num(0), mask_size(0), do_softmax(false) {}
 
     uint32_t coords;
     uint32_t classes;
index 3b80487..c9a6ec6 100644 (file)
@@ -34,6 +34,8 @@ inline uint32_t SubGroupSize(WeightsLayout l) {
         case WeightsLayout::os_is_yx_osv32_isv32p:
         case WeightsLayout::o_i_yx_i16_o16:
         case WeightsLayout::oiyx_o16:
+        case WeightsLayout::o_i_zyx_i16_o16:
+        case WeightsLayout::i_o_zyx_o16_i16:
             return 16;
         case WeightsLayout::os_i_osv8__ai8:
         case WeightsLayout::iy_xs_os_xsv2_osv8__ao32:
@@ -102,7 +104,7 @@ JitConstants ReorderKernelBase::GetJitConstants(const reorder_params& params) co
 
     // half->half without subtraction and activation (so plain reorder) can be done on shorts without explicit fp16 support
     bool useUshort = (params.inputs[0].GetDType() == Datatype::F16 && params.output.GetDType() == Datatype::F16 &&
-                      params.mode == MeanSubtractMode::NONE && params.activation.function == ActivationFunction::NONE);
+                      params.mode == MeanSubtractMode::NONE && params.activations.empty());
 
     Datatype calc_type = useUshort ? Datatype::UINT16 : params.inputs[0].GetDType();
     Datatype output_reorder_type = useUshort ? Datatype::UINT16 : params.output.GetDType();
@@ -115,7 +117,7 @@ JitConstants ReorderKernelBase::GetJitConstants(const reorder_params& params) co
     jit.AddConstant(MakeJitConstant("MEAN_OP(val, mean_val)", getMeanOpString(params.mean_op)));
 
     // Type parametrized activation:
-    jit.Merge(MakeActivationJitConstants(params.activation, "_TYPED", true));
+    jit.Merge(MakeActivationJitConstants(params.activations, "_TYPED", true));
 
     // TODO: Move to lower classes
     jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", SubGroupSize(params.output.GetLayout())));
index 6fb157c..6877538 100644 (file)
@@ -24,7 +24,8 @@ namespace kernel_selector {
 // reorder_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct reorder_params : public base_params {
-    reorder_params() : base_params(KernelType::REORDER) {}
+    reorder_params() : base_params(KernelType::REORDER),
+    winograd_input_offset_x(0), winograd_input_offset_y(0), winograd_nr_tiles_x(0) {}
 
     MeanSubtractMode mode = MeanSubtractMode::NONE;
     MeanOp mean_op = MeanOp::SUB;
index 3e16d8a..7ddc661 100644 (file)
@@ -24,10 +24,13 @@ ParamsKey ReorderKernelBinary::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::BINARY);
     k.EnableOutputDataType(Datatype::BINARY);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
     k.EnableDifferentTypes();
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::b_fs_yx_32fp);
     k.EnableOutputLayout(DataLayout::b_fs_yx_32fp);
+    k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
@@ -42,13 +45,17 @@ JitConstants ReorderKernelBinary::GetJitConstants(const reorder_params& params)
     const auto& input = newParams.inputs[0];
     jit.AddConstant(MakeJitConstant("ELEMENTS_COUNT", input.LogicalSize()));
     jit.AddConstant(MakeJitConstant("IFM_PACK_SIZE", 32));
-    jit.AddConstant(MakeJitConstant("OUTPUT_PACKED_FEATURES_NUM", CeilDiv(params.output.Feature().v, 32)));
 
     if (input.GetDType() == Datatype::BINARY) {
         jit.AddConstant(MakeJitConstant("BINARY_INPUT", 1));
         jit.AddConstant(MakeJitConstant("INPUT_PACKED_FEATURES_NUM", CeilDiv(input.Feature().v, 16)));
     }
 
+    if (params.output.GetDType() == Datatype::BINARY) {
+        jit.AddConstant(MakeJitConstant("BINARY_OUTPUT", 1));
+        jit.AddConstant(MakeJitConstant("OUTPUT_PACKED_FEATURES_NUM", CeilDiv(params.output.Feature().v, 32)));
+    }
+
     return jit;
 }
 
@@ -76,10 +83,18 @@ KernelsData ReorderKernelBinary::GetKernelsData(const Params& params, const opti
 
     const reorder_params& orgParams = static_cast<const reorder_params&>(params);
 
+    if (orgParams.inputs[0].GetDType() != Datatype::BINARY &&
+        orgParams.output.GetDType() != Datatype::BINARY)
+        return {};
+
     if (orgParams.inputs[0].GetDType() == Datatype::BINARY &&
         orgParams.inputs[0].GetLayout() != DataLayout::b_fs_yx_32fp)
         return {};
 
+    if (orgParams.output.GetDType() == Datatype::BINARY &&
+        orgParams.output.GetLayout() != DataLayout::b_fs_yx_32fp)
+        return {};
+
     auto estimatedTime = FORCE_PRIORITY_6;
 
     return GetCommonKernelsData(orgParams, options, estimatedTime);
index b3d5363..6cecc71 100644 (file)
@@ -24,7 +24,7 @@ namespace kernel_selector {
 // reorg_yolo_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct reorg_yolo_params : public base_params {
-    reorg_yolo_params() : base_params(KernelType::REORG_YOLO) {}
+    reorg_yolo_params() : base_params(KernelType::REORG_YOLO), stride(0) {}
 
     uint32_t stride;
 
index b779fc1..5db66fa 100644 (file)
@@ -21,7 +21,8 @@ namespace kernel_selector {
 // reverse_sequence_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct reverse_sequence_params : public base_params {
-    reverse_sequence_params() : base_params(KernelType::REVERSE_SEQUENCE) {}
+    reverse_sequence_params() : base_params(KernelType::REVERSE_SEQUENCE),
+    seq_axis(0), batch_axis(0) {}
 
     int32_t seq_axis;
     int32_t batch_axis;
index 5f11cde..b084ac4 100644 (file)
@@ -55,7 +55,7 @@ KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params,
     assert(params.GetType() == KernelType::ROI_POOLING);
     const roi_pooling_params& orgParams = static_cast<const roi_pooling_params&>(params);
 
-    if (orgParams.activation.function != ActivationFunction::NONE) {
+    if (!orgParams.activations.empty()) {
         return {};
     }
 
index bbdeb54..38f77e7 100644 (file)
@@ -23,7 +23,7 @@ namespace kernel_selector {
 // shuffle_channels_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct shuffle_channels_params : public base_params {
-    shuffle_channels_params() : base_params(KernelType::SHUFFLE_CHANNELS) {}
+    shuffle_channels_params() : base_params(KernelType::SHUFFLE_CHANNELS), group(0), axis(0) {}
 
     int32_t group;
     int32_t axis;
index e1c77ce..e09130d 100644 (file)
@@ -22,6 +22,7 @@ ParamsKey SoftmaxItemsClassKernelBase::GetDefaultSupportedKey() {
     k.EnableInputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputLayout(DataLayout::byxf);
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::yxfb);
     k.EnableInputLayout(DataLayout::bf);
@@ -29,11 +30,14 @@ ParamsKey SoftmaxItemsClassKernelBase::GetDefaultSupportedKey() {
     k.EnableInputLayout(DataLayout::bfzyx);
     k.EnableInputLayout(DataLayout::f);
     k.EnableOutputLayout(DataLayout::f);
+    k.EnableInputLayout(DataLayout::bfzyx_f16);
     k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::byxf);
     k.EnableOutputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::bf);
     k.EnableOutputLayout(DataLayout::fb);
     k.EnableOutputLayout(DataLayout::bfzyx);
+    k.EnableOutputLayout(DataLayout::bfzyx_f16);
     k.EnableSoftmaxDim(SoftmaxDim::X);
     k.EnableSoftmaxDim(SoftmaxDim::Y);
     k.EnableSoftmaxDim(SoftmaxDim::Z);
index 05de772..d01c5ed 100644 (file)
@@ -92,7 +92,7 @@ bool SoftmaxKernelBaseBF::Validate(const Params& p, const optional_params& o) co
     const softmax_params& params = static_cast<const softmax_params&>(p);
     const auto& input = params.inputs[0];
 
-    if (params.activation.function != ActivationFunction::NONE) {
+    if (!params.activations.empty()) {
         return false;
     }
 
index f5791fd..f2464b0 100644 (file)
@@ -21,7 +21,7 @@ namespace kernel_selector {
 // tile_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct tile_params : public base_params {
-    tile_params() : base_params(KernelType::TILE) {}
+    tile_params() : base_params(KernelType::TILE), axis(TileAxis::BATCH), tiles(0) {}
 
     TileAxis axis;
     int tiles;
index 3df5d2b..1573fbd 100644 (file)
@@ -93,6 +93,7 @@ void AutoTuner::StoreKernel(const std::string& cacheFilePath,
     std::ofstream cachedKernelsFile(cacheFilePath);
     rapidjson::StringBuffer buffer(0, 1024);
     rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
+    writer.SetFormatOptions(rapidjson::PrettyFormatOptions::kFormatSingleLineArray);
     onlineCache->Accept(writer);
     auto temp = buffer.GetString();
     cachedKernelsFile << temp;
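
For context (not part of the patch), a minimal rapidjson usage sketch showing the effect of the added SetFormatOptions call: with kFormatSingleLineArray the pretty-printer keeps each ["kernel_name", config] pair on a single line, which is exactly the shape of the tuning-cache entries added below.

#include <iostream>
#include <rapidjson/document.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>

int main() {
    rapidjson::Document doc;
    doc.Parse(R"({"24": {"123": ["convolution_gpu_bfyx_f16", 8]}})");

    rapidjson::StringBuffer buffer;
    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
    writer.SetFormatOptions(rapidjson::PrettyFormatOptions::kFormatSingleLineArray);  // arrays stay on one line
    doc.Accept(writer);

    std::cout << buffer.GetString() << '\n';
}
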
index 7e96586..260f591 100644 (file)
 {
     "24": {
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5643924526605879168": ["convolution_gpu_bfyx_os_iyx_osv16", 1102],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_os_iyx_osv16", 746],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 0],
+        "1132589293248085972": ["convolution_gpu_bfyx_os_iyx_osv16", 344],
+        "9751859564693419826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14126491856050876512": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 1102],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 0],
+        "8696280797132799196": ["convolution_gpu_bfyx_os_iyx_osv16", 693],
+        "7955761151310219022": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5206589624074157418": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "15804259593852912096": ["convolution_gpu_bfyx_os_iyx_osv16", 1049],
+        "9667577643691138471": ["convolution_gpu_bfyx_f16", 8],
+        "9587296295017154035": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "624896425985287215": ["convolution_gpu_bfyx_f16", 0],
+        "4143776775548070480": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "1941626906605395126": ["convolution_gpu_bfyx_os_iyx_osv16", 1055],
+        "17730451527258141168": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3538746967389669479": ["convolution_gpu_bfyx_os_iyx_osv16", 678],  
+        "12008052381997574625": ["convolution_gpu_bfyx_os_iyx_osv16", 98],
+        "14579060801049956629": ["convolution_gpu_bfyx_os_iyx_osv16", 111],
+        "2475732477128179942": ["convolution_gpu_bfyx_os_iyx_osv16", 92],
+        "7425269551190332752": ["convolution_gpu_bfyx_os_iyx_osv16", 51],
+        "2523330181210520033": ["convolution_gpu_bfyx_os_iyx_osv16", 814],
+        "9252516395349163399": ["fused_conv_eltwise_gpu_ref", 0],
+        "3025740595729338904": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "12700938470888412097": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "142182287837081331": ["convolution_gpu_bfyx_os_iyx_osv16", 65],
+        "4925269744341463388": ["convolution_gpu_bfyx_os_iyx_osv16", 212],
+        "7404267750384901384": ["convolution_gpu_bfyx_os_iyx_osv16", 863],
+        "1136134476921992394": ["convolution_gpu_bfyx_os_iyx_osv16", 663],
+        "12274460319290366021": ["convolution_gpu_bfyx_os_iyx_osv16", 55],
+        "1936271684402780579": ["convolution_gpu_bfyx_os_iyx_osv16", 917],
+        "7797907475238799442": ["fused_conv_eltwise_gpu_ref", 2],
+        "107092103514596960": ["convolution_gpu_bfyx_os_iyx_osv16", 758],
+        "8153466715673110154": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "13707460333812965439": ["convolution_gpu_bfyx_os_iyx_osv16", 848],   
+        "3057483147285040704": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "4460662214292495759": ["convolution_gpu_bfyx_f16", 8],
+        "17632851940131114495": ["convolution_gpu_bfyx_f16", 8],
+        "7945867532035693686": ["convolution_gpu_bfyx_f16", 8],
+        "10798155343477437060": ["convolution_gpu_bfyx_f16", 8],
+        "14191150640021059705": ["convolution_gpu_bfyx_f16", 8],
+        "14593228968660512118": ["convolution_gpu_bfyx_f16", 8],
+        "5573515532668433114": ["convolution_gpu_bfyx_f16", 8],
+        "11642934660277782628": ["convolution_gpu_bfyx_f16", 8],
+        "4825553592910970555": ["convolution_gpu_bfyx_f16", 8],
+        "17245530055973419690": ["convolution_gpu_bfyx_f16", 8],
+        "14644519840111409049": ["convolution_gpu_bfyx_f16", 8],
+        "15093112872571669071": ["convolution_gpu_bfyx_f16", 8],
+        "6172925429706792586": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16888914688498810916": ["convolution_gpu_bfyx_f16", 8],
+        "7094210524110336636": ["convolution_gpu_bfyx_f16", 8],
+        "1102719880087191972": ["convolution_gpu_bfyx_f16", 8],
+        "17635368969132641763": ["convolution_gpu_bfyx_f16", 8],
+        "6444855710931944326": ["convolution_gpu_bfyx_f16", 8],
+        "3685203889040861337": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8983142397488339162": ["convolution_gpu_bfyx_f16", 8],
+        "2942771097961823034": ["convolution_gpu_bfyx_f16", 8],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 368],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16", 8],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 8],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 8],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 8],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 8],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 8],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16", 8],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 7],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 5],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 8],
+        "338313831905889757": ["convolution_gpu_bfyx_f16", 3],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 5],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 8],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 5],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 8],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 5],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 5],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 78],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 61],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 799],
+        "570493430126610249": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5689213766720451736": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11153755804932874939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13074790088623248655": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14552950763379636885": ["convolution_gpu_bfyx_f16", 8],
+        "1094600023872583173": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16827633753490728058": ["convolution_gpu_bfyx_f16", 8],
+        "6130516122331504865": ["convolution_gpu_bfyx_f16", 8],
+        "7670629548971090825": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5029322578170351026": ["convolution_gpu_bfyx_f16", 8],
+        "11682717086936489649": ["convolution_gpu_bfyx_f16", 8],
+        "9372644596618467274": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14183733053550126939": ["convolution_gpu_bfyx_f16", 8],
+        "5642981720905097704": ["convolution_gpu_bfyx_f16", 5],
+        "3924580903671169312": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17700105511171786728": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14998223809620050073": ["convolution_gpu_bfyx_f16", 5],
+        "419201770890811765": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7063350782589593425": ["convolution_gpu_bfyx_f16", 7],
+        "10687898799916833174": ["convolution_gpu_bfyx_f16", 8],
+        "5341504900604548311": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8082311845702095517": ["convolution_gpu_bfyx_f16", 6],
+        "5769891345892528049": ["convolution_gpu_bfyx_f16", 6],
+        "5034821474694053994": ["convolution_gpu_bfyx_f16", 8],
+        "2717532297792072749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "368628635269408785": ["convolution_gpu_bfyx_f16", 6],
+        "10159612784755046280": ["convolution_gpu_bfyx_f16", 8],
+        "15051374440521170869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17031332595095892437": ["convolution_gpu_bfyx_f16", 6],
+        "6938198718430530942": ["convolution_gpu_bfyx_f16", 8],
+        "2358029178760210430": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16131007134197705525": ["convolution_gpu_bfyx_f16", 7],
+        "6612035874395100788": ["convolution_gpu_bfyx_f16", 6],
+        "15022677981959490269": ["convolution_gpu_bfyx_f16", 8],
+        "11900509609879947992": ["convolution_gpu_bfyx_f16", 3],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3968445701280656378": ["convolution_gpu_bfyx_f16", 8],
+        "7463742252314920613": ["convolution_gpu_bfyx_f16", 8],
+        "17747065651432157057": ["convolution_gpu_bfyx_f16", 8],
+        "2951437417233062866": ["convolution_gpu_bfyx_f16", 8],
+        "4695031178096669813": ["convolution_gpu_bfyx_f16", 8],
+        "13200791011072363046": ["convolution_gpu_bfyx_f16", 5],
+        "7702483443698911725": ["convolution_gpu_bfyx_f16", 5],
+        "3225276687886679210": ["convolution_gpu_bfyx_f16", 8],
+        "8406061878298060171": ["convolution_gpu_bfyx_f16", 8],
+        "11861948300376902542": ["convolution_gpu_bfyx_f16", 8],
+        "18047654118875021903": ["convolution_gpu_bfyx_f16", 8],
+        "3876838946012690078": ["convolution_gpu_bfyx_f16", 8],
+        "11532811324432477051": ["convolution_gpu_bfyx_f16", 8],
+        "16482627014547828135": ["convolution_gpu_bfyx_f16", 6],
+        "4565106422618308814": ["convolution_gpu_bfyx_f16", 8],
+        "16991433003318725315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "16286683168753184722": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7074368169815304627": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10702490327714920783": ["convolution_gpu_bfyx_gemm_like", 2],
+        "964168479107166949": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6252510766878541979": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1012052068628903875": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15499166167392043521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14327383763442344255": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18371627210590255356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13185859115957551268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15052792752810689842": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17918808521142517830": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1644157325342654261": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12198018126650448419": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9714393675511550323": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4928366179227934688": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15361605271135812199": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10267714663732575502": ["convolution_gpu_bfyx_1x1", 2],
+        "9990965405769569785": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10493403039286551634": ["convolution_gpu_bfyx_1x1", 1],
+        "18324310183763016728": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "6002923098500991259": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "3429780644945779272": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6005067060818453503": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3676547304316346974": ["convolution_gpu_bfyx_f16", 8],
+        "8412675332215210248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14477382651380138146": ["convolution_gpu_bfyx_f16", 8],
+        "15899888589766240554": ["convolution_gpu_bfyx_f16", 8],
+        "4529376177404929890": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7210896246223636810": ["convolution_gpu_bfyx_f16", 8],
+        "2775471071662652034": ["convolution_gpu_bfyx_f16", 8],
+        "17132456912135683375": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15563691660506818555": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9997402509928965207": ["convolution_gpu_bfyx_f16", 8],
+        "7793754164423097155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4639865771698877244": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1766192115208251594": ["convolution_gpu_bfyx_f16", 8],
+        "2015853414727933068": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10306264176864957825": ["convolution_gpu_bfyx_f16", 7],
+        "4871044181497936479": ["convolution_gpu_bfyx_f16", 7],
+        "8396548857016837452": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12714653556587252941": ["convolution_gpu_bfyx_f16", 6],
+        "1967886437456544865": ["convolution_gpu_bfyx_f16", 7],
+        "11350907923254547441": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12282274184666824734": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16866941685634953173": ["convolution_gpu_bfyx_f16", 6],
+        "6312283149621718315": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9795822066940245604": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "7256380059517365529": ["convolution_gpu_bfyx_f16", 7],
+        "11966909558503849515": ["convolution_gpu_bfyx_f16", 8],
+        "11277466712159791917": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4992371260504969141": ["convolution_gpu_bfyx_f16", 6],
+        "15043181455492553716": ["convolution_gpu_bfyx_f16", 7],
+        "8399107263382557054": ["convolution_gpu_bfyx_f16", 8],
+        "6350452055467384023": ["convolution_gpu_bfyx_f16", 8],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16", 595],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like", 2],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16", 341],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "4917807560042671575": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "44341776758472069": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "3584869801682702110": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "13032463401326344281": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "12074020528214820344": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10792368710075698135": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "15248304664655540462": ["convolution_gpu_bfyx_os_iyx_osv16", 261],
+        "8737603244374483727": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6375630142791083064": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 1014],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18191060893922845906": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4914314319075651246": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2995522243104361971": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "3260693384502698965": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8021852643758937492": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "4309855944835724499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14741878965259218163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180612484034524170": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13300287078635373813": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13378751364754764186": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6449257695177020930": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "17627392788011440461": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "13831493475156855535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "3860080842190932938": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12349486511618981663": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17142061595610833587": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "413520381980740601": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "6526747512277607691": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16117940336643166742": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5991582579063082343": ["convolution_gpu_bfyx_os_iyx_osv16", 1037],
+        "3294597200237228703": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "16191151963860109032": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4092109744625924274": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3411824370004173602": ["convolution_gpu_bfyx_os_iyx_osv16", 1037],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 8],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16", 8],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 4],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "499215221217528434": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 8],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 5],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 8],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 7],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16", 8],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16", 8],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 8],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16", 8],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16", 8],
+        "8155797389244290087": ["convolution_gpu_bfyx_f16", 1],
+        "16706121580364790904": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16", 717],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16", 200],
+        "4305463771822108179": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "2079318718874681198": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "17439941375453858836": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12467583825605788345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9058857190661793339": ["fused_conv_eltwise_gpu_ref", 2],
+        "11620974866622716017": ["fused_conv_eltwise_gpu_ref", 0],
+        "8857009061371774666": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5756084360647965669": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3975438095352877013": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3345987020362642539": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16755500582498207386": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1636861132129961823": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9793091808041097161": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2080318501154291605": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "13813582937323882369": ["fully_connected_gpu_bf_io_ref", 1],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16", 1096],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16", 368],
+        "2877876834438717783": ["convolution_gpu_bfyx_os_iyx_osv16", 949],
+        "9156649014297448284": ["convolution_gpu_bfyx_os_iyx_osv16", 674],
+        "13660470643303663441": ["convolution_gpu_bfyx_os_iyx_osv16", 158],
+        "8081997809574506331": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "8199400320947837516": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5643924526605879168": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_os_iyx_osv16", 312],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 8],
+        "13332579082252874358": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10148956417804060854": ["convolution_gpu_bfyx_f16", 8],
+        "16052199780545784176": ["convolution_gpu_bfyx_f16", 8],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 1097],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 8],
+        "3080843366919845836": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16898206352994894714": ["convolution_gpu_bfyx_f16", 8],
+        "17502393571772755646": ["convolution_gpu_bfyx_os_iyx_osv16", 91],
+        "12982233543299343225": ["convolution_gpu_bfyx_os_iyx_osv16", 437],
+        "5609871805820255743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "7971259885907841252": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15956442448148612253": ["convolution_gpu_bfyx_os_iyx_osv16", 313],
+        "7600980811977404651": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "6051578359778554994": ["convolution_gpu_bfyx_os_iyx_osv16", 373],
+        "14591236937522474591": ["convolution_gpu_bfyx_os_iyx_osv16", 1121],
+        "380671738106280681": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "16581313033870107357": ["convolution_gpu_bfyx_os_iyx_osv16", 691],
+        "15132868076468531540": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "4964421818619633295": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "5206589624074157418": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "15804259593852912096": ["convolution_gpu_bfyx_os_iyx_osv16", 1055],
+        "9667577643691138471": ["convolution_gpu_bfyx_f16", 8],
+        "9587296295017154035": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "624896425985287215": ["convolution_gpu_bfyx_f16", 8],
+        "13698491289625410930": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "5501294609610168354": ["convolution_gpu_bfyx_f16", 8],
+        "14684726385174603824": ["convolution_gpu_bfyx_f16", 8],
+        "3538746967389669479": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "5442728869442056950": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "17446903112723559991": ["convolution_gpu_bfyx_f16", 8],
+        "17314761693722740561": ["convolution_gpu_bfyx_f16", 8],
+        "12956681231908531328": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "17836528995874415642": ["convolution_gpu_bfyx_f16", 8],
+        "8896717627818724430": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "14716947061630316041": ["convolution_gpu_bfyx_os_iyx_osv16", 813],
+        "9735141117399046903": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18366465884925728820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17388129439366166721": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "13724220569112734431": ["convolution_gpu_bfyx_os_iyx_osv16", 972],
+        "5529587475911632254": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "11660089067798953391": ["convolution_gpu_bfyx_os_iyx_osv16", 627],
+        "5181665423821543629": ["convolution_gpu_bfyx_os_iyx_osv16", 251],
+        "8048807352445331657": ["convolution_gpu_bfyx_os_iyx_osv16", 688],
+        "3470485673426524224": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "3135008557801015427": ["convolution_gpu_bfyx_os_iyx_osv16", 1008],
+        "14966985685297154154": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2035874178080637954": ["convolution_gpu_bfyx_os_iyx_osv16", 312],
+        "5013120291092844103": ["convolution_gpu_bfyx_os_iyx_osv16", 745],
+        "4460181251394130653": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "5117453858905614531": ["convolution_gpu_bfyx_os_iyx_osv16", 1121],
+        "8461950668910238851": ["convolution_gpu_bfyx_os_iyx_osv16", 368],
+        "1805006234516270784": ["convolution_gpu_bfyx_os_iyx_osv16", 316],
+        "2718931301666622839": ["convolution_gpu_bfyx_os_iyx_osv16", 688],
+        "7124614724653589875": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "7805147511722673361": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "18231162877100499337": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1081152612562015774": ["convolution_gpu_bfyx_os_iyx_osv16", 641],
+        "14118692364036816874": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "2771511633327598307": ["convolution_gpu_bfyx_os_iyx_osv16", 221],
+        "2043520288487456245": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "10128395594093504455": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "1986294224967713231": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "8596104233487286880": ["convolution_gpu_bfyx_os_iyx_osv16", 688],
+        "14619067706344498943": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "16247177074403714471": ["convolution_gpu_bfyx_os_iyx_osv16", 646],
+        "17302460560764241489": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "11296929673638920561": ["convolution_gpu_bfyx_os_iyx_osv16", 270],
+        "2856387545805299627": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "9602711901243573665": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1389991763672509207": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7273251266921979515": ["convolution_gpu_bfyx_os_iyx_osv16", 116],
+        "7271698086258726731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11671327445697582898": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17570554483516474486": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3950135144885165819": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16452498081261682201": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15356297740028337585": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8878636242180690359": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12899244091844068967": ["convolution_gpu_bfyx_gemm_like", 1],
+        "18197774991654792135": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11881486982136101383": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5087105232357685910": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6362183633269684086": ["convolution_gpu_bfyx_gemm_like", 2],
+        "665553611665131720": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15800366255097765592": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3441148927037088426": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10113814865022625794": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9290161943539060420": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6575054771337880905": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6930297774765427265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14687805371465731129": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17348903837738033737": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5058042344671975771": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4296792831323727718": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16569718635085620248": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6479800863775629782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8225524724938376205": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4610533059559454932": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10523639436634369983": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17500026797620139898": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9352837842671844352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15431710492660944867": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4094966339608175937": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10293540888522108040": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2178813930852805198": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4172720860698260594": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11035900209971591093": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16347989689011736788": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18426670112574344316": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9075740348545764459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16177541412848844107": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7413356361797538770": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11878518514118760052": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1483436564981355857": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2583631235760101021": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10826337022193127499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "883141931001824331": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18286338108393131357": ["convolution_gpu_bfyx_gemm_like", 2],
+        "513328329651240169": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2866563084547740589": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6618382574307554008": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1564076582163500801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2980118259786021998": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14006008710769892285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11047701363022632258": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17824545902528351132": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13411717706564225997": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2553988022244380074": ["convolution_gpu_bfyx_os_iyx_osv16", 468],
+        "2007784578504956396": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2128232248278266618": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180218859472587238": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17950127156676619192": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11734174131078900953": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17769805455612014213": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "354985206063783019": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12008052381997574625": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14579060801049956629": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2475732477128179942": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7425269551190332752": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "2523330181210520033": ["convolution_gpu_bfyx_os_iyx_osv16", 60],
+        "9252516395349163399": ["fused_conv_eltwise_gpu_ref", 2],
+        "3025740595729338904": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9221796417553554577": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "142182287837081331": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16817115615539634498": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "846549121454126986": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "1865437550609663375": ["fully_connected_gpu_bf_io_gemm", 2],
+        "14491983419826529399": ["convolution_gpu_bfyx_os_iyx_osv16", 79],
+        "11866343372130060111": ["convolution_gpu_bfyx_os_iyx_osv16", 61],
+        "3750595711145201146": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "555112033233919049": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "9449916193007510499": ["fully_connected_gpu_bf_io_gemm", 2],
+        "821153009898835283": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10053897550646291639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "166522152877705111": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8194080531314571831": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "8462596687449136841": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16641148739441654579": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3012332306785177280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1667559253581127345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17950962563816983793": ["convolution_gpu_bfyx_os_iyx_osv16", 465],
+        "15920581282829793263": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "4931844549089354374": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11227326613484178737": ["convolution_gpu_bfyx_os_iyx_osv16", 470],
+        "8926339988827333993": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14947161471102583853": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7959005479751426244": ["convolution_gpu_bfyx_os_iyx_osv16", 926],
+        "13876295120508241721": ["convolution_gpu_bfyx_os_iyx_osv16", 84],
+        "5450799298000231966": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "745049678230480319": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17799305583546345514": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "15448134419455024563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10247046915015701375": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "818326236814735107": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "11621993279519931789": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10879300979808656559": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "6931984251726006059": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "6196230740484518313": ["convolution_gpu_bfyx_os_iyx_osv16", 737],
+        "9158058375618670219": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "11236152897678664523": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "3406694758050234432": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "10974039527048973006": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15899184198611288897": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1113],
+        "5208730096669264907": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "12427052259883778985": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11537945670773619430": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "3449889481023274859": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "14190614451726695163": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "12928525615597254487": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9516426687291882678": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17048242738976449237": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 366],
+        "9268483331991252048": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "6122901745362984256": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "5485405121200417034": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11959778533528884090": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "10066541947363706408": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12190338269093090393": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18114410819861988056": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9399757365169066601": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18002225531765237416": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "493140137361754334": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1260161648603954768": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5667262301262234990": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "9589013771119948673": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12893936099497050507": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "5453417400746204459": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "16783136590567851390": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11157538002790552612": ["convolution_gpu_bfyx_os_iyx_osv16", 359],
+        "232009389683898587": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11216071562773188709": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "12325371158799601152": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11203921208856246202": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "7748329451001058910": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4699825578606954745": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9148351131305560328": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5864010731331844548": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8354231196544866003": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3293708605626705859": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3238880006626116922": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6138876053139091484": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12144683985655531326": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "251191902439549345": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3094287673106030943": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15664461533342111743": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12991662142109741177": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "1919860812260988485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12975178408849254081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "560685047966004607": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "1411165869695864657": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6971410560932215974": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17372785589054562125": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1164314361485656318": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9516217840174015532": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12868299597160484729": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16492694273514080106": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "14567423858977789672": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "1069440014730910857": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "4511752002127622518": ["convolution_gpu_bfyx_gemm_like", 2],
+        "670011076817691046": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "10000917296337062736": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "12236539205690542952": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "12066036542483319741": ["convolution_gpu_bfyx_os_iyx_osv16", 365],
+        "16551989359219084137": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14375560443851968119": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "4925269744341463388": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "7404267750384901384": ["convolution_gpu_bfyx_os_iyx_osv16", 487],
+        "1136134476921992394": ["convolution_gpu_bfyx_os_iyx_osv16", 284],
+        "12274460319290366021": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "1936271684402780579": ["convolution_gpu_bfyx_os_iyx_osv16", 164],
+        "7797907475238799442": ["fused_conv_eltwise_gpu_ref", 2],
+        "107092103514596960": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8153466715673110154": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13707460333812965439": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13482095577300687063": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12240700165957110598": ["convolution_gpu_bfyx_os_iyx_osv16", 222],
+        "13922184309028933319": ["convolution_gpu_bfyx_os_iyx_osv16", 62],
+        "4503204697730374875": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "15444198622559010805": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "3399502934446395571": ["fully_connected_gpu_bf_io_gemm", 2],
+        "13954223602112460287": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10986360375271263743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "1716892750352083242": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "9725379584761388986": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "8812448421277455303": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10082079116080072102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "399551887429980535": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4993763244005264691": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16584618141013506079": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2812521679999989071": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13008426794683170889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12575702775019311249": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7546191118828069537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16065515254801458590": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3425550832073889758": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12158565214239239362": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4674402155077047884": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17292794084187069459": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "2786512217326082861": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14350551992529551543": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "2393708926889890184": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "14469325606711534393": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16745817877996028596": ["convolution_gpu_bfyx_os_iyx_osv16", 462],
+        "5009730037803270259": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12990454601941366626": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "16417111816169006680": ["convolution_gpu_bfyx_os_iyx_osv16", 803],
+        "133571575038273240": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14248134542225645633": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9431127887153901797": ["convolution_gpu_bfyx_f16", 8],
+        "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "11176513032851549145": ["convolution_gpu_bfyx_f16", 6],
+        "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13523379689227815262": ["convolution_gpu_bfyx_f16", 5],
+        "6721716607254493168": ["convolution_gpu_bfyx_f16", 5],
+        "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11234282887624973651": ["convolution_gpu_bfyx_f16", 8],
+        "13831173402079080202": ["convolution_gpu_bfyx_f16", 4],
+        "8326492500469570449": ["convolution_gpu_bfyx_f16", 8],
+        "3264529476730576869": ["convolution_gpu_bfyx_f16", 4],
+        "7297268657172014757": ["convolution_gpu_bfyx_f16", 2],
+        "1625066159015188551": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6691529969484361871": ["convolution_gpu_bfyx_f16", 2],
+        "15713770358690264680": ["convolution_gpu_bfyx_f16", 7],
+        "16321675691643798095": ["convolution_gpu_bfyx_f16", 2],
+        "11669126976746433467": ["convolution_gpu_bfyx_f16", 4],
+        "343301842058050721": ["convolution_gpu_bfyx_f16", 2],
+        "2752323179285263511": ["convolution_gpu_bfyx_f16", 2],
+        "2102366789632970362": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1306385926849681711": ["convolution_gpu_bfyx_f16", 2],
+        "126985649265174875": ["convolution_gpu_bfyx_f16", 2],
+        "1398008210451653662": ["convolution_gpu_bfyx_f16", 2],
+        "18349997465728341610": ["convolution_gpu_bfyx_f16", 2],
+        "6014604866075552044": ["convolution_gpu_bfyx_f16", 2],
+        "10704627126748844083": ["convolution_gpu_bfyx_f16", 1],
+        "2290627489333161117": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9281553420666514549": ["convolution_gpu_bfyx_f16", 2],
+        "5307698759365425674": ["convolution_gpu_bfyx_f16", 8],
+        "16618476158797450107": ["convolution_gpu_bfyx_f16", 2],
+        "6448987340419115272": ["convolution_gpu_bfyx_f16", 2],
+        "7649625315489330023": ["convolution_gpu_bfyx_f16", 2],
+        "1407861661939721927": ["convolution_gpu_bfyx_f16", 5],
+        "7240814723112731361": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6941932686830863618": ["convolution_gpu_bfyx_f16", 8],
+        "2035160132949629453": ["convolution_gpu_bfyx_f16", 8],
+        "17827286460954881640": ["convolution_gpu_bfyx_f16", 3],
+        "6051363798671277490": ["convolution_gpu_bfyx_f16", 8],
+        "7990676476696328795": ["convolution_gpu_bfyx_f16", 2],
+        "9594336645019216285": ["convolution_gpu_bfyx_f16", 8],
+        "2826412019603377751": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10171778444869246611": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "13742305118572588823": ["convolution_gpu_bfyx_f16", 8],
+        "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14821402568274932830": ["binary_convolution_gpu_1x1", 0],
+        "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10657672650587258853": ["convolution_gpu_bfyx_f16", 5],
+        "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13626797216057420236": ["convolution_gpu_bfyx_f16", 8],
+        "2506095387855338923": ["convolution_gpu_bfyx_f16", 3],
+        "562790620732503479": ["convolution_gpu_bfyx_f16", 8],
+        "1176788949160939554": ["convolution_gpu_bfyx_f16", 3],
+        "11395171679618279746": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9681660158274412796": ["convolution_gpu_bfyx_f16", 7],
+        "17157976605793655955": ["convolution_gpu_bfyx_f16", 1],
+        "13706914146179028206": ["convolution_gpu_bfyx_f16", 1],
+        "1586631406027561282": ["convolution_gpu_bfyx_f16", 2],
+        "9177089521763332472": ["convolution_gpu_bfyx_f16", 2],
+        "7623827168813087262": ["convolution_gpu_bfyx_f16", 2],
+        "2251294131085073114": ["convolution_gpu_bfyx_f16", 6],
+        "11257985397820322504": ["convolution_gpu_bfyx_f16", 5],
+        "3873298083628570562": ["convolution_gpu_bfyx_f16", 4],
+        "15662803497226104305": ["convolution_gpu_bfyx_f16", 5],
+        "8980088396308495358": ["convolution_gpu_bfyx_f16", 2],
+        "8049787711095084959": ["convolution_gpu_bfyx_os_iyx_osv16", 109],
+        "8361191677655973935": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "6455326407035817823": ["convolution_gpu_bfyx_os_iyx_osv16", 89],
+        "4549875381866576113": ["convolution_gpu_bfyx_os_iyx_osv16", 285],
+        "14780479128645572595": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 352],
+        "9221666339438514459": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "17091218700152862273": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9951123692498529061": ["convolution_gpu_bfyx_os_iyx_osv16", 286],
+        "15226633731441516361": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "4453349487216529991": ["convolution_gpu_bfyx_os_iyx_osv16", 166],
+        "17929115705990268026": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6621532750524834097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1104],
+        "16562571407098459049": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "2873284221161386597": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3769897639705493224": ["convolution_gpu_bfyx_os_iyx_osv16", 925],
+        "5447803100312758964": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 959],
+        "9163238347824560017": ["convolution_gpu_bfyx_os_iyx_osv16", 160],
+        "1688979903294911182": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9338092674592431198": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "15522545626077485199": ["convolution_gpu_bfyx_os_iyx_osv16", 537],
+        "1797489112792772811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5478531388148194783": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "3289369122755371980": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 207],
+        "14572382016053496602": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "16841168676076935693": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18407347961782182453": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 835],
+        "8695092335925023399": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "14168685794682021826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12423218459706339590": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8734189831526420226": ["convolution_gpu_bfyx_os_iyx_osv16", 182],
+        "14362182205968229036": ["convolution_gpu_bfyx_os_iyx_osv16", 621],
+        "13157476677873103938": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8595156989254845134": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14493123117003003092": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 616],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15727110405754725012": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10890620280807224744": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "16079792265815446547": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15384055407657760803": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2464531851392092325": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "5613964218561759893": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11460648773146310189": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6593870431636005244": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11529036254499853035": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 498],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "17010201596936918243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17881013712456488163": ["convolution_gpu_bfyx_os_iyx_osv16", 831],
+        "9336215801757107337": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "4972222030950072866": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "18113157997465675692": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1472667774257971884": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "7480855342650290772": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "17244746622354078542": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "251775001146378096": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 201],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "1264200731459756446": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "6968087469917482002": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "1607381610581485984": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "8919164618663601566": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "16853448010512574338": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3010644722195354051": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "17062011653598617580": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "1377210419756613502": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1514213112647467874": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "17268633106022870055": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "13787398748724798340": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "9739119866883611322": ["convolution_gpu_bfyx_os_iyx_osv16", 470],
+        "7151167803631697120": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "8870164706606458004": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "9269498023794081940": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 114],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "5728070995112243570": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5381496395266530071": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9712640406795417230": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 498],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 810],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 715],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 203],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 810],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 72],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 83],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "13821372148587948765": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "4727004015814244856": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 460],
+        "3470176432841342662": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "8950283515337670839": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "3995072673238444396": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 952],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "5665180797552893949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7180904384828396567": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17041465029020839746": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8648502659728489503": ["convolution_gpu_bfyx_os_iyx_osv16", 220],
+        "2007359338465363037": ["convolution_gpu_bfyx_os_iyx_osv16", 191],
+        "16300204511212928772": ["convolution_gpu_bfyx_os_iyx_osv16", 606],
+        "10636266218009746496": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "17502734572225953539": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "9266211532252099402": ["fully_connected_gpu_fb_oi_ref", 1],
+        "6763848192987176713": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6123737429963241103": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "10102406370623883494": ["convolution_gpu_bfyx_os_iyx_osv16", 109],
+        "16125206369312086947": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "16927483709629289661": ["convolution_gpu_bfyx_os_iyx_osv16", 862],
+        "3196823812655863240": ["convolution_gpu_bfyx_os_iyx_osv16", 940],
+        "7968691295772769464": ["convolution_gpu_bfyx_os_iyx_osv16", 588],
+        "6100031133333761315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "4055514200737135942": ["fully_connected_gpu_fb_oi_ref", 2],
+        "8508119169246513026": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14616145871710456304": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5168719682914827724": ["convolution_gpu_bfyx_os_iyx_osv16", 118],
+        "9473263513191498949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13461678175466315866": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5434387853485184980": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1580848418974169308": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6784038318046980185": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6248879028648699716": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "1436424324238684653": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13189391944650202330": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1199836165181399413": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5850612837647497531": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14740129361300854586": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5500102903434438965": ["convolution_gpu_bfyx_os_iyx_osv16", 317],
+        "7297288884568452370": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5136459381906620211": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17411381157694639837": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8553537608760917592": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12734736056404146766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "706526643700857104": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14937087468947592213": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10242452169628899571": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16629319403227634487": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3072344987020666532": ["convolution_gpu_bfyx_os_iyx_osv16", 552],
+        "5932710369376133446": ["convolution_gpu_bfyx_os_iyx_osv16", 176],
+        "15493383292734604744": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5089311900051393846": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8721087995946196075": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14614506535270942373": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1289727743091243002": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "18141581865855554514": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16956102699411887521": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11526253915485637934": ["convolution_gpu_bfyx_os_iyx_osv16", 632],
+        "15696872908795836832": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15332512198621601617": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5702206454207934253": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "15414564531144316178": ["convolution_gpu_bfyx_gemm_like", 2],
+        "386448290084824203": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15390537225231495870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10038180349007230302": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "6817180081986948843": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1527649565538821618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7004336584711849988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2157468701794819044": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1111],
+        "15920115680945815097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 662],
+        "17778554668592635168": ["convolution_gpu_bfyx_os_iyx_osv16", 95],
+        "6999571050665340986": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "9879436330613366129": ["convolution_gpu_bfyx_gemm_like", 2],
+        "726019095679197164": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 722],
+        "7267651931396380072": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12324657364444167791": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 748],
+        "17351243519367619322": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17026338651868178077": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "4114184149613179671": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 746],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 737],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 1115],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 751],
+        "9038567144062573854": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "6418222853479731432": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7539191242110313918": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "5717588912072437191": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 606],
+        "10766144770072425534": ["convolution_gpu_bfyx_os_iyx_osv16", 966],
+        "6442062011017461761": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 605],
+        "3892512749863226006": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4970240836537468609": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "2017817372328795772": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18312913026696855515": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1323873987880062206": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "3828289925836476678": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10112041311060264798": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7966725359592006848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2213697863012348994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5200128826708487987": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910238486908592807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7110283028091835342": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16035239784731081694": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8190708817382075098": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14088072670684726938": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "4594156436010043898": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "11599404585487705575": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "12238796233133147488": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "16062641979970268785": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "17970835612618431265": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "2793976170555467399": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "5268998395189523109": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "10247076603819003292": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "10411646581372174184": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "3783590807023839590": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "13040613656895011417": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3426085674061936062": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "18191480673111859449": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "3168817659922190247": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "18315877695535348266": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "12547634427503359071": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "16329007163840646462": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "10029877845127663589": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "2314415797696124986": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "16980380685273501504": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "3178865432099367094": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "14025615946937229331": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "9213611800089847066": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "16929122365386190391": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "2135878993442720196": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "9676824536524126662": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "920276615573431782": ["convolution_gpu_bfyx_os_iyx_osv16", 966],
+        "14160730014298968824": ["convolution_gpu_bfyx_os_iyx_osv16", 208],
+        "17736530310730065811": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "2980714886349866400": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16634588113528268855": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "11974061312537998708": ["convolution_gpu_bfyx_os_iyx_osv16", 200],
+        "16035580169248458433": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "9866780121729912726": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "9774829335571618473": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12220806137793480020": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18351615003377381150": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "5523604552813225273": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "7679309022130741323": ["fully_connected_gpu_fb_io_b8_f8_vload", 0],
+        "5318931986270088360": ["convolution_gpu_bfyx_gemm_like", 0],
+        "515117191459385744": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8719869282082754142": ["convolution_gpu_bfyx_os_iyx_osv16", 868],
+        "7982863980065943223": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11226945962148431484": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4241838582334505669": ["convolution_gpu_bfyx_gemm_like", 2],
+        "377042666741080260": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18145274589954906463": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6999860230736815298": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "16857606646270000245": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12338108420996610172": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10159450328554854004": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9170293267334520501": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "566685987437510322": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3194003345823695583": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "12107562407862382766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7161737091607459281": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9553813691004246971": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10335630215626781232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660045223846569448": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14844074799300904420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5366152766029340057": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "8299878919282539563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18146920703695658789": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "9019625678983697946": ["convolution_gpu_bfyx_os_iyx_osv16", 317],
+        "10578264750808095350": ["convolution_gpu_bfyx_os_iyx_osv16", 317],
+        "17553228602707603911": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "11544029240137241864": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "3625681568469091400": ["convolution_gpu_bfyx_os_iyx_osv16", 691],
+        "8849298369373186729": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "10796031718453810929": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9053983956770697828": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "1865317677339946921": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12018933315566840474": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "6446557539680352152": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1642704598828904520": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "8319779172385327650": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "11579387987720364831": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12754351323109225715": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1114],
+        "7903220569487431556": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "3905190080706902824": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8296759260312471619": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17301520533084822859": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1116],
+        "14740238736074743734": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "11837023395630571569": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18200031323963616161": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "4125453719396313232": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3653945386031463537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "290357754290893078": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3852245179144851596": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "13731852935536160843": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8777588932609025138": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1316118918790851994": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "11178580933542373407": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 365],
+        "17878041282431477247": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18049861144026923516": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2141454343831534876": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9144400494257163130": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "13190119938630028553": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4903536862079845135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15066104804156933222": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13457620264718125011": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "16436525035845780373": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "11501452337228727462": ["convolution_gpu_bfyx_os_iyx_osv16", 199],
+        "14843223893923209210": ["convolution_gpu_bfyx_os_iyx_osv16", 991],
+        "3403065541792865347": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5747468958285466504": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17552192746313035704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4855884888715402777": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6932556634380539441": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9400396209180747044": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "16124622994105864663": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "10431774409348875623": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "9495099584417616887": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "9115704215611322151": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11735107098356940998": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15204384674852423405": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16866113149488400688": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15389774302738715375": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "8101177730804364242": ["convolution_gpu_bfyx_os_iyx_osv16", 640],
+        "10149791427786334512": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "11053198857132396443": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "3963577328998759824": ["fully_connected_gpu_fb_oi_ref", 1],
+        "800184023925596362": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13839532421033004873": ["convolution_gpu_bfyx_os_iyx_osv16", 942],
+        "8262487256974801864": ["convolution_gpu_bfyx_os_iyx_osv16", 938],
+        "3693217331248996607": ["convolution_gpu_bfyx_os_iyx_osv16", 942],
+        "10388555096612441710": ["convolution_gpu_bfyx_os_iyx_osv16", 191],
+        "8892698757722619628": ["convolution_gpu_bfyx_os_iyx_osv16", 940],
+        "9606108204575763003": ["convolution_gpu_bfyx_os_iyx_osv16", 619],
+        "8449999818915991236": ["fully_connected_gpu_fb_io_ref", 2],
+        "6954046921635466236": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12133573113666871990": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18286924901612269315": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16168987643236739114": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17573344121250212662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8792004303945144557": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6055054188657886157": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16692293796070898202": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18377591093081814522": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7171735046681228890": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2461164836823254208": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14430129165479757357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14698972830975282413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3479216436904445131": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5269956004669551826": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13594976208424418204": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12373590460058087695": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "4405236452109167503": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14132900527730577142": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1349033639465657142": ["convolution_gpu_bfyx_gemm_like", 2],
+        "812985719328060901": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "12407276986845062239": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9170373506597510005": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1389904024718949479": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "7933040116770016066": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1919536721555752974": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10686800639842865597": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8687217977804450176": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3954066703109036822": ["convolution_gpu_bfyx_gemm_like", 2],
+        "723914723460931977": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11198516910049713685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1635320120115967164": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15344790681368521678": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12844169781725567332": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17741034184665639196": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15923530138304858829": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10444674910548414627": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10302498589531075361": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4924266705550545296": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18358817826057771246": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5814292023792160102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11190351855453911732": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9686754964115262880": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10699818671891976144": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11629568560686145289": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2754112975365662883": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14572211541644991947": ["convolution_gpu_bfyx_os_iyx_osv16", 417],
+        "15460159349027866277": ["convolution_gpu_bfyx_os_iyx_osv16", 456],
+        "11509503516680870396": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3553844546517243430": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "11739050017164389431": ["convolution_gpu_bfyx_os_iyx_osv16", 466],
+        "14683616789766294266": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1178443422000627700": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 363],
+        "3959894501921049830": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "6268257722565030993": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8104007721367839894": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11004242349744689661": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "18331651243656907622": ["convolution_gpu_bfyx_os_iyx_osv16", 217],
+        "165832937834890614": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13820132527548818114": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "11494973886338256684": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "9562717353252171645": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15182874743616431755": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11923231799522030843": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "7212742683076043022": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "1535659774314187616": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9077124630226762093": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "10707439442194349922": ["convolution_gpu_bfyx_os_iyx_osv16", 974],
+        "13670707208998927662": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11898738546265963886": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7218310781442328740": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 221],
+        "17307988793370069255": ["convolution_gpu_bfyx_os_iyx_osv16", 201],
+        "3159313229944494871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2202381460552007272": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "4539543204582046751": ["convolution_gpu_bfyx_os_iyx_osv16", 201],
+        "2922645767583925625": ["convolution_gpu_bfyx_os_iyx_osv16", 42],
+        "1933120851078072002": ["convolution_gpu_bfyx_os_iyx_osv16", 737],
+        "15544724104656453486": ["convolution_gpu_bfyx_os_iyx_osv16", 738],
+        "9953946296788154289": ["convolution_gpu_bfyx_os_iyx_osv16", 113],
+        "5949275355217152112": ["convolution_gpu_bfyx_os_iyx_osv16", 220],
+        "9953648472305845286": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 741],
+        "4585615709600143734": ["convolution_gpu_bfyx_os_iyx_osv16", 970],
+        "5688607327240251933": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17872945111265083716": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "7002575346587056029": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 360],
+        "4053858347143322566": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "15684381282886192452": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9172655573618628060": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 742],
+        "10794126133490266436": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13850228162972171575": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "129286539782466549": ["convolution_gpu_bfyx_gemm_like", 2],
+        "405864173902226347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "11446357246069900060": ["convolution_gpu_bfyx_os_iyx_osv16", 1108],
+        "11612145813762780082": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15323010740285064115": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "9782042377801038578": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15340106601175659588": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3000754961057044652": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13882543862049484032": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1039],
+        "459319667430150397": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12757674875116871887": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16119575123089076330": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1039],
+        "17015151842140598799": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "634038212244146017": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16", 8],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 5],
+        "967593872851912083": ["convolution_gpu_bfyx_f16", 8],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 5],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16", 8],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 5],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 5],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 5],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 5],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 5],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 5],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 5],
+        "357806365552700839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 5],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 5],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 5],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16", 8],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 3],
+        "346998321908284784": ["convolution_gpu_bfyx_f16", 8],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 3],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16", 8],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 4],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16", 8],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 4],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16", 8],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 3],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16", 8],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 4],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16", 8],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 4],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16", 8],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 3],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16", 8],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 4],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16", 8],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 3],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16", 8],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 4],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16", 8],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 3],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16", 8],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 4],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16", 8],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 3],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16", 8],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 3],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16", 8],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 3],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16", 8],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 3],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16", 8],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 4],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16", 8],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 3],
+        "260499864874634958": ["convolution_gpu_bfyx_f16", 8],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 4],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16", 8],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 3],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16", 8],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 3],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16", 8],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 3],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16", 8],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 3],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 4],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 2],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 4],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 2],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 7],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 2],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 4],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 2],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 4],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 2],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 3],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 2],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 4],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 2],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 4],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 2],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 3],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 2],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 3],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 1],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 3],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 1],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 3],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 2],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 4],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 1],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 3],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 2],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 3],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 2],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 4],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 2],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 8],
+        "11479153223948565455": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15137118881649312407": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7380413826069265610": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16535858081334660130": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3621905235571219180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15946837476334836670": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "245178301664812042": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11536204967390696799": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13202661087717766278": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17082033214052891239": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10972993149458384549": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13266975232886004160": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5239323177752135143": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13950458285304028472": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1153656272296563651": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15832393447136864275": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449769853632530": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16481491209623188639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16355932574879498582": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9885117015102902622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17948745397003387421": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6169584310346033045": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11946156629252758613": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8766639290602892682": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4124732995953832580": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14120940518810838558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15477415938111847293": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7899374704077099747": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1738224818674864374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4675498016268563894": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11678653628752466495": ["convolution_gpu_bfyx_gemm_like", 2],
+        "823094503720427089": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6268238156027633260": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12067387912557140291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14700484317091478179": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5093753362153705304": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7185731190256343440": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7147929965532955967": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "11272978444176415320": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3664831747298375482": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "5055315246446375474": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11248871352103466387": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "14138271699174946769": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11248138620600796041": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "8218608499996018829": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "492405382055839338": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "13627463949725014842": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10442692749607465731": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "5257716983547940732": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4531738938698034182": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "4103900860372048770": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1763848406836981250": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "13050289716763141821": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5246872552943832761": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8103482664263052993": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "4890599355418453618": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13440603011986281192": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "7470027005329223304": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10193635775409684341": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "9727214793392528330": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10481905734789810461": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "17748868035178556381": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1557394183568627973": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "1431347831018127681": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11729412526159852880": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "4899105740108544338": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8050406060207298909": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "7380902367877842940": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12400142005537988277": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "7084726217254409262": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13881126705282937733": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "3268450385258447029": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3315969006703902437": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "7995430380267318045": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13355664807789465988": ["convolution_gpu_bfyx_os_iyx_osv16", 747],
+        "1814690350132893834": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10977798741323641518": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "16290685659520662243": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14814993085047057124": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "16036211705705298060": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3314627126439576532": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "5397150622881607923": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4417629288282219686": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "2593493324630665553": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2115136697391853510": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "3903972756038760641": ["convolution_gpu_bfyx_os_iyx_osv16", 225],
+        "18309964708787622418": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "10898709444676724488": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "3114718546872961667": ["convolution_gpu_bfyx_os_iyx_osv16", 977],
+        "4116817191288103322": ["convolution_gpu_bfyx_os_iyx_osv16", 225],
+        "5759507923877307269": ["convolution_gpu_bfyx_os_iyx_osv16", 604],
+        "13521523772245595449": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "7025699501997365179": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "17325198932789845471": ["convolution_gpu_bfyx_os_iyx_osv16", 981],
+        "1929216390450946038": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "9359713794448163515": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "1064765432017421754": ["convolution_gpu_bfyx_os_iyx_osv16", 225],
+        "17903113127620271097": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "955947984048164651": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "6871124717336911723": ["convolution_gpu_bfyx_os_iyx_osv16", 981],
+        "17054742656500024833": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "8735118147118298928": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "7689370938722443575": ["convolution_gpu_bfyx_os_iyx_osv16", 228],
+        "7389433284327478008": ["convolution_gpu_bfyx_os_iyx_osv16", 605],
+        "6352588504037946062": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "3420065266906936372": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "5158493429539582334": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "8584667522373731666": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "16628885743804758299": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "9979885527081183609": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "11585377068025763798": ["convolution_gpu_bfyx_os_iyx_osv16", 977],
+        "270198976247871883": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "14806119107242947719": ["convolution_gpu_bfyx_os_iyx_osv16", 605],
+        "6237698548794601324": ["convolution_gpu_bfyx_os_iyx_osv16", 229],
+        "16586342221264661586": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "8378911742901238960": ["convolution_gpu_bfyx_os_iyx_osv16", 980],
+        "8878591357527094058": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16", 8],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 5],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 5],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 5],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 5],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 5],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 5],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 5],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 5],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 5],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 5],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 5],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 5],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 3],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 4],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 3],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 3],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 3],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 3],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 3],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 4],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 3],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 3],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 4],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 3],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 3],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 3],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 3],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 3],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 4],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 4],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 3],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 3],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 4],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 4],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 3],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 4],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 4],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 3],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 6],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 3],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 6],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 5],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 6],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 5],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 6],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 4],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 6],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 5],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 7],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 3],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 6],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 2],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 6],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 5],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 7],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 5],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 7],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 5],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 7],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 5],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 7],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 2],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 7],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 5],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 7],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 3],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 7],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 2],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 5],
+        "4652102901251847499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4834446692898125871": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "8552605555461651066": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4461989328775275994": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "4821707856043228388": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10837496380266058422": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "867673900353092030": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16839741351990811959": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "9400507072890048966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9193880745263317167": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "13391871893495885313": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10447947790216991304": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "10371076921125171059": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10315090439844489700": ["convolution_gpu_bfyx_gemm_like", 2],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7957019749780783255": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3916912615549949771": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3702373232430988630": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16866405531619284081": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3118240332710616352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18271341717679165017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6547588888976666790": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3243287355593359731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11745487821055710420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2413743706626149595": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2891977832675907820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5140042030231193807": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "12362834244136780846": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16", 719],
+        "12975331316527510995": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "11296280342006832013": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "2729382724566640622": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "1973144337799131575": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "11237620198863831646": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "13522230668952002294": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16", 719],
+        "1878253869657286717": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "15537416934472628620": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "1614676161640914325": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "7887122837178625925": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16", 719],
+        "13932612600851474669": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "108336648992892440": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "10684345634354913297": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16", 719],
+        "10916615896929712681": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "6401617291202138329": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "14418145752469985573": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16", 1095],
+        "10000629948062903268": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "17913158947435785150": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "3683538222536942924": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16", 719],
+        "3497309410275654168": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "5849203144808104114": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "6079947803671938062": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "10023464714622430341": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16", 214],
+        "10125169683435871224": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "16183189414217717282": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16", 589],
+        "4778769961736466493": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16", 212],
+        "14240807033488944743": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16", 964],
+        "9611215430798915107": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16", 214],
+        "13082046205786468713": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "12207197008210652563": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16", 587],
+        "10581403540319621428": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "8583043839495629208": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16", 963],
+        "14447820502121172060": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "16001364310945493562": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "8482359546526573989": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16", 214],
+        "1012101590389722479": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16", 964],
+        "14078917033502693044": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "3484370445244910200": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16", 587],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16", 920],
+        "2438463778071005693": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10241616750018729197": ["convolution_gpu_bfyx_os_iyx_osv16", 636],
+        "16093736249698386830": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "15577855965797137317": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 2],
+        "2793239401424346732": ["fully_connected_gpu_fb_oi_ref", 2],
+        "1090168454685651958": ["fully_connected_gpu_fb_oi_ref", 2],
+        "8694043970360551765": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8773350383870039461": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "18102285308171488538": ["convolution_gpu_bfyx_os_iyx_osv16", 476],
+        "4504463103561729721": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "13026398103046869012": ["fully_connected_gpu_fb_oi_ref", 1],
+        "4938053383542014494": ["fully_connected_gpu_fb_oi_ref", 2],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16", 720],
+        "10292349730148518173": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "17322754821646330275": ["convolution_gpu_bfyx_os_iyx_osv16", 197],
+        "9463001223908267526": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "2819993544283340217": ["convolution_gpu_bfyx_os_iyx_osv16", 155],
+        "4891941794728322149": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "17966409116732724850": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "16009549743559486766": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14591935906857802585": ["convolution_gpu_bfyx_os_iyx_osv16", 368],
+        "111424963409848995": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2739383731123097925": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17284261626529871462": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "5668693380660004839": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "12579230945548766456": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5167557197439368430": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1540552565663233708": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "16096568902203474447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5028262864972382565": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1974417291828577": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "2599172922002088957": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11660160310320618383": ["convolution_gpu_bfyx_gemm_like", 0],
+        "6321333724966975926": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "13769852278335802471": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "79817180213970569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7224734161984848733": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2597920881875761524": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17470658487460623535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3304768856579090475": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13876951907579147655": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "10366703264083184092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7178492718471026756": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2607889212984737257": ["convolution_gpu_bfyx_os_iyx_osv16", 1022],
+        "6528945595038330865": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9831986499172731633": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18440050172847926353": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "6512088599266777589": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "10018756206737727294": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5830779024517851317": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "7913817244562964901": ["convolution_gpu_bfyx_f16", 8],
+        "11779589567746893119": ["convolution_gpu_bfyx_f16", 8],
+        "5287441936829096354": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "16879635677321458783": ["convolution_gpu_bfyx_f16", 8],
+        "5936894667802097344": ["convolution_gpu_bfyx_f16", 8],
+        "12029555773381953470": ["convolution_gpu_bfyx_f16", 8],
+        "1395714970525756800": ["convolution_gpu_bfyx_f16", 5],
+        "18366381433142273315": ["convolution_gpu_bfyx_f16", 8],
+        "17839315025229585473": ["convolution_gpu_bfyx_f16", 8],
+        "7428339090190576585": ["convolution_gpu_bfyx_f16", 8],
+        "16427721132197847241": ["convolution_gpu_bfyx_f16", 8],
+        "929038963682864275": ["convolution_gpu_bfyx_f16", 8],
+        "6348679735483401866": ["convolution_gpu_bfyx_f16", 8],
+        "17409943223289937333": ["convolution_gpu_bfyx_f16", 8],
+        "10896472785943286419": ["convolution_gpu_bfyx_f16", 8],
+        "8675423965229942895": ["convolution_gpu_bfyx_f16", 8],
+        "15359653790909326580": ["convolution_gpu_bfyx_f16", 7],
+        "937772044105590355": ["convolution_gpu_bfyx_f16", 5],
+        "11630003841984891663": ["convolution_gpu_bfyx_f16", 8],
+        "15721323944762357421": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18032560040713612222": ["convolution_gpu_bfyx_f16", 5],
+        "16185581163541386950": ["convolution_gpu_bfyx_f16", 8],
+        "7296460872108123423": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18375557444371775299": ["convolution_gpu_bfyx_f16", 5],
+        "10922059457537054563": ["convolution_gpu_bfyx_f16", 8],
+        "122295605901184339": ["convolution_gpu_bfyx_f16", 5],
+        "12164250230746861951": ["convolution_gpu_bfyx_f16", 5],
+        "10631671892805059138": ["convolution_gpu_bfyx_os_iyx_osv16", 908],
+        "13517627553690454113": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "17864904691465978047": ["convolution_gpu_bfyx_os_iyx_osv16", 911],
+        "7688613129211669281": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15276587352894128846": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "16583642152876546031": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "1540351396976309640": ["convolution_gpu_bfyx_os_iyx_osv16", 908],
+        "12018060391889249406": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1703594828023385832": ["convolution_gpu_bfyx_os_iyx_osv16", 585],
+        "16671415101494484639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4660166087476681397": ["convolution_gpu_bfyx_gemm_like", 2],
+        "824242000358871449": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "9894766303335506733": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17266480567140619519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13198159541095771298": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "3117760785038488579": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "1476464784116064433": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4485934013026623941": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16673650204498772920": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8328912827514946731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15425046562310745575": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9559768114277499815": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "8904325051665606784": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2193347488577584488": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6699483770041820657": ["convolution_gpu_bfyx_os_iyx_osv16", 996],
+        "789366296550494453": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14473138580870542149": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1428800094127546021": ["convolution_gpu_bfyx_os_iyx_osv16", 620],
+        "14142504888572786665": ["convolution_gpu_bfyx_os_iyx_osv16", 271],
+        "1984025014517619256": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7957167898986800985": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "412995552853553524": ["convolution_gpu_bfyx_f16", 8],
+        "7058232330882130703": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15549425900373079382": ["convolution_gpu_bfyx_f16", 8],
+        "2713038204741622907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1878980012173918209": ["convolution_gpu_bfyx_f16", 8],
+        "12468208151780727122": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6674575974748163031": ["convolution_gpu_bfyx_f16", 8],
+        "5591111867402032949": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3413916493145831316": ["convolution_gpu_bfyx_f16", 8],
+        "12421615174911349736": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689084255978323672": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12474210147973914830": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14174889288973953645": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18224887830367116006": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16049847963625476676": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "3817623781909159313": ["convolution_gpu_bfyx_f16", 5],
+        "3004968067582685285": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6876765637331622545": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6802301901709446085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13245964863324091195": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "953254263392356310": ["convolution_gpu_bfyx_f16", 3],
+        "5388858533648189105": ["convolution_gpu_bfyx_f16", 3],
+        "3226238265868290723": ["convolution_gpu_bfyx_f16", 7],
+        "10098858620420134682": ["convolution_gpu_bfyx_f16", 8],
+        "18308172581381789101": ["convolution_gpu_bfyx_f16", 4],
+        "12846183737006963638": ["convolution_gpu_bfyx_f16", 4],
+        "8746233054079242877": ["convolution_gpu_bfyx_f16", 8],
+        "7516276889336424671": ["convolution_gpu_bfyx_f16", 8],
+        "8240661672477348007": ["convolution_gpu_bfyx_f16", 2],
+        "7421142512620741721": ["convolution_gpu_bfyx_f16", 5],
+        "17095633565672192085": ["convolution_gpu_bfyx_f16", 8],
+        "7381046541836362634": ["convolution_gpu_bfyx_f16", 2],
+        "7006663637645720459": ["convolution_gpu_bfyx_f16", 2],
+        "554667746487334145": ["convolution_gpu_bfyx_f16", 3],
+        "1899794088311416867": ["convolution_gpu_bfyx_f16", 8],
+        "4461871297663195464": ["convolution_gpu_bfyx_f16", 1],
+        "845238018552466931": ["convolution_gpu_bfyx_f16", 1],
+        "1588946175550138318": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15493305609986974083": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18266429764179335648": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4773783671939023015": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4841057875316789358": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10434845132440395347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4971104866692187809": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3918510119122483722": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10511458406494047485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4104477639131772427": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14619253124444303162": ["convolution_gpu_bfyx_os_iyx_osv16", 347],
+        "2303241947828987936": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15440788136860909526": ["convolution_gpu_bfyx_os_iyx_osv16", 1029],
+        "5886674354741908134": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8121822626577551399": ["convolution_gpu_bfyx_os_iyx_osv16", 278],
+        "6561450336890348030": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16", 644],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "6864098212683093769": ["convolution_gpu_bfyx_os_iyx_osv16", 1046],
+        "12286768317527546407": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15803888689432429483": ["convolution_gpu_bfyx_os_iyx_osv16", 273],
+        "2969163284049372725": ["convolution_gpu_bfyx_os_iyx_osv16", 1006],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "13943934495343791315": ["convolution_gpu_bfyx_os_iyx_osv16", 155],
+        "2864116308996401112": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16", 517],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "12840351521230542751": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "10101063893937511289": ["convolution_gpu_bfyx_os_iyx_osv16", 101],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "16052212361531923323": ["convolution_gpu_bfyx_os_iyx_osv16", 428],
+        "14021819955559248258": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16", 806],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16", 55],
+        "18183296320499063227": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "13889057206654080908": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2813710942447372241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13633232435632839044": ["convolution_gpu_bfyx_f16", 8],
+        "2883172178329270363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9432546329737888706": ["convolution_gpu_bfyx_f16", 8],
+        "12985746913235154779": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17940668702908419725": ["convolution_gpu_bfyx_f16", 8],
+        "2064000219100642226": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5833649709217830223": ["convolution_gpu_bfyx_f16", 8],
+        "10849235794440642481": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6321445979984216128": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14697315322325185660": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "36079357617783912": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4063865474431180498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13167503358764278233": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17498603449428007802": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6304136029727027056": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "1754448782405089213": ["convolution_gpu_bfyx_f16", 8],
+        "15489166244290113065": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5756918986564223629": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8035545676843269497": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17042017278300937839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11260048813076144906": ["convolution_gpu_bfyx_f16", 3],
+        "6873924247641352061": ["convolution_gpu_bfyx_f16", 5],
+        "6474957215284027135": ["convolution_gpu_bfyx_f16", 7],
+        "16573724507496129614": ["convolution_gpu_bfyx_f16", 6],
+        "11210971373278055121": ["convolution_gpu_bfyx_f16", 5],
+        "185717560970701618": ["convolution_gpu_bfyx_f16", 7],
+        "11817410866221484993": ["convolution_gpu_bfyx_f16", 8],
+        "9765519004693711463": ["convolution_gpu_bfyx_f16", 5],
+        "14300671725579588671": ["convolution_gpu_bfyx_f16", 4],
+        "1297549572559338433": ["convolution_gpu_bfyx_f16", 8],
+        "4346210823986581329": ["convolution_gpu_bfyx_f16", 5],
+        "2750608965765787878": ["convolution_gpu_bfyx_f16", 5],
+        "14245442283142381063": ["convolution_gpu_bfyx_f16", 5],
+        "2942593456597250269": ["convolution_gpu_bfyx_f16", 5],
+        "14807774261203767931": ["convolution_gpu_bfyx_f16", 5],
+        "2024891861044519704": ["convolution_gpu_bfyx_f16", 5],
+        "12988352411577718659": ["convolution_gpu_bfyx_f16", 5],
+        "7546167886043158750": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12777758044198094011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17999895886988202252": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7284204319739516687": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11574916930945966662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12181953262469206135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11001131415959768285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11516255774873880270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17905472119711952421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3708423242842748011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16735610121492345646": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10393786933242452104": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8593006729492614006": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8080047256092430454": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5827132729840694911": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2862262622518056270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7592655788466931007": ["convolution_gpu_bfyx_os_iyx_osv16", 659],
+        "10751447918697845967": ["convolution_gpu_bfyx_os_iyx_osv16", 253],
+        "14327549932088763609": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "9139350052341521235": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2268155498775258271": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "9252995576301318377": ["convolution_gpu_bfyx_os_iyx_osv16", 1005],
+        "16131094933895726474": ["convolution_gpu_bfyx_os_iyx_osv16", 617],
+        "2390813972238809739": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "6575286116803785519": ["convolution_gpu_bfyx_os_iyx_osv16", 911],
+        "9509860212160444680": ["convolution_gpu_bfyx_os_iyx_osv16", 908],
+        "2025729513014515133": ["convolution_gpu_bfyx_os_iyx_osv16", 876],
+        "7012386443457106080": ["convolution_gpu_bfyx_os_iyx_osv16", 155],
+        "10807317048120773939": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "13800264518247731721": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10381956671421182115": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "4874673523117573787": ["convolution_gpu_bfyx_gemm_like", 1],
+        "18140414399325733479": ["convolution_gpu_bfyx_os_iyx_osv16", 51],
+        "5854165399605633326": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "17238058461587589303": ["convolution_gpu_bfyx_os_iyx_osv16", 422],
+        "4101383449947395379": ["convolution_gpu_bfyx_os_iyx_osv16", 793],
+        "2697043651083211983": ["convolution_gpu_bfyx_os_iyx_osv16", 585],
+        "1196153439884178828": ["convolution_gpu_bfyx_os_iyx_osv16", 759],
+        "1408371298472575421": ["convolution_gpu_bfyx_os_iyx_osv16", 752],
+        "9614122272772797675": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7199567766573336359": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13573164884579883011": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15482728985931330311": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4607650298345740971": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16086873164128770879": ["convolution_gpu_bfyx_gemm_like", 2],
+        "105926781977700977": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11591232422517503119": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11582016741808877197": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16914574072145986060": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6022176855777948587": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8941858845051007302": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9357675997524716463": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3521176117120705338": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12045093589986262223": ["convolution_gpu_bfyx_os_iyx_osv16", 221],
+        "470065517654323782": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16734161909350784601": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11121230809258677064": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6349024748484491361": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9689224985169331447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3750053020466161808": ["convolution_gpu_bfyx_os_iyx_osv16", 479],
+        "15788948623626667459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13291988829313422545": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "17375427967226537519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "356011965155211999": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "10249443290070223207": ["convolution_gpu_bfyx_os_iyx_osv16", 466],
+        "11731131619682311119": ["convolution_gpu_bfyx_gemm_like", 2],
+        "499465197159774125": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "6713136765330410003": ["convolution_gpu_bfyx_os_iyx_osv16", 285],
+        "10482500982261483441": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12868046747643626115": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3118940652855466279": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "9133477146144263621": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "6014658843738581344": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2254000832500315403": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "2201913047888029571": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6765174963106729735": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6860612036193780126": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4053722516029644812": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "3872902814632377403": ["convolution_gpu_bfyx_os_iyx_osv16", 555],
+        "11807558788154880902": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11052363375504603312": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "5704480811160976661": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2631038501229053001": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11448877892018743111": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10504809699083269708": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 837],
+        "221686752427251764": ["convolution_gpu_bfyx_os_iyx_osv16", 852],
+        "8099629938775512387": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5641577920984461497": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 458],
+        "12153763576335891417": ["fully_connected_gpu_fb_io_b8_f8_vload", 1],
+        "11940005480315119153": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "1302512649939808216": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "16919811480058643640": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "5208084625746441471": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "8262549900448065079": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "5227665249672396809": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "10715707282679913174": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15195978022706554558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4702145645721143238": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10415281487218000500": ["convolution_gpu_bfyx_gemm_like", 2],
+        "680533894953795110": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1524996376386486665": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2180727313291426024": ["convolution_gpu_bfyx_os_iyx_osv16", 573],
+        "13865408769089368168": ["convolution_gpu_bfyx_os_iyx_osv16", 159],
+        "17729561573161674389": ["convolution_gpu_bfyx_os_iyx_osv16", 535],
+        "14102092207521274159": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "14601505600623942303": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "4933328578946081154": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13882747247011638614": ["convolution_gpu_bfyx_os_iyx_osv16", 559],
+        "814582084353022226": ["convolution_gpu_bfyx_os_iyx_osv16", 553],
+        "4844820846457555156": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "6607603202773469786": ["convolution_gpu_bfyx_os_iyx_osv16", 241],
+        "15439502814859116813": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "15777107988701235428": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "12832042711454018844": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "6099745418702030715": ["convolution_gpu_bfyx_os_iyx_osv16", 167],
+        "4230880085403638923": ["convolution_gpu_bfyx_os_iyx_osv16", 795],
+        "62516450676185117": ["convolution_gpu_bfyx_os_iyx_osv16", 420],
+        "93092162022748986": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15895053123520992434": ["convolution_gpu_bfyx_os_iyx_osv16", 552],
+        "14005851072926998714": ["convolution_gpu_bfyx_os_iyx_osv16", 552],
+        "13582287631171243512": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10982128848228134282": ["convolution_gpu_bfyx_os_iyx_osv16", 140],
+        "7236965443679023925": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "1267627207431132628": ["convolution_gpu_bfyx_os_iyx_osv16", 156],
+        "2427481818567622188": ["convolution_gpu_bfyx_os_iyx_osv16", 156],
+        "9499169226931836849": ["convolution_gpu_bfyx_os_iyx_osv16", 911],
+        "14841135939793901331": ["convolution_gpu_bfyx_os_iyx_osv16", 498],
+        "13877129322236450083": ["convolution_gpu_bfyx_os_iyx_osv16", 124],
+        "17180103562901495937": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16817205245313896299": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2495268194877370173": ["convolution_gpu_bfyx_f16", 8],
+        "12476976926994223419": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "837759583632984386": ["convolution_gpu_bfyx_f16", 8],
+        "15704905077262309915": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15294932718062276977": ["convolution_gpu_bfyx_f16", 8],
+        "4080044423867161503": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12782915336639648289": ["convolution_gpu_bfyx_f16", 3],
+        "6939516498492475263": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689321018957344059": ["convolution_gpu_bfyx_f16", 3],
+        "2757721937742809580": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10786200002789430346": ["convolution_gpu_bfyx_f16", 3],
+        "1941288041804222048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14851676883700287486": ["convolution_gpu_bfyx_f16", 3],
+        "17430311645965116316": ["convolution_gpu_bfyx_f16", 3],
+        "3115685904789548595": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12312218395355058343": ["convolution_gpu_bfyx_f16", 6],
+        "17435783978159028678": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18104511008021666751": ["convolution_gpu_bfyx_f16", 7],
+        "2889130721514872852": ["convolution_gpu_bfyx_f16", 6],
+        "6772340882401465511": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11743064882436041973": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10729082617196359413": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8688603561602716375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6216329929003742144": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5858568936289863149": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1258577325908211211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10990147603320054495": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13137659893098575291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2695989423525253829": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3932955531996129807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14834765532454121330": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11205075769094656704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10056755067893619842": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8845972204063781512": ["convolution_gpu_bfyx_os_iyx_osv16", 669],
+        "5286686388506198758": ["convolution_gpu_bfyx_os_iyx_osv16", 1046],
+        "17044347247573802405": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16931221552471635881": ["convolution_gpu_bfyx_os_iyx_osv16", 157],
+        "3375470456077799802": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14332199338789934423": ["convolution_gpu_bfyx_os_iyx_osv16", 158],
+        "16759785658634382018": ["convolution_gpu_bfyx_os_iyx_osv16", 760],
+        "5919114362027813213": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1357304910509750335": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2624254602965505549": ["convolution_gpu_bfyx_f16", 8],
+        "5577742374711315791": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "578315994260636114": ["convolution_gpu_bfyx_f16", 8],
+        "1262880924315152695": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6168533266847660009": ["convolution_gpu_bfyx_f16", 8],
+        "14627313247209797163": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1270860549971294137": ["convolution_gpu_bfyx_f16", 8],
+        "4422458267180761143": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16820926361172105951": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7270466581298144020": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17886363415956316754": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1392628448770002052": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6733088214815340670": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9311722977080169500": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17741687009005052531": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "16599775094194414107": ["convolution_gpu_bfyx_f16", 6],
+        "17406888356387369802": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14665993929606055479": ["convolution_gpu_bfyx_f16", 6],
+        "1257358912309769908": ["convolution_gpu_bfyx_f16", 5],
+        "9833509391965801955": ["convolution_gpu_bfyx_os_iyx_osv16", 495],
+        "853439126393091889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10335429769666812841": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "9424664012357101635": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5860372371921305416": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3503193615625158929": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14744249132822614079": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5243045977966841351": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12657769780794263187": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7005710331306745857": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17732714197816812919": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2691481290737970286": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16758724324099838132": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "13321275573521697498": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17371402188380900420": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "15857411657993741130": ["convolution_gpu_bfyx_os_iyx_osv16", 253],
+        "10824769165318760081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1670508622389791801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6601005881101223654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3767953997999748671": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12193543332391207302": ["convolution_gpu_bfyx_os_iyx_osv16", 384],
+        "2779831597589397721": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14888498856025675875": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13008816286946828339": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "14472562307183930494": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "12260051528344627305": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "12237139830764526217": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "12839904859734107448": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "2557331839687658350": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "14711934417369240383": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "16644569811401857265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14644196187730386778": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 245],
+        "15997330269289678741": ["convolution_gpu_fs_byx_fsv32", 20],
+        "9456547817322301854": ["convolution_gpu_fs_byx_fsv32", 14],
+        "14503081204981089589": ["convolution_gpu_fs_byx_fsv32", 8],
+        "17459372555428323405": ["convolution_gpu_fs_byx_fsv32", 1],
+        "11045313080354230499": ["convolution_gpu_fs_byx_fsv32_1x1", 1],
+        "11891736161858320688": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "3837179970761308107": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "1599135987505067413": ["convolution_gpu_bfyx_gemm_like", 2],
+        "88960405449779079": ["convolution_gpu_bfyx_os_iyx_osv16", 871],
+        "3983071771155729815": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "4686928543634340294": ["convolution_gpu_bfyx_os_iyx_osv16", 122],
+        "9500201961536063781": ["convolution_gpu_bfyx_os_iyx_osv16", 82],
+        "5626617363814193337": ["convolution_gpu_bfyx_os_iyx_osv16", 543],
+        "9493629616033946504": ["convolution_gpu_bfyx_os_iyx_osv16", 84],
+        "9142997105687030758": ["convolution_gpu_bfyx_os_iyx_osv16", 755],
+        "3565303211593767799": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "1638619072790951553": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "16187579575395923193": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14768404566434004921": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "4439755580616372110": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "12082385141539179745": ["convolution_gpu_bfyx_os_iyx_osv16", 472],
+        "80211457682233943": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "2281851137797618536": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "4306482192283599644": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "7438079994024163367": ["convolution_gpu_bfyx_os_iyx_osv16", 755],
+        "2027062613896109334": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2494989528221736054": ["convolution_gpu_bfyx_f16", 5],
+        "10481457184081052557": ["convolution_gpu_bfyx_f16", 5],
+        "17843566914419305583": ["convolution_gpu_bfyx_f16", 7],
+        "10440359951914302042": ["convolution_gpu_bfyx_f16", 4],
+        "12355534646291322950": ["convolution_gpu_bfyx_f16", 3],
+        "1312046147551402733": ["convolution_gpu_bfyx_f16", 5],
+        "17747064821498992452": ["convolution_gpu_bfyx_f16", 4],
+        "15727623554601964014": ["convolution_gpu_bfyx_f16", 4],
+        "1123438482147655288": ["convolution_gpu_bfyx_f16", 2],
+        "7126696940487701707": ["convolution_gpu_bfyx_f16", 2],
+        "3872390202906772826": ["convolution_gpu_bfyx_f16", 2],
+        "2880589787553789663": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "11505611789014119307": ["convolution_gpu_bfyx_gemm_like", 2],
+        "437815073846842580": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1950316744853763835": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2101440743856834523": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11177728104020690382": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11070046570645256268": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12619772485618838435": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2751512607890114618": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15183698566691504656": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12653721467536263212": ["convolution_gpu_bfyx_os_iyx_osv16", 883],
+        "13194232160397919757": ["convolution_gpu_bfyx_os_iyx_osv16", 807],
+        "204538163378003996": ["convolution_gpu_bfyx_os_iyx_osv16", 49],
+        "6149494643008538957": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "11290368603402236066": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1323592601201034234": ["convolution_gpu_bfyx_f16", 6],
+        "14798486770850675841": ["convolution_gpu_bfyx_f16", 6],
+        "11673314628747753691": ["convolution_gpu_bfyx_f16", 8],
+        "7021961511624638678": ["convolution_gpu_bfyx_f16", 4],
+        "5676198353742450430": ["convolution_gpu_bfyx_f16", 6],
+        "4929819810689803833": ["convolution_gpu_bfyx_f16", 6],
+        "240316590146675808": ["convolution_gpu_bfyx_f16", 7],
+        "17625565940895057722": ["convolution_gpu_bfyx_f16", 6],
+        "8688075088415087060": ["convolution_gpu_bfyx_f16", 8],
+        "3109943868702160503": ["convolution_gpu_bfyx_f16", 6],
+        "15650217867869430450": ["convolution_gpu_bfyx_f16", 2],
+        "17908144598228512507": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "12413306519886846795": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3485465952750021220": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16729621401445513163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5488147296483022703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8710473738514939538": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9147606392761848284": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5087291643342132199": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11774085137209016046": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8929841836974581600": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7073670312468097760": ["convolution_gpu_bfyx_os_iyx_osv16", 424],
+        "14911211495772743601": ["convolution_gpu_bfyx_os_iyx_osv16", 800],
+        "3856389350154673872": ["convolution_gpu_bfyx_os_iyx_osv16", 805],
+        "14587774878993352201": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "2276167946100759891": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "18129268521578260814": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5584283929974879275": ["convolution_gpu_bfyx_f16", 8],
+        "11429584360303226064": ["convolution_gpu_bfyx_f16", 8],
+        "8686735181567651375": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "212877757325472435": ["convolution_gpu_bfyx_f16", 8],
+        "17559312741017462443": ["convolution_gpu_bfyx_f16", 8],
+        "12388383111921084595": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6044859325822961324": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15028548616895245917": ["convolution_gpu_bfyx_f16", 5],
+        "2416358280826517238": ["convolution_gpu_bfyx_f16", 8],
+        "5078717573348951772": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16307093827408988813": ["convolution_gpu_bfyx_f16", 8],
+        "7365885889295117317": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5519237001078836815": ["convolution_gpu_bfyx_f16", 6],
+        "10536941332534385779": ["convolution_gpu_bfyx_f16", 5],
+        "4735765505172334525": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11998001963634536052": ["convolution_gpu_bfyx_f16", 6],
+        "4967372874318410724": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6469943853460256537": ["convolution_gpu_bfyx_f16", 5],
+        "4356868209069762908": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10874805992997105013": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10425195772947453108": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "17029022832617859459": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2476051167651059767": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7844096932162345117": ["convolution_gpu_bfyx_f16", 3],
+        "2705126395780936342": ["convolution_gpu_bfyx_f16", 4],
+        "6904686223481502731": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3795064777145790033": ["convolution_gpu_bfyx_f16", 4],
+        "3981089957521430742": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12864700296881955607": ["convolution_gpu_bfyx_f16", 7],
+        "183214037684746423": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "14009922923845987763": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "3202655487034498666": ["convolution_gpu_bfyx_f16", 7],
+        "756854486757180730": ["convolution_gpu_bfyx_f16", 1],
+        "1750742987566783306": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "10704219670342115822": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "559540403792175610": ["convolution_gpu_bfyx_f16", 1],
+        "11042961657717641258": ["convolution_gpu_bfyx_f16", 8],
+        "7454909001123355674": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "14140446373297940618": ["convolution_gpu_bfyx_os_iyx_osv16", 758],
+        "12992827495874215098": ["convolution_gpu_bfyx_f16", 4],
+        "17560813776447689945": ["convolution_gpu_bfyx_f16", 7],
+        "12650154599281162622": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "9985311646893058565": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "17149651085328252645": ["convolution_gpu_bfyx_f16", 7],
+        "11779581889508749846": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "16817641185194791244": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "6143862109537773906": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6935108295659465736": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7424861737057604968": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9640933201231819369": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17277279278034795112": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8111879884622212613": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3806358488669113143": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1711328697805315421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5288962955659199699": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17987842029397168642": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4203253185427070377": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9945419220893973658": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14309870202508661817": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18192195499329490812": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18075812052832099472": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8192820779590386413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8925814981090917840": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5081785566500341341": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3605237561875385705": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1716302732338667414": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15442445290156572536": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14397043442550652899": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2228098191590860938": ["convolution_gpu_bfyx_os_iyx_osv16", 1044],
+        "1573476283825580755": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5388852746720776479": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "16916797286744339336": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3644136601694826205": ["convolution_gpu_bfyx_f16", 8],
+        "14012991338325957511": ["convolution_gpu_bfyx_f16", 5],
+        "2398531071421825711": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14629119844168195239": ["convolution_gpu_bfyx_f16", 8],
+        "9479190421236869705": ["convolution_gpu_bfyx_f16", 5],
+        "4695468509968937176": ["convolution_gpu_bfyx_f16", 5],
+        "16869493346339355004": ["convolution_gpu_bfyx_f16", 5],
+        "15546783882511835538": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14073402551217828243": ["convolution_gpu_bfyx_f16", 5],
+        "5737532382360638209": ["convolution_gpu_bfyx_f16", 7],
+        "14178075057440850235": ["convolution_gpu_bfyx_f16", 8],
+        "1041942313707882183": ["convolution_gpu_bfyx_f16", 5],
+        "14311656444636567643": ["convolution_gpu_bfyx_f16", 7],
+        "5708322911191147507": ["convolution_gpu_bfyx_f16", 3],
+        "7237541003077150774": ["convolution_gpu_bfyx_f16", 8],
+        "17097394033112334006": ["convolution_gpu_bfyx_f16", 1],
+        "10077604090153912107": ["convolution_gpu_bfyx_f16", 6],
+        "3042641872059534006": ["convolution_gpu_bfyx_f16", 3],
+        "13168100741247170644": ["convolution_gpu_bfyx_f16", 7],
+        "3376503797303782111": ["convolution_gpu_bfyx_f16", 3],
+        "8462839229772971651": ["convolution_gpu_bfyx_f16", 8],
+        "9678831759870330874": ["convolution_gpu_bfyx_f16", 3],
+        "10053808465394315011": ["convolution_gpu_bfyx_f16", 6],
+        "325275946658065165": ["convolution_gpu_bfyx_f16", 2],
+        "11261634534154975791": ["convolution_gpu_bfyx_f16", 7],
+        "11257004820976953882": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "5713491991252168637": ["convolution_gpu_bfyx_os_iyx_osv16", 423],
+        "2083080453795724323": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6452660166904314994": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "10411169140360183327": ["convolution_gpu_bfyx_os_iyx_osv16", 812],
+        "9743813978144755927": ["convolution_gpu_bfyx_os_iyx_osv16", 775],
+        "15769267071952355833": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "9816235120364293291": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5617875919579896151": ["fully_connected_gpu_bf_io_gemm", 1],
+        "18309383982594141239": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18042225157963583297": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6369935194881138691": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11002183397247930282": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14040168861632997052": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10321120422537436943": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7628224528894213786": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18333490976250555089": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12240359612725499137": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5214167856473943406": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "14052955765964466465": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7134511117843066284": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "4293870201735626607": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "7398517597116797925": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9788174666000966313": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18042814645135189475": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "11236623772616442479": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11418806742471661595": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "18007798448985514623": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9699952679060486545": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "579905583383428310": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "11102245529349471251": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10406248465333026906": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8032924116166179276": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7025366523000457929": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4140229891062448185": ["convolution_gpu_bfyx_f16", 6],
+        "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12777387776061796777": ["convolution_gpu_bfyx_f16", 7],
+        "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5183001506630431534": ["convolution_gpu_bfyx_f16", 6],
+        "13244421635448480964": ["convolution_gpu_bfyx_f16", 6],
+        "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12722030162332410659": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11753505949184833814": ["convolution_gpu_bfyx_f16", 4],
+        "1138805437505853389": ["convolution_gpu_bfyx_f16", 8],
+        "236844015511730537": ["convolution_gpu_bfyx_f16", 7],
+        "16372093802852963117": ["convolution_gpu_bfyx_f16", 5],
+        "8017045013578597247": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3767812249447618647": ["convolution_gpu_bfyx_f16", 4],
+        "14300856950146697632": ["convolution_gpu_bfyx_f16", 7],
+        "6658791967844021067": ["convolution_gpu_bfyx_f16", 5],
+        "16114394473926845719": ["convolution_gpu_bfyx_f16", 3],
+        "3329703306452769429": ["convolution_gpu_bfyx_f16", 1],
+        "12485385390638720435": ["convolution_gpu_bfyx_f16", 1],
+        "18325147922902896614": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "2042543291306492227": ["convolution_gpu_bfyx_f16", 2],
+        "14016387396197131183": ["convolution_gpu_bfyx_f16", 8],
+        "388252829841919694": ["convolution_gpu_bfyx_f16", 1],
+        "3224136725591132250": ["convolution_gpu_bfyx_f16", 1],
+        "11555472669677513180": ["convolution_gpu_bfyx_f16", 1],
+        "5878699865486527531": ["convolution_gpu_bfyx_f16", 2],
+        "14726692927619009109": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10286586505667471565": ["convolution_gpu_bfyx_f16", 2],
+        "8635430703217243594": ["convolution_gpu_bfyx_f16", 5],
+        "3194668567618864343": ["convolution_gpu_bfyx_f16", 2],
+        "2611344153711817460": ["convolution_gpu_bfyx_f16", 1],
+        "11818558634104933451": ["convolution_gpu_bfyx_f16", 1],
+        "5390496664798965323": ["convolution_gpu_bfyx_f16", 5],
+        "13357658087174699785": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14394195619252967214": ["convolution_gpu_bfyx_f16", 5],
+        "4669930370801439013": ["convolution_gpu_bfyx_f16", 3],
+        "1370501593495473908": ["convolution_gpu_bfyx_f16", 7],
+        "4179197899143727062": ["convolution_gpu_bfyx_f16", 8],
+        "4150158815056302279": ["convolution_gpu_bfyx_f16", 2],
+        "7454737385785852953": ["convolution_gpu_bfyx_f16", 5],
+        "17477451880893252674": ["convolution_gpu_bfyx_os_iyx_osv16", 110],
+        "5768225444324661639": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "1885336536803061563": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5714742374217969073": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10896471338854021271": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3806914827253341543": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16632515980529050205": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7466530815481157347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9025266984842296356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5906873273896994744": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7802957391728955870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9353411647951951678": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3747726337434740481": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5744097132927875811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16955287132696194727": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9395452164938581548": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11451950139903792107": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10917339102734674830": ["convolution_gpu_bfyx_1x1", 2],
+        "6989917785852753861": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15880522773125518978": ["convolution_gpu_bfyx_1x1", 1],
+        "747373540539235872": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1416762068965596282": ["convolution_gpu_bfyx_os_iyx_osv16", 110],
+        "13394998921545119351": ["convolution_gpu_bfyx_os_iyx_osv16", 208],
+        "15308583448258001619": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "3947469783167254911": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16513233390968673543": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "7953644920144486409": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12990194208171226009": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "212918306790163121": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12507063931340717634": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "944308351100913426": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10557004218031162358": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "15668589103337174848": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15795875095918663327": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "719423812642773465": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17406040551647193807": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "14418191404250235680": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "4355752766807245481": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8001877558898476628": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "5663755974835173519": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15515579892465814722": ["convolution_gpu_bfyx_os_iyx_osv16", 847],
+        "6228391894735143720": ["convolution_gpu_bfyx_os_iyx_osv16", 777],
+        "17619521756514112890": ["convolution_gpu_bfyx_os_iyx_osv16", 75],
+        "7618115892322102589": ["convolution_gpu_bfyx_os_iyx_osv16", 830],
+        "10859348180122457267": ["convolution_gpu_bfyx_os_iyx_osv16", 831],
+        "8920870418107208273": ["convolution_gpu_bfyx_os_iyx_osv16", 831],
+        "15455728969592248176": ["convolution_gpu_bfyx_os_iyx_osv16", 829],
+        "17332144919524270474": ["fully_connected_gpu_bf_io_gemm", 2],
+        "9513119231978452377": ["fully_connected_gpu_bf_io_gemm", 2],
+        "6332576636757295449": ["fully_connected_gpu_bf_io_gemm", 1],
+        "2493240824080071735": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4780210213847704316": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10506991028553025432": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5428553079642989652": ["convolution_gpu_bfyx_f16", 8],
+        "16121853713631575869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1510497042951931323": ["convolution_gpu_bfyx_f16", 8],
+        "2745430731204028395": ["convolution_gpu_bfyx_f16", 8],
+        "12876976900388547418": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16890063210386281886": ["convolution_gpu_bfyx_f16", 8],
+        "3927810740679311711": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "16684473381571424732": ["convolution_gpu_bfyx_f16", 8],
+        "13284968934065954912": ["convolution_gpu_bfyx_f16", 8],
+        "2631762936534205094": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1739904418563128064": ["convolution_gpu_bfyx_f16", 8],
+        "10980290216903708719": ["convolution_gpu_bfyx_f16", 8],
+        "735103333401104515": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6983554020850996053": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5921617358811124053": ["convolution_gpu_bfyx_f16", 5],
+        "14301841882009814238": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15717838690804403986": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12405925645446300036": ["convolution_gpu_bfyx_f16", 8],
+        "2258154151361692964": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "209570180062724480": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "28298051505236331": ["convolution_gpu_bfyx_f16", 3],
+        "5213864300694772508": ["convolution_gpu_bfyx_f16", 8],
+        "5762331369519287189": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2276871110978868522": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14409415690583079892": ["convolution_gpu_bfyx_f16", 8],
+        "1937137823574087575": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "852636453039879630": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5479982500377449068": ["convolution_gpu_bfyx_f16", 8],
+        "4498519811904474615": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12692499579789677851": ["convolution_gpu_bfyx_f16", 4],
+        "7387278268805782919": ["convolution_gpu_bfyx_f16", 6],
+        "2438123442946203226": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13258719154936933305": ["convolution_gpu_bfyx_f16", 8],
+        "18100501541133790185": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17588841654811790691": ["convolution_gpu_bfyx_f16", 8],
+        "6689548390020199537": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17024985107770974703": ["convolution_gpu_bfyx_f16", 8],
+        "9968353980515607037": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "17524096092737615017": ["convolution_gpu_bfyx_f16", 8],
+        "18114533502018520363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5255663532662079743": ["convolution_gpu_bfyx_f16", 8],
+        "7603883354122442155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "846394177044425685": ["convolution_gpu_bfyx_f16", 8],
+        "13144168456084150868": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11614500724316937770": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14902332370005427398": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13885328145647066921": ["convolution_gpu_bfyx_f16", 8],
+        "12145548657602787381": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13518278887400015765": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5205936493218798381": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15427181019330262398": ["convolution_gpu_bfyx_f16", 8],
+        "10959940286555804884": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8974050386876725444": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18162518237985989872": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10232477134289465267": ["convolution_gpu_bfyx_f16", 8],
+        "6212673407294495601": ["convolution_gpu_bfyx_f16", 8],
+        "17860712443740757354": ["convolution_gpu_bfyx_f16", 5],
+        "4146950753180366119": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8930966597498483291": ["convolution_gpu_bfyx_f16", 2],
+        "17992495365931215688": ["convolution_gpu_bfyx_f16", 8],
+        "7146066549311428539": ["convolution_gpu_bfyx_os_iyx_osv16", 810],
+        "3323834459803099675": ["convolution_gpu_bfyx_f16", 3],
+        "13970979686543548079": ["convolution_gpu_bfyx_f16", 5],
+        "17224261730179174234": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "8599384037400091771": ["convolution_gpu_bfyx_f16", 1],
+        "13691442749949008699": ["convolution_gpu_bfyx_f16", 1],
+        "18249370808689016771": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "9285872109762575050": ["convolution_gpu_bfyx_f16", 1],
+        "3778806780227752318": ["convolution_gpu_bfyx_f16", 2],
+        "14533995413631988093": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "18213608828525161872": ["convolution_gpu_bfyx_f16", 1],
+        "18201367360115946218": ["convolution_gpu_bfyx_f16", 7],
+        "2298930512642991761": ["convolution_gpu_bfyx_os_iyx_osv16", 417],
+        "7524439404315811688": ["convolution_gpu_bfyx_f16", 2],
+        "12807813148826993243": ["convolution_gpu_bfyx_f16", 2],
+        "10386449367791123777": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13710585118886614159": ["convolution_gpu_bfyx_f16", 5],
+        "5083137484520510189": ["convolution_gpu_bfyx_f16", 6],
+        "7310721662758245090": ["convolution_gpu_bfyx_os_iyx_osv16", 7],
+        "3441130252760474056": ["convolution_gpu_bfyx_f16", 3],
+        "17896513277322150873": ["convolution_gpu_bfyx_f16", 4],
+        "15804284488423654345": ["convolution_gpu_bfyx_os_iyx_osv16", 65],
+        "7664482438087213797": ["convolution_gpu_bfyx_f16", 8],
+        "11503348949826253367": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "8658460661236455347": ["convolution_gpu_bfyx_f16", 8],
+        "3330382517900635622": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17858565811787708662": ["convolution_gpu_bfyx_f16", 8],
+        "10812355035102461624": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1763713485502711028": ["convolution_gpu_bfyx_f16", 8],
+        "537708856180494652": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14039400995173243881": ["convolution_gpu_bfyx_f16", 8],
+        "16144549808790730407": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3607178347319840731": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1616172004620830694": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16914324612936017713": ["convolution_gpu_bfyx_f16", 8],
+        "5643847517719208248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10544570776782304031": ["convolution_gpu_bfyx_f16", 8],
+        "10319250281140119656": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15493574903211227269": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8519850106535467914": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10879590057665696981": ["convolution_gpu_bfyx_f16", 8],
+        "14575912896890066295": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3580607955559330220": ["convolution_gpu_bfyx_f16", 8],
+        "11846250273112462539": ["convolution_gpu_bfyx_os_iyx_osv16", 160],
+        "5211338259118953367": ["convolution_gpu_bfyx_f16", 8],
+        "2534964735134768930": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13711299339844520924": ["convolution_gpu_bfyx_f16", 7],
+        "6904712232722172471": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "14274320868149065806": ["convolution_gpu_bfyx_f16", 1],
+        "10286169767282972888": ["convolution_gpu_bfyx_f16", 2],
+        "5778557207840627218": ["convolution_gpu_bfyx_os_iyx_osv16", 755],
+        "512957346409732922": ["convolution_gpu_bfyx_f16", 8],
+        "16606774042635630114": ["convolution_gpu_bfyx_f16", 3],
+        "2762113297856010960": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "16799551121569588878": ["convolution_gpu_bfyx_os_iyx_osv16", 4],
+        "3047328071912702715": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4614195525780772879": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "229690452505740843": ["convolution_gpu_bfyx_f16", 7],
+        "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13526783681740823304": ["binary_convolution_gpu_1x1", 0],
+        "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16642117060176841433": ["convolution_gpu_bfyx_f16", 6],
+        "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4859984633862705344": ["convolution_gpu_bfyx_f16", 8],
+        "6643541161570220487": ["convolution_gpu_bfyx_f16", 3],
+        "4771606875232577147": ["convolution_gpu_bfyx_f16", 8],
+        "10197214218719989238": ["convolution_gpu_bfyx_f16", 3],
+        "9021222698443352890": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13650156111024145576": ["convolution_gpu_bfyx_f16", 6],
+        "1823355198957173511": ["convolution_gpu_bfyx_f16", 2],
+        "18224832115562649218": ["convolution_gpu_bfyx_f16", 7],
+        "17989075751697734530": ["convolution_gpu_bfyx_f16", 2],
+        "11065625785330976824": ["convolution_gpu_bfyx_f16", 1],
+        "16508598944144723038": ["convolution_gpu_bfyx_f16", 2],
+        "18207737700426780826": ["convolution_gpu_bfyx_f16", 7],
+        "5739556172667922404": ["convolution_gpu_bfyx_f16", 2],
+        "5648963558592113654": ["convolution_gpu_bfyx_f16", 5],
+        "12700332665217178557": ["convolution_gpu_bfyx_f16", 4],
+        "2049835121645334394": ["convolution_gpu_bfyx_f16", 8],
+        "3127350206986146597": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "413340907950386667": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12169959708985325397": ["convolution_gpu_bfyx_f16", 8],
+        "15591038406687270109": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10924767171232495386": ["convolution_gpu_bfyx_f16", 8],
+        "13999571841387221249": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "5672447484681958632": ["convolution_gpu_bfyx_f16", 2],
+        "9409310337267359456": ["convolution_gpu_bfyx_f16", 8],
+        "12167511024377353453": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15893208324896471495": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "12208132924820452372": ["convolution_gpu_bfyx_f16", 8],
+        "3293314982357251400": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10021818023939833096": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "2473005239224541536": ["convolution_gpu_bfyx_f16", 8],
+        "6856119608423875377": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18255457417918645346": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "16665418645769386939": ["convolution_gpu_bfyx_f16", 8],
+        "10430922762683242901": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15110089333676343949": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "9475257013522373650": ["convolution_gpu_bfyx_f16", 8],
+        "10610536157845815072": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5557066335410910062": ["convolution_gpu_bfyx_f16", 8],
+        "18146293782255442927": ["convolution_gpu_bfyx_f16", 1],
+        "11852944538668620269": ["convolution_gpu_bfyx_f16", 3],
+        "14937682075916905713": ["convolution_gpu_bfyx_f16", 8],
+        "12792249796816770204": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17312037326967676576": ["convolution_gpu_bfyx_f16", 0],
+        "11909045540447457308": ["convolution_gpu_bfyx_f16", 8],
+        "10510946825189206241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6124992432121468125": ["convolution_gpu_bfyx_f16", 2],
+        "14190999291985701693": ["convolution_gpu_bfyx_f16", 8],
+        "13598178470968135338": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9448354674053762309": ["convolution_gpu_bfyx_f16", 2],
+        "12777599919231312068": ["convolution_gpu_bfyx_f16", 8],
+        "9337614078096106084": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16578631161511759035": ["convolution_gpu_bfyx_f16", 1],
+        "6322333494387087177": ["convolution_gpu_bfyx_f16", 8],
+        "9051299669421439712": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6900406474100422151": ["convolution_gpu_bfyx_f16", 2],
+        "1770678726875883309": ["convolution_gpu_bfyx_f16", 8],
+        "9884897216756697592": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5833785867675066644": ["convolution_gpu_bfyx_f16", 1],
+        "16500610465961551242": ["convolution_gpu_bfyx_f16", 8],
+        "1390142483294581487": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10267854415205002238": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15623187792779892835": ["convolution_gpu_bfyx_f16", 2],
+        "13148059837896884273": ["convolution_gpu_bfyx_f16", 7],
+        "171559638613408493": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10278640368905105405": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10760404678801561747": ["convolution_gpu_bfyx_f16", 1],
+        "8548473413394744544": ["convolution_gpu_bfyx_f16", 6],
+        "16729171964149306867": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3716706098703025358": ["convolution_gpu_bfyx_f16", 1],
+        "129949756464977129": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "3542071367019145965": ["fused_conv_eltwise_gpu_ref", 1],
+        "6421891780685569059": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "12397973858549014447": ["convolution_gpu_bfyx_f16", 8],
+        "147576342753084622": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9006679435677596041": ["convolution_gpu_bfyx_f16", 8],
+        "375607190849326617": ["convolution_gpu_bfyx_f16", 8],
+        "12465373696424446749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3330746708867776870": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "175810741723366131": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1393228887151888661": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2791644023635315729": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14953047924930959040": ["convolution_gpu_bfyx_f16", 8],
+        "13576707834156737134": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17441797654332334591": ["convolution_gpu_bfyx_f16", 8],
+        "4368216880157087051": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4527131704372375891": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12532849387017200369": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6143746716136988129": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "229425834968700183": ["convolution_gpu_bfyx_f16", 5],
+        "14611155839967647053": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "12220017703888172171": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3017664565974342570": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3919092484794350954": ["convolution_gpu_bfyx_f16", 4],
+        "777704696687372198": ["convolution_gpu_bfyx_f16", 4],
+        "9530089245179389803": ["convolution_gpu_bfyx_os_iyx_osv16", 912],
+        "15407099455173114443": ["convolution_gpu_bfyx_f16", 8],
+        "13830673382612975715": ["convolution_gpu_bfyx_f16", 6],
+        "15418883453881678146": ["convolution_gpu_bfyx_f16", 4],
+        "18181217963990641003": ["convolution_gpu_bfyx_f16", 2],
+        "15712589409011660453": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "1196403115198061647": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "643101540653656807": ["convolution_gpu_bfyx_f16", 2],
+        "13215809871210781323": ["convolution_gpu_bfyx_f16", 2],
+        "18368779848570116967": ["convolution_gpu_bfyx_f16", 2],
+        "3544698174676763847": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "12428007544423412129": ["convolution_gpu_bfyx_f16", 6],
+        "4939325123575119544": ["convolution_gpu_bfyx_f16", 1],
+        "17947736981603570615": ["convolution_gpu_bfyx_f16", 2],
+        "6620861214152396614": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "8176114476658865003": ["convolution_gpu_bfyx_os_iyx_osv16", 1057],
+        "7408205445085068145": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11368781584821592726": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13893351700564465666": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3518310626820299509": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14411220648355431920": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16360948136590378689": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12603778068505548164": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7393554260847466099": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11640173157120764930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10391275203444358233": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12900060990097311151": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13949457796213177880": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12229727046452778843": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6356853913935067660": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2057724637751433123": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16951394780935673368": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4362905853733519089": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11437739738725998008": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1166763569766001639": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "13509884479614626207": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13561264673311456568": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13949179271064170300": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9085227279626009353": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "15999251370466034620": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8278218983765546430": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1283216388519834306": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10392839783862963669": ["convolution_gpu_bfyx_gemm_like", 2],
+        "446095524058497778": ["convolution_gpu_bfyx_os_iyx_osv16", 470],
+        "16418977525726114825": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5073696559530173773": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8986786677408239490": ["convolution_gpu_bfyx_os_iyx_osv16", 807],
+        "18154134293896237020": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9604863051097029874": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12931069967038668164": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6806199908367808607": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11683146685348965370": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8154297486284619437": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14336744408490491240": ["convolution_gpu_bfyx_os_iyx_osv16", 308],
+        "4571901717343198720": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6532394816830144120": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2666796249274140911": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "11653606109120321972": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "6204893434840435239": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "13218364348439640168": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10201555771333451359": ["convolution_gpu_bfyx_os_iyx_osv16", 110],
+        "6894773592689372049": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7168438768023636584": ["convolution_gpu_bfyx_f16", 8],
+        "10451355428354516953": ["convolution_gpu_bfyx_f16", 8],
+        "14472734042788843355": ["convolution_gpu_bfyx_f16", 8],
+        "10854104081943494369": ["convolution_gpu_bfyx_f16", 8],
+        "93020906459675429": ["convolution_gpu_bfyx_f16", 3],
+        "18398350909015256408": ["convolution_gpu_bfyx_f16", 8],
+        "4154340122141626612": ["convolution_gpu_bfyx_f16", 5],
+        "18200289027422735061": ["convolution_gpu_bfyx_f16", 3],
+        "5565357052205136958": ["convolution_gpu_bfyx_f16", 7],
+        "15946908544184249774": ["convolution_gpu_bfyx_f16", 8],
+        "14037627422329357174": ["convolution_gpu_bfyx_f16", 8],
+        "14408378031985995049": ["convolution_gpu_bfyx_f16", 7],
+        "13211513495214123892": ["convolution_gpu_bfyx_f16", 6],
+        "1496494589494248203": ["convolution_gpu_bfyx_f16", 8],
+        "17087805036943027743": ["convolution_gpu_bfyx_f16", 8],
+        "13247615789377163390": ["convolution_gpu_bfyx_f16", 6],
+        "5098352369763200627": ["convolution_gpu_bfyx_f16", 8],
+        "7557421223834089733": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10657042057899091892": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2064129679519084519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13149626711154707837": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2450247775784772609": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "9349162934459662079": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13032204489661886072": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9915338154088450212": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2204239160621715211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449351266437601922": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "1155876454105658452": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10367977997774504988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9837317326715221119": ["convolution_gpu_bfyx_os_iyx_osv16", 373],
+        "10725269803461677890": ["convolution_gpu_bfyx_os_iyx_osv16", 1115],
+        "2877965337998085379": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5765037690630152391": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2731214798095843918": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "4432212871967601555": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "13674833960992369491": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2407729796226002219": ["convolution_gpu_bfyx_f16", 8],
+        "8712233195607754052": ["convolution_gpu_bfyx_f16", 8],
+        "18376338036643391330": ["convolution_gpu_bfyx_f16", 8],
+        "15000057703375682508": ["convolution_gpu_bfyx_f16", 8],
+        "11538380796610598086": ["convolution_gpu_bfyx_f16", 8],
+        "8807959414103299339": ["convolution_gpu_bfyx_f16", 8],
+        "8923709952861619751": ["convolution_gpu_bfyx_f16", 8],
+        "2281723979610106495": ["convolution_gpu_bfyx_f16", 3],
+        "7003402145984308994": ["convolution_gpu_bfyx_f16", 8],
+        "8802871881972169446": ["convolution_gpu_bfyx_f16", 8],
+        "17672255854769914684": ["convolution_gpu_bfyx_f16", 8],
+        "15989515952156087492": ["convolution_gpu_bfyx_f16", 8],
+        "12613916101209377956": ["convolution_gpu_bfyx_f16", 8],
+        "8926372099361990033": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13134908817293730842": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14010406343040661271": ["convolution_gpu_bfyx_f16", 8],
+        "2325094934617563483": ["convolution_gpu_bfyx_f16", 8],
+        "11756769107875909669": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8711172943068374489": ["convolution_gpu_bfyx_f16", 8],
+        "1909118584082415877": ["convolution_gpu_bfyx_f16", 8],
+        "12040023093627702264": ["convolution_gpu_bfyx_f16", 8],
+        "6474623094910171017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6747189810752747337": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "12216744913496272224": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4342399258032747578": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5084349834068342816": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "12786257902562938666": ["convolution_gpu_bfyx_os_iyx_osv16", 372],
+        "8284243114775216351": ["convolution_gpu_bfyx_os_iyx_osv16", 1103],
+        "17588749900110806571": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15418915313718368321": ["convolution_gpu_bfyx_os_iyx_osv16", 1097],
+        "763194125654617818": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5660634357872541998": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16012873046323424192": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "3722402584962183950": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9761723873626289438": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4696863372127622823": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "3364509432107392704": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "17187804634689894363": ["convolution_gpu_bfyx_os_iyx_osv16", 727],
+        "10681521954706351183": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4147438820393951383": ["convolution_gpu_bfyx_os_iyx_osv16", 727],
+        "11120743380724204067": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9774801800070756895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1998618394547230268": ["convolution_gpu_bfyx_os_iyx_osv16", 751],
+        "5638081054417809107": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16704551377771794086": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11414353004383751891": ["convolution_gpu_bfyx_f16", 8],
+        "13826353934358977360": ["convolution_gpu_bfyx_f16", 8],
+        "12571951090832825431": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12750018695410865011": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6036780184043053863": ["convolution_gpu_bfyx_f16", 8],
+        "6704445240879304751": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6059617597062194696": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "17195686088514144017": ["convolution_gpu_bfyx_f16", 8],
+        "6228695761133876306": ["convolution_gpu_bfyx_f16", 8],
+        "1875177778795651060": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1020688071038165625": ["convolution_gpu_bfyx_f16", 8],
+        "11609278929695762477": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11618496013484392127": ["convolution_gpu_bfyx_f16", 7],
+        "9467068612251977759": ["convolution_gpu_bfyx_f16", 8],
+        "11965876788458629557": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1968426148563107280": ["convolution_gpu_bfyx_f16", 7],
+        "5809259008840872032": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16891389262193208125": ["convolution_gpu_bfyx_f16", 8],
+        "17789658392895927080": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13756435969613742897": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "3851698237626497000": ["convolution_gpu_bfyx_os_iyx_osv16", 993],
+        "13501391260376277367": ["convolution_gpu_bfyx_os_iyx_osv16", 993],
+        "1765482196017051011": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12502159939277602017": ["convolution_gpu_bfyx_f16", 3],
+        "16051024745177409774": ["convolution_gpu_bfyx_f16", 4],
+        "13606942804997151903": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4351281046292319725": ["convolution_gpu_bfyx_f16", 4],
+        "6443607999496148234": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13547488471348547459": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18238745366827633559": ["convolution_gpu_bfyx_os_iyx_osv16", 82],
+        "11158062361663031443": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "9206861055140649226": ["convolution_gpu_bfyx_f16", 6],
+        "9426001650092504798": ["convolution_gpu_bfyx_f16", 8],
+        "13181672943699248834": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "6339523663850142246": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "1471109004832880586": ["convolution_gpu_bfyx_f16", 3],
+        "17107284393334082714": ["convolution_gpu_bfyx_f16", 6],
+        "13719359892110227962": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "9464351599302771690": ["convolution_gpu_bfyx_os_iyx_osv16", 383],
+        "2096653216949318450": ["convolution_gpu_bfyx_f16", 8],
+        "13025913519962707885": ["convolution_gpu_bfyx_f16", 8],
+        "17728310140731150226": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "5737189353417573057": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "3213984700185874261": ["convolution_gpu_bfyx_f16", 8],
+        "10740106091021667886": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "13362856801855126628": ["convolution_gpu_bfyx_os_iyx_osv16", 41],
+        "5477965717233241895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13669762279828807941": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11383807956757990177": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660099130061496863": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17151683028720387864": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1859914910272455189": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7396998153023492339": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2008700175670389343": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16827869183124732303": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13120889385491477637": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18305507733019922935": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4387964680811897490": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9490382148010824252": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7607585452987307694": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6647358668213164168": ["convolution_gpu_bfyx_os_iyx_osv16", 315],
+        "3269426835760928022": ["convolution_gpu_bfyx_os_iyx_osv16", 318],
+        "8407302923973070317": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7392260165026897157": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17129583679506972654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15394113208725741887": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2232515974555590822": ["convolution_gpu_bfyx_os_iyx_osv16", 760],
+        "10924517066879469764": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16962109663829219905": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12727830299177939535": ["convolution_gpu_bfyx_f16", 8],
+        "14199062222704041939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10076860909609577057": ["convolution_gpu_bfyx_f16", 8],
+        "1776138842548256617": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12080107273581243331": ["convolution_gpu_bfyx_f16", 8],
+        "17797545214985482309": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1422959599890390628": ["convolution_gpu_bfyx_f16", 8],
+        "9838313987238017367": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5055964951388373312": ["convolution_gpu_bfyx_f16", 8],
+        "9954422981575375090": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6900498541045596449": ["convolution_gpu_bfyx_f16", 8],
+        "10947987508463792407": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8011212857567850331": ["convolution_gpu_bfyx_f16", 8],
+        "12066306068956923073": ["convolution_gpu_bfyx_f16", 8],
+        "10402257611113721897": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16298426629186155976": ["convolution_gpu_bfyx_f16", 8],
+        "5849544612077982343": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7486977943442929227": ["convolution_gpu_bfyx_f16", 8],
+        "4669548232510373224": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11178064889018543448": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "14338047015194840420": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11113611129372516159": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7571325526315806090": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "16051125771881231197": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "5500003724328450643": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "6990517414810688521": ["fully_connected_gpu_bf_io_gemm", 2],
+        "3365158575268504690": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15532688375958629736": ["convolution_gpu_bfyx_os_iyx_osv16", 365],
+        "7172667569652614272": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "9852378413482765633": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17891867756237002865": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "8518413618774363848": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "3035058890807107503": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10522964111588366077": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5091533143160590449": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 742],
+        "18403842741213451915": ["convolution_gpu_bfyx_os_iyx_osv16", 222],
+        "16363667733973120518": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17885073348446455401": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "11022756012642936369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11139267075730841649": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11229901418407413996": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5222025157174261438": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "17449182536559459768": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6297704420477135889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11108691276983929466": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "17214808446370850848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10308273010954959421": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15889539072687412294": ["convolution_gpu_bfyx_gemm_like", 2],
+        "543890610580810398": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1039],
+        "10792988210112094339": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "7408203620228473987": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16676023485427668788": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "947940965229080670": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 656],
+        "13790640092608885830": ["convolution_gpu_bfyx_os_iyx_osv16", 951],
+        "6545311138362761303": ["convolution_gpu_bfyx_os_iyx_osv16", 548],
+        "5305325292949121227": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "12206315739377842316": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15794321689897308881": ["convolution_gpu_bfyx_f16", 5],
+        "5484125953239615763": ["convolution_gpu_bfyx_f16", 5],
+        "2757551509240446139": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18162571474251370775": ["convolution_gpu_bfyx_f16", 8],
+        "18166598730010472057": ["convolution_gpu_bfyx_f16", 7],
+        "9693459623757611016": ["convolution_gpu_bfyx_f16", 7],
+        "5728119660273315956": ["convolution_gpu_bfyx_f16", 5],
+        "11239914102833617438": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12459704794510442759": ["convolution_gpu_bfyx_f16", 6],
+        "10085932287585840621": ["convolution_gpu_bfyx_f16", 6],
+        "4683959402324362591": ["convolution_gpu_bfyx_f16", 6],
+        "601777369358795451": ["convolution_gpu_bfyx_f16", 8],
+        "18164526837814844607": ["convolution_gpu_bfyx_f16", 8],
+        "10891482236028483911": ["convolution_gpu_bfyx_f16", 7],
+        "2288431871961311886": ["convolution_gpu_bfyx_f16", 6],
+        "8042721734241214802": ["convolution_gpu_bfyx_f16", 2],
+        "2099403897129551255": ["convolution_gpu_bfyx_f16", 6],
+        "13249070386604821754": ["convolution_gpu_bfyx_f16", 8],
+        "13094552025197588032": ["convolution_gpu_bfyx_f16", 7],
+        "3033228150494649847": ["convolution_gpu_bfyx_f16", 4],
+        "1886675028572526491": ["convolution_gpu_bfyx_f16", 6],
+        "13297546803430310514": ["convolution_gpu_bfyx_f16", 8],
+        "7841875474696309399": ["convolution_gpu_bfyx_f16", 7],
+        "18152244993328643321": ["convolution_gpu_bfyx_f16", 8],
+        "6360926220193053423": ["convolution_gpu_bfyx_f16", 6],
+        "17119700657499960250": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "10899267078041093597": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "11165701472241951833": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3582634693373659847": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "3334026180071867610": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "5443310231181579928": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17203265678149575116": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "18001153514387944483": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "14678448066677992909": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1472673738079022921": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "9210929274479838540": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "14052560267577031250": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "11761558075765102945": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2994573423350313291": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "6446696801960621776": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "4993668527725303377": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16681164889734441913": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "5870803719794486347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "5252877195442523975": ["convolution_gpu_bfyx_os_iyx_osv16", 202],
+        "17366351824112539739": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4000739627265205773": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "3021897915458395756": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "3587400134318800957": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "899884405480315978": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "16109177282570031068": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 134],
+        "4102196194477012012": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "6443971566937312874": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5730812864956211386": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "10903630002990314118": ["convolution_gpu_bfyx_f16", 8],
+        "14792133935314535772": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "134764196422339946": ["convolution_gpu_bfyx_f16", 8],
+        "7649098040464263012": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8261353883745708993": ["convolution_gpu_bfyx_f16", 8],
+        "14729283399254215184": ["convolution_gpu_bfyx_f16", 8],
+        "15972034366129164791": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6089665236185789777": ["convolution_gpu_bfyx_f16", 8],
+        "13871746223287309461": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10063803553810811685": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1872584393135018560": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5428450090197909187": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16371170442503065678": ["convolution_gpu_bfyx_f16", 8],
+        "4237307788889339587": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6443689845617564164": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1311581305426450842": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12519308309976060263": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9615782627992922213": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8051063619232397665": ["convolution_gpu_bfyx_f16", 8],
+        "10987585104127812498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7131640551183167105": ["convolution_gpu_bfyx_f16", 5],
+        "17528260968382789267": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "3692483328113186067": ["convolution_gpu_bfyx_f16", 8],
+        "7694255321069379488": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3545269441923145336": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10643380013749923489": ["convolution_gpu_bfyx_f16", 4],
+        "13821946704646192935": ["convolution_gpu_bfyx_f16", 8],
+        "7150971004919685584": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7555796481960570354": ["convolution_gpu_bfyx_f16", 8],
+        "1802080211194796745": ["convolution_gpu_bfyx_f16", 3],
+        "11278616463993391107": ["convolution_gpu_bfyx_f16", 1],
+        "4522486456498017325": ["convolution_gpu_bfyx_f16", 2],
+        "10839722921299529226": ["convolution_gpu_bfyx_f16", 4],
+        "12049793935704273778": ["convolution_gpu_bfyx_f16", 2],
+        "11167394660860618324": ["convolution_gpu_bfyx_f16", 2],
+        "10589914405539478974": ["convolution_gpu_bfyx_f16", 2],
+        "1873986292070678779": ["convolution_gpu_bfyx_f16", 8],
+        "2954053167638478731": ["convolution_gpu_bfyx_f16", 7],
+        "4151068961170605556": ["convolution_gpu_bfyx_f16", 5],
+        "430695072683807402": ["convolution_gpu_bfyx_os_iyx_osv16", 380],
+        "2475680330312153399": ["convolution_gpu_bfyx_os_iyx_osv16", 380],
+        "4814090476154320391": ["convolution_gpu_bfyx_gemm_like", 2],
+        "401208792095350972": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12980211839763569977": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6011669866574390388": ["fully_connected_gpu_fb_io_ref", 1],
+        "4568334008414745667": ["fully_connected_gpu_fb_oi_ref", 1],
+        "11395215181578068623": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10720631808458688474": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2668670046934680180": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1852843918994539642": ["convolution_gpu_bfyx_gemm_like", 2],
+        "609944608610496003": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2491010747718166234": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18310729590270667665": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18061582718156557458": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3957386760515436702": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "18218313235608627889": ["convolution_gpu_bfyx_os_iyx_osv16", 1048],
+        "16069469614549557651": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13635064319608016375": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "12955977963529216714": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9000599407449073799": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "4538238288532448191": ["convolution_gpu_bfyx_os_iyx_osv16", 286],
+        "5772569803234537608": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "12841353805697309892": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6327608958004075948": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10481749345430191494": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3465618418555443152": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6220132353152696371": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "767822057476164981": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9047957325396112699": ["convolution_gpu_bfyx_os_iyx_osv16", 262],
+        "4356441299961129632": ["convolution_gpu_bfyx_os_iyx_osv16", 259],
+        "10144632434338007132": ["convolution_gpu_bfyx_os_iyx_osv16", 637],
+        "15158722447225497040": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14636891429613595743": ["convolution_gpu_bfyx_os_iyx_osv16", 1014],
+        "10686925946858146532": ["convolution_gpu_bfyx_os_iyx_osv16", 255],
+        "8212789547545225423": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "11769756626318373236": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5110309993577022127": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
         "10298865798559508430": ["convolution_gpu_bfyx_gemm_like", 2],
         "8036745915261696332": ["convolution_gpu_bfyx_gemm_like", 2],
         "10569376024770516176": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
         "10092988206358215006": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
         "14474098620251910263": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
         "5986743242501081071": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
-        "9267895309185863228": ["convolution_gpu_bfyx_os_iyx_osv16", 1]
+        "9267895309185863228": ["convolution_gpu_bfyx_os_iyx_osv16", 1],
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5981205170754513046": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3025829117046314851": ["convolution_gpu_bfyx_os_iyx_osv16", 107],
+        "14600403613863348033": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9466249274834206569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17167052658616496904": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "758159154291645307": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "14555191501995137081": ["fully_connected_gpu_bf_io_gemm", 1],
+        "12700938470888412097": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "4143776775548070480": ["convolution_gpu_bfyx_os_iyx_osv16", 1055],
+        "1941626906605395126": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17730451527258141168": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1132589293248085972": ["convolution_gpu_bfyx_os_iyx_osv16", 721],
+        "9751859564693419826": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14126491856050876512": ["convolution_gpu_bfyx_gemm_like", 0]
     },
     "48": {
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5981205170754513046": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3025829117046314851": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "14600403613863348033": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9466249274834206569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17167052658616496904": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "758159154291645307": ["fully_connected_gpu_bf_io_gemm", 2],
+        "14555191501995137081": ["fully_connected_gpu_bf_io_gemm", 1],
+        "3057483147285040704": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4460662214292495759": ["convolution_gpu_bfyx_f16", 8],
+        "17632851940131114495": ["convolution_gpu_bfyx_f16", 8],
+        "7945867532035693686": ["convolution_gpu_bfyx_f16", 8],
+        "10798155343477437060": ["convolution_gpu_bfyx_f16", 8],
+        "14191150640021059705": ["convolution_gpu_bfyx_f16", 8],
+        "14593228968660512118": ["convolution_gpu_bfyx_f16", 8],
+        "5573515532668433114": ["convolution_gpu_bfyx_f16", 8],
+        "11642934660277782628": ["convolution_gpu_bfyx_f16", 8],
+        "4825553592910970555": ["convolution_gpu_bfyx_f16", 8],
+        "17245530055973419690": ["convolution_gpu_bfyx_f16", 8],
+        "14644519840111409049": ["convolution_gpu_bfyx_f16", 8],
+        "15093112872571669071": ["convolution_gpu_bfyx_f16", 8],
+        "6172925429706792586": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16888914688498810916": ["convolution_gpu_bfyx_f16", 8],
+        "7094210524110336636": ["convolution_gpu_bfyx_f16", 8],
+        "1102719880087191972": ["convolution_gpu_bfyx_f16", 8],
+        "17635368969132641763": ["convolution_gpu_bfyx_f16", 8],
+        "6444855710931944326": ["convolution_gpu_bfyx_f16", 8],
+        "3685203889040861337": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8983142397488339162": ["convolution_gpu_bfyx_f16", 8],
+        "2942771097961823034": ["convolution_gpu_bfyx_f16", 8],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 1040],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16", 6],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 668],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 7],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 7],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 6],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 8],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 8],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16", 8],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 3],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 8],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 8],
+        "338313831905889757": ["convolution_gpu_bfyx_f16", 4],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 8],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 8],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16", 4],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 8],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 8],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 4],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 4],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 75],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 39],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 391],
+        "570493430126610249": ["fully_connected_gpu_bf_io_gemm", 2],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 1],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5689213766720451736": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11153755804932874939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13074790088623248655": ["convolution_gpu_bfyx_f16", 6],
+        "14552950763379636885": ["convolution_gpu_bfyx_f16", 7],
+        "1094600023872583173": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16827633753490728058": ["convolution_gpu_bfyx_f16", 8],
+        "6130516122331504865": ["convolution_gpu_bfyx_f16", 8],
+        "7670629548971090825": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5029322578170351026": ["convolution_gpu_bfyx_f16", 8],
+        "11682717086936489649": ["convolution_gpu_bfyx_f16", 8],
+        "9372644596618467274": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14183733053550126939": ["convolution_gpu_bfyx_f16", 7],
+        "5642981720905097704": ["convolution_gpu_bfyx_f16", 8],
+        "3924580903671169312": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17700105511171786728": ["convolution_gpu_bfyx_f16", 6],
+        "14998223809620050073": ["convolution_gpu_bfyx_f16", 8],
+        "419201770890811765": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7063350782589593425": ["convolution_gpu_bfyx_f16", 6],
+        "10687898799916833174": ["convolution_gpu_bfyx_f16", 6],
+        "5341504900604548311": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8082311845702095517": ["convolution_gpu_bfyx_f16", 7],
+        "5769891345892528049": ["convolution_gpu_bfyx_f16", 6],
+        "5034821474694053994": ["convolution_gpu_bfyx_f16", 8],
+        "2717532297792072749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "368628635269408785": ["convolution_gpu_bfyx_f16", 7],
+        "10159612784755046280": ["convolution_gpu_bfyx_f16", 8],
+        "15051374440521170869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17031332595095892437": ["convolution_gpu_bfyx_f16", 4],
+        "6938198718430530942": ["convolution_gpu_bfyx_f16", 8],
+        "2358029178760210430": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16131007134197705525": ["convolution_gpu_bfyx_f16", 4],
+        "6612035874395100788": ["convolution_gpu_bfyx_f16", 6],
+        "15022677981959490269": ["convolution_gpu_bfyx_f16", 7],
+        "11900509609879947992": ["convolution_gpu_bfyx_f16", 5],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3968445701280656378": ["convolution_gpu_bfyx_f16", 8],
+        "7463742252314920613": ["convolution_gpu_bfyx_f16", 8],
+        "17747065651432157057": ["convolution_gpu_bfyx_f16", 8],
+        "2951437417233062866": ["convolution_gpu_bfyx_f16", 8],
+        "4695031178096669813": ["convolution_gpu_bfyx_f16", 8],
+        "13200791011072363046": ["convolution_gpu_bfyx_f16", 8],
+        "7702483443698911725": ["convolution_gpu_bfyx_f16", 8],
+        "3225276687886679210": ["convolution_gpu_bfyx_f16", 8],
+        "8406061878298060171": ["convolution_gpu_bfyx_f16", 8],
+        "11861948300376902542": ["convolution_gpu_bfyx_f16", 8],
+        "18047654118875021903": ["convolution_gpu_bfyx_f16", 8],
+        "3876838946012690078": ["convolution_gpu_bfyx_f16", 7],
+        "11532811324432477051": ["convolution_gpu_bfyx_f16", 7],
+        "16482627014547828135": ["convolution_gpu_bfyx_f16", 6],
+        "4565106422618308814": ["convolution_gpu_bfyx_f16", 7],
+        "16991433003318725315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "16286683168753184722": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7074368169815304627": ["convolution_gpu_bfyx_os_iyx_osv16", 118],
+        "10702490327714920783": ["convolution_gpu_bfyx_gemm_like", 2],
+        "964168479107166949": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6252510766878541979": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1012052068628903875": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15499166167392043521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14327383763442344255": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18371627210590255356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13185859115957551268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15052792752810689842": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17918808521142517830": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1644157325342654261": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12198018126650448419": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9714393675511550323": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4928366179227934688": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15361605271135812199": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10267714663732575502": ["convolution_gpu_bfyx_1x1", 1],
+        "9990965405769569785": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10493403039286551634": ["convolution_gpu_bfyx_1x1", 1],
+        "18324310183763016728": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "6002923098500991259": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3429780644945779272": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6005067060818453503": ["convolution_gpu_bfyx_f16", 8],
+        "3676547304316346974": ["convolution_gpu_bfyx_f16", 8],
+        "8412675332215210248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14477382651380138146": ["convolution_gpu_bfyx_f16", 8],
+        "15899888589766240554": ["convolution_gpu_bfyx_f16", 8],
+        "4529376177404929890": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "7210896246223636810": ["convolution_gpu_bfyx_f16", 8],
+        "2775471071662652034": ["convolution_gpu_bfyx_f16", 8],
+        "17132456912135683375": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15563691660506818555": ["convolution_gpu_bfyx_f16", 6],
+        "9997402509928965207": ["convolution_gpu_bfyx_f16", 8],
+        "7793754164423097155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4639865771698877244": ["convolution_gpu_bfyx_f16", 6],
+        "1766192115208251594": ["convolution_gpu_bfyx_f16", 8],
+        "2015853414727933068": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10306264176864957825": ["convolution_gpu_bfyx_f16", 7],
+        "4871044181497936479": ["convolution_gpu_bfyx_f16", 6],
+        "8396548857016837452": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12714653556587252941": ["convolution_gpu_bfyx_f16", 6],
+        "1967886437456544865": ["convolution_gpu_bfyx_f16", 6],
+        "11350907923254547441": ["convolution_gpu_bfyx_f16", 7],
+        "12282274184666824734": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16866941685634953173": ["convolution_gpu_bfyx_f16", 6],
+        "6312283149621718315": ["convolution_gpu_bfyx_f16", 7],
+        "9795822066940245604": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7256380059517365529": ["convolution_gpu_bfyx_f16", 3],
+        "11966909558503849515": ["convolution_gpu_bfyx_f16", 8],
+        "11277466712159791917": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4992371260504969141": ["convolution_gpu_bfyx_f16", 6],
+        "15043181455492553716": ["convolution_gpu_bfyx_f16", 7],
+        "8399107263382557054": ["convolution_gpu_bfyx_f16", 8],
+        "6350452055467384023": ["convolution_gpu_bfyx_f16", 6],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16", 856],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like", 2],
+        "641417817126876622": ["convolution_gpu_bfyx_os_iyx_osv16", 104],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16", 717],
+        "4917807560042671575": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "44341776758472069": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3584869801682702110": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "13032463401326344281": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "12074020528214820344": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "10792368710075698135": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 906],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "15248304664655540462": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "8737603244374483727": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "6375630142791083064": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 616],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18191060893922845906": ["convolution_gpu_bfyx_os_iyx_osv16", 248],
+        "4914314319075651246": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2995522243104361971": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 330],
+        "3260693384502698965": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8021852643758937492": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 329],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1085],
+        "4309855944835724499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14741878965259218163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180612484034524170": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13300287078635373813": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13378751364754764186": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6449257695177020930": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "17627392788011440461": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "13831493475156855535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 734],
+        "3860080842190932938": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12349486511618981663": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17142061595610833587": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "413520381980740601": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 661],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "6526747512277607691": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16117940336643166742": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5991582579063082343": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "3294597200237228703": ["convolution_gpu_bfyx_os_iyx_osv16", 663],
+        "16191151963860109032": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4092109744625924274": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3411824370004173602": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 8],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16", 8],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 5],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "499215221217528434": ["convolution_gpu_bfyx_f16", 8],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 8],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 7],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16", 8],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 8],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 7],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16", 8],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 6],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8155797389244290087": ["convolution_gpu_bfyx_f16", 5],
+        "16706121580364790904": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "5495776091407365966": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5673972310424776040": ["convolution_gpu_bfyx_os_iyx_osv16", 238],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16", 483],
+        "4305463771822108179": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "2079318718874681198": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "17439941375453858836": ["convolution_gpu_bfyx_os_iyx_osv16", 1042],
+        "12467583825605788345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9058857190661793339": ["fused_conv_eltwise_gpu_ref", 2],
+        "11620974866622716017": ["fused_conv_eltwise_gpu_ref", 0],
+        "8857009061371774666": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5756084360647965669": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3975438095352877013": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3345987020362642539": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16755500582498207386": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1636861132129961823": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9793091808041097161": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2080318501154291605": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "13813582937323882369": ["fully_connected_gpu_bf_io_ref", 1],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16", 680],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "2877876834438717783": ["convolution_gpu_bfyx_os_iyx_osv16", 949],
+        "9156649014297448284": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13660470643303663441": ["convolution_gpu_bfyx_os_iyx_osv16", 909],
+        "8081997809574506331": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "8199400320947837516": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5643924526605879168": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_os_iyx_osv16", 1071],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 8],
+        "13332579082252874358": ["convolution_gpu_bfyx_os_iyx_osv16", 727],
+        "10148956417804060854": ["convolution_gpu_bfyx_f16", 8],
+        "16052199780545784176": ["convolution_gpu_bfyx_f16", 8],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 351],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 8],
+        "3080843366919845836": ["convolution_gpu_bfyx_os_iyx_osv16", 1068],
+        "16898206352994894714": ["convolution_gpu_bfyx_f16", 8],
+        "17502393571772755646": ["convolution_gpu_bfyx_os_iyx_osv16", 438],
+        "12982233543299343225": ["convolution_gpu_bfyx_os_iyx_osv16", 807],
+        "5609871805820255743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "7971259885907841252": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15956442448148612253": ["convolution_gpu_bfyx_os_iyx_osv16", 1062],
+        "7600980811977404651": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "6051578359778554994": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "14591236937522474591": ["convolution_gpu_bfyx_os_iyx_osv16", 751],
+        "380671738106280681": ["convolution_gpu_bfyx_os_iyx_osv16", 1122],
+        "16581313033870107357": ["convolution_gpu_bfyx_os_iyx_osv16", 314],
+        "15132868076468531540": ["convolution_gpu_bfyx_os_iyx_osv16", 347],
+        "4964421818619633295": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "5206589624074157418": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "15804259593852912096": ["convolution_gpu_bfyx_os_iyx_osv16", 573],
+        "9667577643691138471": ["convolution_gpu_bfyx_f16", 8],
+        "9587296295017154035": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "624896425985287215": ["convolution_gpu_bfyx_f16", 8],
+        "13698491289625410930": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "5501294609610168354": ["convolution_gpu_bfyx_f16", 8],
+        "14684726385174603824": ["convolution_gpu_bfyx_f16", 8],
+        "3538746967389669479": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "5442728869442056950": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17446903112723559991": ["convolution_gpu_bfyx_f16", 8],
+        "17314761693722740561": ["convolution_gpu_bfyx_f16", 8],
+        "12956681231908531328": ["convolution_gpu_bfyx_os_iyx_osv16", 1013],
+        "17836528995874415642": ["convolution_gpu_bfyx_f16", 8],
+        "8896717627818724430": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "14716947061630316041": ["convolution_gpu_bfyx_os_iyx_osv16", 806],
+        "9735141117399046903": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18366465884925728820": ["convolution_gpu_bfyx_os_iyx_osv16", 632],
+        "17388129439366166721": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "13724220569112734431": ["convolution_gpu_bfyx_os_iyx_osv16", 596],
+        "5529587475911632254": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "11660089067798953391": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "5181665423821543629": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "8048807352445331657": ["convolution_gpu_bfyx_os_iyx_osv16", 312],
+        "3470485673426524224": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "3135008557801015427": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "14966985685297154154": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2035874178080637954": ["convolution_gpu_bfyx_os_iyx_osv16", 687],
+        "5013120291092844103": ["convolution_gpu_bfyx_os_iyx_osv16", 745],
+        "4460181251394130653": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "5117453858905614531": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "8461950668910238851": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "1805006234516270784": ["convolution_gpu_bfyx_os_iyx_osv16", 351],
+        "2718931301666622839": ["convolution_gpu_bfyx_os_iyx_osv16", 351],
+        "7124614724653589875": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "7805147511722673361": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "18231162877100499337": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1081152612562015774": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "14118692364036816874": ["convolution_gpu_bfyx_os_iyx_osv16", 1018],
+        "2771511633327598307": ["convolution_gpu_bfyx_os_iyx_osv16", 219],
+        "2043520288487456245": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10128395594093504455": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "1986294224967713231": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "8596104233487286880": ["convolution_gpu_bfyx_os_iyx_osv16", 1056],
+        "14619067706344498943": ["convolution_gpu_bfyx_os_iyx_osv16", 1070],
+        "16247177074403714471": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "17302460560764241489": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "11296929673638920561": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "2856387545805299627": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "9602711901243573665": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1389991763672509207": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7273251266921979515": ["convolution_gpu_bfyx_os_iyx_osv16", 115],
+        "7271698086258726731": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11671327445697582898": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17570554483516474486": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3950135144885165819": ["convolution_gpu_bfyx_os_iyx_osv16", 664],
+        "16452498081261682201": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15356297740028337585": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8878636242180690359": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "12899244091844068967": ["convolution_gpu_bfyx_gemm_like", 0],
+        "18197774991654792135": ["convolution_gpu_bfyx_os_iyx_osv16", 929],
+        "11881486982136101383": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5087105232357685910": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6362183633269684086": ["convolution_gpu_bfyx_gemm_like", 2],
+        "665553611665131720": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15800366255097765592": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3441148927037088426": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10113814865022625794": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9290161943539060420": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6575054771337880905": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6930297774765427265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14687805371465731129": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17348903837738033737": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5058042344671975771": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4296792831323727718": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16569718635085620248": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6479800863775629782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8225524724938376205": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4610533059559454932": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10523639436634369983": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17500026797620139898": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9352837842671844352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15431710492660944867": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4094966339608175937": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10293540888522108040": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2178813930852805198": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4172720860698260594": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11035900209971591093": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16347989689011736788": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18426670112574344316": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9075740348545764459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16177541412848844107": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7413356361797538770": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11878518514118760052": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1483436564981355857": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2583631235760101021": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10826337022193127499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "883141931001824331": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18286338108393131357": ["convolution_gpu_bfyx_gemm_like", 2],
+        "513328329651240169": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2866563084547740589": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6618382574307554008": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1564076582163500801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2980118259786021998": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14006008710769892285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11047701363022632258": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17824545902528351132": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13411717706564225997": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "2553988022244380074": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "2007784578504956396": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "2128232248278266618": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180218859472587238": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17950127156676619192": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11734174131078900953": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17769805455612014213": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "354985206063783019": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12008052381997574625": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14579060801049956629": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "2475732477128179942": ["convolution_gpu_bfyx_os_iyx_osv16", 92],
+        "7425269551190332752": ["convolution_gpu_bfyx_os_iyx_osv16", 22],
+        "2523330181210520033": ["convolution_gpu_bfyx_os_iyx_osv16", 60],
+        "9252516395349163399": ["fused_conv_eltwise_gpu_ref", 0],
+        "3025740595729338904": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9221796417553554577": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "142182287837081331": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16817115615539634498": ["fully_connected_gpu_bf_io_gemm", 2],
+        "846549121454126986": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "1865437550609663375": ["fully_connected_gpu_bf_io_gemm", 2],
+        "14491983419826529399": ["convolution_gpu_bfyx_os_iyx_osv16", 451],
+        "11866343372130060111": ["convolution_gpu_bfyx_os_iyx_osv16", 126],
+        "3750595711145201146": ["convolution_gpu_bfyx_os_iyx_osv16", 799],
+        "555112033233919049": ["fully_connected_gpu_bf_io_gemm", 1],
+        "9449916193007510499": ["fully_connected_gpu_bf_io_gemm", 1],
+        "821153009898835283": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10053897550646291639": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "166522152877705111": ["convolution_gpu_bfyx_os_iyx_osv16", 872],
+        "8194080531314571831": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "8462596687449136841": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "16641148739441654579": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "3012332306785177280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1667559253581127345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17950962563816983793": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "15920581282829793263": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "4931844549089354374": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11227326613484178737": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "8926339988827333993": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "14947161471102583853": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7959005479751426244": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "13876295120508241721": ["convolution_gpu_bfyx_os_iyx_osv16", 43],
+        "5450799298000231966": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "745049678230480319": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17799305583546345514": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "15448134419455024563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10247046915015701375": ["convolution_gpu_bfyx_os_iyx_osv16", 84],
+        "818326236814735107": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11621993279519931789": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10879300979808656559": ["fully_connected_gpu_bf_io_gemm", 1],
+        "6931984251726006059": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "6196230740484518313": ["convolution_gpu_bfyx_os_iyx_osv16", 212],
+        "9158058375618670219": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "11236152897678664523": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "3406694758050234432": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1110],
+        "10974039527048973006": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "15899184198611288897": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "5208730096669264907": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "12427052259883778985": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "11537945670773619430": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "3449889481023274859": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "14190614451726695163": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "12928525615597254487": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "9516426687291882678": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17048242738976449237": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 730],
+        "9268483331991252048": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "6122901745362984256": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "5485405121200417034": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11959778533528884090": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "10066541947363706408": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12190338269093090393": ["convolution_gpu_bfyx_os_iyx_osv16", 745],
+        "18114410819861988056": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9399757365169066601": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18002225531765237416": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "493140137361754334": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1260161648603954768": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5667262301262234990": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "9589013771119948673": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12893936099497050507": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "5453417400746204459": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "16783136590567851390": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11157538002790552612": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "232009389683898587": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11216071562773188709": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "12325371158799601152": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11203921208856246202": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "7748329451001058910": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4699825578606954745": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9148351131305560328": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5864010731331844548": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8354231196544866003": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3293708605626705859": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3238880006626116922": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6138876053139091484": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12144683985655531326": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "251191902439549345": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3094287673106030943": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15664461533342111743": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12991662142109741177": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "1919860812260988485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12975178408849254081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "560685047966004607": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 660],
+        "1411165869695864657": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6971410560932215974": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17372785589054562125": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1164314361485656318": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9516217840174015532": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12868299597160484729": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16492694273514080106": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "14567423858977789672": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "1069440014730910857": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 661],
+        "4511752002127622518": ["convolution_gpu_bfyx_gemm_like", 2],
+        "670011076817691046": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "10000917296337062736": ["convolution_gpu_bfyx_os_iyx_osv16", 359],
+        "12236539205690542952": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 735],
+        "12066036542483319741": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "16551989359219084137": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14375560443851968119": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "4925269744341463388": ["convolution_gpu_bfyx_os_iyx_osv16", 476],
+        "7404267750384901384": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "1136134476921992394": ["convolution_gpu_bfyx_os_iyx_osv16", 542],
+        "12274460319290366021": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "1936271684402780579": ["convolution_gpu_bfyx_os_iyx_osv16", 805],
+        "7797907475238799442": ["fused_conv_eltwise_gpu_ref", 2],
+        "107092103514596960": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8153466715673110154": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13707460333812965439": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13482095577300687063": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12240700165957110598": ["convolution_gpu_bfyx_os_iyx_osv16", 659],
+        "13922184309028933319": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "4503204697730374875": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "15444198622559010805": ["fully_connected_gpu_bf_io_gemm", 2],
+        "3399502934446395571": ["fully_connected_gpu_bf_io_gemm", 2],
+        "13954223602112460287": ["fully_connected_gpu_bf_io_gemm", 0],
+        "10986360375271263743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "1716892750352083242": ["fully_connected_gpu_bf_io_gemm", 2],
+        "9725379584761388986": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "8812448421277455303": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10082079116080072102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "399551887429980535": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4993763244005264691": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16584618141013506079": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2812521679999989071": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13008426794683170889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12575702775019311249": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7546191118828069537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16065515254801458590": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "3425550832073889758": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12158565214239239362": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "4674402155077047884": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17292794084187069459": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "2786512217326082861": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14350551992529551543": ["convolution_gpu_bfyx_os_iyx_osv16", 992],
+        "2393708926889890184": ["convolution_gpu_bfyx_os_iyx_osv16", 994],
+        "14469325606711534393": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16745817877996028596": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "5009730037803270259": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12990454601941366626": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "16417111816169006680": ["convolution_gpu_bfyx_os_iyx_osv16", 842],
+        "133571575038273240": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14248134542225645633": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9431127887153901797": ["convolution_gpu_bfyx_f16", 8],
+        "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "11176513032851549145": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13523379689227815262": ["convolution_gpu_bfyx_f16", 3],
+        "6721716607254493168": ["convolution_gpu_bfyx_f16", 5],
+        "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11234282887624973651": ["convolution_gpu_bfyx_f16", 7],
+        "13831173402079080202": ["convolution_gpu_bfyx_f16", 3],
+        "8326492500469570449": ["convolution_gpu_bfyx_f16", 6],
+        "3264529476730576869": ["convolution_gpu_bfyx_f16", 4],
+        "7297268657172014757": ["convolution_gpu_bfyx_f16", 2],
+        "1625066159015188551": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6691529969484361871": ["convolution_gpu_bfyx_f16", 1],
+        "15713770358690264680": ["convolution_gpu_bfyx_f16", 5],
+        "16321675691643798095": ["convolution_gpu_bfyx_f16", 2],
+        "11669126976746433467": ["convolution_gpu_bfyx_f16", 3],
+        "343301842058050721": ["convolution_gpu_bfyx_f16", 1],
+        "2752323179285263511": ["convolution_gpu_bfyx_f16", 0],
+        "2102366789632970362": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "1306385926849681711": ["convolution_gpu_bfyx_f16", 1],
+        "126985649265174875": ["convolution_gpu_bfyx_f16", 1],
+        "1398008210451653662": ["convolution_gpu_bfyx_f16", 1],
+        "18349997465728341610": ["convolution_gpu_bfyx_f16", 2],
+        "6014604866075552044": ["convolution_gpu_bfyx_f16", 1],
+        "10704627126748844083": ["convolution_gpu_bfyx_f16", 2],
+        "2290627489333161117": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "9281553420666514549": ["convolution_gpu_bfyx_f16", 1],
+        "5307698759365425674": ["convolution_gpu_bfyx_f16", 8],
+        "16618476158797450107": ["convolution_gpu_bfyx_f16", 2],
+        "6448987340419115272": ["convolution_gpu_bfyx_f16", 1],
+        "7649625315489330023": ["convolution_gpu_bfyx_f16", 8],
+        "1407861661939721927": ["convolution_gpu_bfyx_f16", 8],
+        "7240814723112731361": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6941932686830863618": ["convolution_gpu_bfyx_f16", 5],
+        "2035160132949629453": ["convolution_gpu_bfyx_f16", 5],
+        "17827286460954881640": ["convolution_gpu_bfyx_f16", 6],
+        "6051363798671277490": ["convolution_gpu_bfyx_f16", 8],
+        "7990676476696328795": ["convolution_gpu_bfyx_f16", 6],
+        "9594336645019216285": ["convolution_gpu_bfyx_f16", 8],
+        "2826412019603377751": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "10171778444869246611": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "13742305118572588823": ["convolution_gpu_bfyx_f16", 7],
+        "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14821402568274932830": ["binary_convolution_gpu_1x1", 0],
+        "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10657672650587258853": ["convolution_gpu_bfyx_f16", 6],
+        "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13626797216057420236": ["convolution_gpu_bfyx_f16", 6],
+        "2506095387855338923": ["convolution_gpu_bfyx_f16", 2],
+        "562790620732503479": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1176788949160939554": ["convolution_gpu_bfyx_f16", 3],
+        "11395171679618279746": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9681660158274412796": ["convolution_gpu_bfyx_f16", 5],
+        "17157976605793655955": ["convolution_gpu_bfyx_f16", 1],
+        "13706914146179028206": ["convolution_gpu_bfyx_f16", 2],
+        "1586631406027561282": ["convolution_gpu_bfyx_f16", 2],
+        "9177089521763332472": ["convolution_gpu_bfyx_f16", 2],
+        "7623827168813087262": ["convolution_gpu_bfyx_f16", 2],
+        "2251294131085073114": ["convolution_gpu_bfyx_f16", 8],
+        "11257985397820322504": ["convolution_gpu_bfyx_f16", 1],
+        "3873298083628570562": ["convolution_gpu_bfyx_f16", 4],
+        "15662803497226104305": ["convolution_gpu_bfyx_f16", 5],
+        "8980088396308495358": ["convolution_gpu_bfyx_f16", 4],
+        "8049787711095084959": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "8361191677655973935": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "6455326407035817823": ["convolution_gpu_bfyx_os_iyx_osv16", 95],
+        "4549875381866576113": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "14780479128645572595": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "9221666339438514459": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "17091218700152862273": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9951123692498529061": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "15226633731441516361": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "4453349487216529991": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "17929115705990268026": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6621532750524834097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "16562571407098459049": ["convolution_gpu_bfyx_os_iyx_osv16", 539],
+        "2873284221161386597": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3769897639705493224": ["convolution_gpu_bfyx_os_iyx_osv16", 540],
+        "5447803100312758964": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 583],
+        "9163238347824560017": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "1688979903294911182": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9338092674592431198": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "15522545626077485199": ["convolution_gpu_bfyx_os_iyx_osv16", 163],
+        "1797489112792772811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5478531388148194783": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "3289369122755371980": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 81],
+        "14572382016053496602": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "16841168676076935693": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18407347961782182453": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 836],
+        "8695092335925023399": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "14168685794682021826": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12423218459706339590": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8734189831526420226": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "14362182205968229036": ["convolution_gpu_bfyx_os_iyx_osv16", 510],
+        "13157476677873103938": ["convolution_gpu_bfyx_os_iyx_osv16", 135],
+        "11940005480315119153": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "1302512649939808216": ["convolution_gpu_bfyx_os_iyx_osv16", 497],
+        "16919811480058643640": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "5208084625746441471": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "8262549900448065079": ["convolution_gpu_bfyx_os_iyx_osv16", 380],
+        "5227665249672396809": ["convolution_gpu_bfyx_os_iyx_osv16", 381],
+        "2276167946100759891": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "18129268521578260814": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5584283929974879275": ["convolution_gpu_bfyx_f16", 8],
+        "11429584360303226064": ["convolution_gpu_bfyx_f16", 8],
+        "8686735181567651375": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "212877757325472435": ["convolution_gpu_bfyx_f16", 8],
+        "17559312741017462443": ["convolution_gpu_bfyx_f16", 6],
+        "12388383111921084595": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6044859325822961324": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15028548616895245917": ["convolution_gpu_bfyx_f16", 6],
+        "2416358280826517238": ["convolution_gpu_bfyx_f16", 8],
+        "5078717573348951772": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16307093827408988813": ["convolution_gpu_bfyx_f16", 8],
+        "7365885889295117317": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5519237001078836815": ["convolution_gpu_bfyx_f16", 8],
+        "10536941332534385779": ["convolution_gpu_bfyx_f16", 3],
+        "4735765505172334525": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11998001963634536052": ["convolution_gpu_bfyx_f16", 3],
+        "4967372874318410724": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6469943853460256537": ["convolution_gpu_bfyx_f16", 8],
+        "4356868209069762908": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10874805992997105013": ["convolution_gpu_bfyx_f16", 5],
+        "10425195772947453108": ["convolution_gpu_bfyx_os_iyx_osv16", 811],
+        "17029022832617859459": ["convolution_gpu_bfyx_os_iyx_osv16", 434],
+        "2476051167651059767": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7844096932162345117": ["convolution_gpu_bfyx_f16", 5],
+        "2705126395780936342": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6904686223481502731": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3795064777145790033": ["convolution_gpu_bfyx_f16", 3],
+        "3981089957521430742": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12864700296881955607": ["convolution_gpu_bfyx_f16", 4],
+        "183214037684746423": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "14009922923845987763": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "3202655487034498666": ["convolution_gpu_bfyx_f16", 1],
+        "756854486757180730": ["convolution_gpu_bfyx_f16", 2],
+        "1750742987566783306": ["convolution_gpu_bfyx_os_iyx_osv16", 7],
+        "10704219670342115822": ["convolution_gpu_bfyx_os_iyx_osv16", 383],
+        "559540403792175610": ["convolution_gpu_bfyx_f16", 1],
+        "11042961657717641258": ["convolution_gpu_bfyx_f16", 7],
+        "7454909001123355674": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "14140446373297940618": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "12992827495874215098": ["convolution_gpu_bfyx_f16", 4],
+        "17560813776447689945": ["convolution_gpu_bfyx_f16", 6],
+        "12650154599281162622": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "9985311646893058565": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "17149651085328252645": ["convolution_gpu_bfyx_f16", 0],
+        "11779581889508749846": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "16817641185194791244": ["convolution_gpu_bfyx_os_iyx_osv16", 83],
+        "6143862109537773906": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6935108295659465736": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7424861737057604968": ["convolution_gpu_bfyx_os_iyx_osv16", 116],
+        "9640933201231819369": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17277279278034795112": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8111879884622212613": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3806358488669113143": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1711328697805315421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5288962955659199699": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17987842029397168642": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4203253185427070377": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9945419220893973658": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14309870202508661817": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18192195499329490812": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18075812052832099472": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8192820779590386413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8925814981090917840": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5081785566500341341": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3605237561875385705": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1716302732338667414": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15442445290156572536": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14397043442550652899": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2228098191590860938": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "1573476283825580755": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "5388852746720776479": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "16916797286744339336": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3644136601694826205": ["convolution_gpu_bfyx_f16", 8],
+        "14012991338325957511": ["convolution_gpu_bfyx_f16", 5],
+        "2398531071421825711": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14629119844168195239": ["convolution_gpu_bfyx_f16", 7],
+        "9479190421236869705": ["convolution_gpu_bfyx_f16", 3],
+        "4695468509968937176": ["convolution_gpu_bfyx_f16", 3],
+        "16869493346339355004": ["convolution_gpu_bfyx_f16", 3],
+        "15546783882511835538": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14073402551217828243": ["convolution_gpu_bfyx_f16", 3],
+        "5737532382360638209": ["convolution_gpu_bfyx_f16", 6],
+        "14178075057440850235": ["convolution_gpu_bfyx_f16", 6],
+        "1041942313707882183": ["convolution_gpu_bfyx_f16", 5],
+        "14311656444636567643": ["convolution_gpu_bfyx_f16", 7],
+        "5708322911191147507": ["convolution_gpu_bfyx_f16", 3],
+        "7237541003077150774": ["convolution_gpu_bfyx_f16", 5],
+        "17097394033112334006": ["convolution_gpu_bfyx_f16", 2],
+        "10077604090153912107": ["convolution_gpu_bfyx_f16", 3],
+        "3042641872059534006": ["convolution_gpu_bfyx_f16", 2],
+        "13168100741247170644": ["convolution_gpu_bfyx_f16", 8],
+        "3376503797303782111": ["convolution_gpu_bfyx_f16", 3],
+        "8462839229772971651": ["convolution_gpu_bfyx_f16", 6],
+        "9678831759870330874": ["convolution_gpu_bfyx_f16", 5],
+        "10053808465394315011": ["convolution_gpu_bfyx_f16", 3],
+        "325275946658065165": ["convolution_gpu_bfyx_f16", 1],
+        "11261634534154975791": ["convolution_gpu_bfyx_f16", 7],
+        "11257004820976953882": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "5713491991252168637": ["convolution_gpu_bfyx_os_iyx_osv16", 420],
+        "2083080453795724323": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6452660166904314994": ["convolution_gpu_bfyx_os_iyx_osv16", 894],
+        "10411169140360183327": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "9743813978144755927": ["convolution_gpu_bfyx_os_iyx_osv16", 399],
+        "15769267071952355833": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "9816235120364293291": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5617875919579896151": ["fully_connected_gpu_bf_io_gemm", 1],
+        "18309383982594141239": ["fully_connected_gpu_bf_io_gemm", 2],
+        "6954046921635466236": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12133573113666871990": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18286924901612269315": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16168987643236739114": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17573344121250212662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8792004303945144557": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6055054188657886157": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16692293796070898202": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18377591093081814522": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7171735046681228890": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2461164836823254208": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14430129165479757357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14698972830975282413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3479216436904445131": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5269956004669551826": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "13594976208424418204": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12373590460058087695": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "4405236452109167503": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14132900527730577142": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1349033639465657142": ["convolution_gpu_bfyx_gemm_like", 2],
+        "812985719328060901": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "12407276986845062239": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9170373506597510005": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1389904024718949479": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18042225157963583297": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6369935194881138691": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11002183397247930282": ["convolution_gpu_bfyx_os_iyx_osv16", 93],
+        "14040168861632997052": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10321120422537436943": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7628224528894213786": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "18333490976250555089": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12240359612725499137": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5214167856473943406": ["convolution_gpu_bfyx_os_iyx_osv16", 843],
+        "14052955765964466465": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7134511117843066284": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "4293870201735626607": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "7398517597116797925": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9788174666000966313": ["convolution_gpu_bfyx_os_iyx_osv16", 847],
+        "18042814645135189475": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "11236623772616442479": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11418806742471661595": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18007798448985514623": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "9699952679060486545": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "579905583383428310": ["convolution_gpu_bfyx_os_iyx_osv16", 95],
+        "11102245529349471251": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "10406248465333026906": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8032924116166179276": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "7025366523000457929": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4140229891062448185": ["convolution_gpu_bfyx_f16", 8],
+        "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12777387776061796777": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5183001506630431534": ["convolution_gpu_bfyx_f16", 6],
+        "13244421635448480964": ["convolution_gpu_bfyx_f16", 4],
+        "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 545],
+        "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12722030162332410659": ["convolution_gpu_bfyx_f16", 7],
+        "11753505949184833814": ["convolution_gpu_bfyx_f16", 5],
+        "1138805437505853389": ["convolution_gpu_bfyx_f16", 6],
+        "236844015511730537": ["convolution_gpu_bfyx_f16", 4],
+        "16372093802852963117": ["convolution_gpu_bfyx_f16", 4],
+        "8017045013578597247": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3767812249447618647": ["convolution_gpu_bfyx_f16", 4],
+        "14300856950146697632": ["convolution_gpu_bfyx_f16", 3],
+        "6658791967844021067": ["convolution_gpu_bfyx_f16", 2],
+        "16114394473926845719": ["convolution_gpu_bfyx_f16", 3],
+        "3329703306452769429": ["convolution_gpu_bfyx_f16", 1],
+        "12485385390638720435": ["convolution_gpu_bfyx_f16", 1],
+        "18325147922902896614": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "2042543291306492227": ["convolution_gpu_bfyx_f16", 1],
+        "14016387396197131183": ["convolution_gpu_bfyx_f16", 2],
+        "388252829841919694": ["convolution_gpu_bfyx_f16", 1],
+        "3224136725591132250": ["convolution_gpu_bfyx_f16", 1],
+        "11555472669677513180": ["convolution_gpu_bfyx_f16", 2],
+        "5878699865486527531": ["convolution_gpu_bfyx_f16", 2],
+        "14726692927619009109": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "10286586505667471565": ["convolution_gpu_bfyx_f16", 2],
+        "8635430703217243594": ["convolution_gpu_bfyx_f16", 8],
+        "3194668567618864343": ["convolution_gpu_bfyx_f16", 1],
+        "2611344153711817460": ["convolution_gpu_bfyx_f16", 1],
+        "11818558634104933451": ["convolution_gpu_bfyx_f16", 2],
+        "5390496664798965323": ["convolution_gpu_bfyx_f16", 5],
+        "13357658087174699785": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14394195619252967214": ["convolution_gpu_bfyx_f16", 7],
+        "4669930370801439013": ["convolution_gpu_bfyx_f16", 4],
+        "1370501593495473908": ["convolution_gpu_bfyx_f16", 7],
+        "4179197899143727062": ["convolution_gpu_bfyx_f16", 8],
+        "4150158815056302279": ["convolution_gpu_bfyx_f16", 3],
+        "7454737385785852953": ["convolution_gpu_bfyx_f16", 4],
+        "17477451880893252674": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "5768225444324661639": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "1885336536803061563": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "5714742374217969073": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10896471338854021271": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "3806914827253341543": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16632515980529050205": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "7466530815481157347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9025266984842296356": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "5906873273896994744": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7802957391728955870": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "9353411647951951678": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3747726337434740481": ["convolution_gpu_bfyx_os_iyx_osv16", 586],
+        "5744097132927875811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16955287132696194727": ["convolution_gpu_bfyx_os_iyx_osv16", 585],
+        "9395452164938581548": ["convolution_gpu_bfyx_os_iyx_osv16", 585],
+        "11451950139903792107": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10917339102734674830": ["convolution_gpu_bfyx_1x1", 2],
+        "6989917785852753861": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15880522773125518978": ["convolution_gpu_bfyx_1x1", 2],
+        "747373540539235872": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1416762068965596282": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "13394998921545119351": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "15308583448258001619": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "3947469783167254911": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16513233390968673543": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "7953644920144486409": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12990194208171226009": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "212918306790163121": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12507063931340717634": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "944308351100913426": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10557004218031162358": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "15668589103337174848": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15795875095918663327": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "719423812642773465": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17406040551647193807": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "14418191404250235680": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "4355752766807245481": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8001877558898476628": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "5663755974835173519": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15515579892465814722": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "6228391894735143720": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "17619521756514112890": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "7618115892322102589": ["convolution_gpu_bfyx_os_iyx_osv16", 452],
+        "10859348180122457267": ["convolution_gpu_bfyx_os_iyx_osv16", 76],
+        "8920870418107208273": ["convolution_gpu_bfyx_os_iyx_osv16", 828],
+        "15455728969592248176": ["convolution_gpu_bfyx_os_iyx_osv16", 78],
+        "17332144919524270474": ["fully_connected_gpu_bf_io_gemm", 2],
+        "9513119231978452377": ["fully_connected_gpu_bf_io_gemm", 2],
+        "6332576636757295449": ["fully_connected_gpu_bf_io_gemm", 1],
+        "2493240824080071735": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4780210213847704316": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10506991028553025432": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5428553079642989652": ["convolution_gpu_bfyx_f16", 8],
+        "16121853713631575869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1510497042951931323": ["convolution_gpu_bfyx_f16", 8],
+        "2745430731204028395": ["convolution_gpu_bfyx_f16", 8],
+        "12876976900388547418": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16890063210386281886": ["convolution_gpu_bfyx_f16", 8],
+        "3927810740679311711": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16684473381571424732": ["convolution_gpu_bfyx_f16", 8],
+        "13284968934065954912": ["convolution_gpu_bfyx_f16", 7],
+        "2631762936534205094": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1739904418563128064": ["convolution_gpu_bfyx_f16", 8],
+        "10980290216903708719": ["convolution_gpu_bfyx_f16", 8],
+        "735103333401104515": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6983554020850996053": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5921617358811124053": ["convolution_gpu_bfyx_f16", 8],
+        "14301841882009814238": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15717838690804403986": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12405925645446300036": ["convolution_gpu_bfyx_f16", 7],
+        "2258154151361692964": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "209570180062724480": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "28298051505236331": ["convolution_gpu_bfyx_f16", 6],
+        "5213864300694772508": ["convolution_gpu_bfyx_f16", 8],
+        "5762331369519287189": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2276871110978868522": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14409415690583079892": ["convolution_gpu_bfyx_f16", 8],
+        "1937137823574087575": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "852636453039879630": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5479982500377449068": ["convolution_gpu_bfyx_f16", 8],
+        "4498519811904474615": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12692499579789677851": ["convolution_gpu_bfyx_f16", 3],
+        "7387278268805782919": ["convolution_gpu_bfyx_f16", 4],
+        "2438123442946203226": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13258719154936933305": ["convolution_gpu_bfyx_f16", 8],
+        "18100501541133790185": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17588841654811790691": ["convolution_gpu_bfyx_f16", 8],
+        "6689548390020199537": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "17024985107770974703": ["convolution_gpu_bfyx_f16", 8],
+        "9968353980515607037": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17524096092737615017": ["convolution_gpu_bfyx_f16", 8],
+        "18114533502018520363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5255663532662079743": ["convolution_gpu_bfyx_f16", 8],
+        "7603883354122442155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "846394177044425685": ["convolution_gpu_bfyx_f16", 8],
+        "13144168456084150868": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11614500724316937770": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14902332370005427398": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13885328145647066921": ["convolution_gpu_bfyx_f16", 8],
+        "12145548657602787381": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13518278887400015765": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5205936493218798381": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15427181019330262398": ["convolution_gpu_bfyx_f16", 8],
+        "10959940286555804884": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8974050386876725444": ["convolution_gpu_bfyx_f16", 8],
+        "18162518237985989872": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10232477134289465267": ["convolution_gpu_bfyx_f16", 7],
+        "6212673407294495601": ["convolution_gpu_bfyx_f16", 6],
+        "17860712443740757354": ["convolution_gpu_bfyx_f16", 4],
+        "4146950753180366119": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8930966597498483291": ["convolution_gpu_bfyx_f16", 1],
+        "17992495365931215688": ["convolution_gpu_bfyx_f16", 5],
+        "7146066549311428539": ["convolution_gpu_bfyx_os_iyx_osv16", 810],
+        "3323834459803099675": ["convolution_gpu_bfyx_f16", 3],
+        "13970979686543548079": ["convolution_gpu_bfyx_f16", 1],
+        "17224261730179174234": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8599384037400091771": ["convolution_gpu_bfyx_f16", 2],
+        "13691442749949008699": ["convolution_gpu_bfyx_f16", 2],
+        "18249370808689016771": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "9285872109762575050": ["convolution_gpu_bfyx_f16", 1],
+        "3778806780227752318": ["convolution_gpu_bfyx_f16", 2],
+        "14533995413631988093": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "18213608828525161872": ["convolution_gpu_bfyx_f16", 2],
+        "18201367360115946218": ["convolution_gpu_bfyx_f16", 6],
+        "2298930512642991761": ["convolution_gpu_bfyx_os_iyx_osv16", 7],
+        "7524439404315811688": ["convolution_gpu_bfyx_f16", 1],
+        "12807813148826993243": ["convolution_gpu_bfyx_f16", 1],
+        "10386449367791123777": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13710585118886614159": ["convolution_gpu_bfyx_f16", 3],
+        "5083137484520510189": ["convolution_gpu_bfyx_f16", 7],
+        "7310721662758245090": ["convolution_gpu_bfyx_os_iyx_osv16", 794],
+        "3441130252760474056": ["convolution_gpu_bfyx_f16", 5],
+        "17896513277322150873": ["convolution_gpu_bfyx_f16", 8],
+        "15804284488423654345": ["convolution_gpu_bfyx_os_iyx_osv16", 65],
+        "7664482438087213797": ["convolution_gpu_bfyx_f16", 8],
+        "11503348949826253367": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8658460661236455347": ["convolution_gpu_bfyx_f16", 8],
+        "3330382517900635622": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17858565811787708662": ["convolution_gpu_bfyx_f16", 8],
+        "10812355035102461624": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1763713485502711028": ["convolution_gpu_bfyx_f16", 8],
+        "537708856180494652": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14039400995173243881": ["convolution_gpu_bfyx_f16", 7],
+        "16144549808790730407": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3607178347319840731": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1616172004620830694": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16914324612936017713": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5643847517719208248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10544570776782304031": ["convolution_gpu_bfyx_f16", 8],
+        "10319250281140119656": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15493574903211227269": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8519850106535467914": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10879590057665696981": ["convolution_gpu_bfyx_f16", 8],
+        "14575912896890066295": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3580607955559330220": ["convolution_gpu_bfyx_f16", 7],
+        "11846250273112462539": ["convolution_gpu_bfyx_os_iyx_osv16", 456],
+        "5211338259118953367": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2534964735134768930": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13711299339844520924": ["convolution_gpu_bfyx_f16", 7],
+        "6904712232722172471": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "14274320868149065806": ["convolution_gpu_bfyx_f16", 2],
+        "10286169767282972888": ["convolution_gpu_bfyx_f16", 1],
+        "5778557207840627218": ["convolution_gpu_bfyx_os_iyx_osv16", 418],
+        "512957346409732922": ["convolution_gpu_bfyx_f16", 8],
+        "16606774042635630114": ["convolution_gpu_bfyx_f16", 3],
+        "2762113297856010960": ["convolution_gpu_bfyx_os_iyx_osv16", 5],
+        "16799551121569588878": ["convolution_gpu_bfyx_os_iyx_osv16", 4],
+        "3047328071912702715": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "4614195525780772879": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "229690452505740843": ["convolution_gpu_bfyx_f16", 6],
+        "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13526783681740823304": ["binary_convolution_gpu_1x1", 0],
+        "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 203],
+        "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16642117060176841433": ["convolution_gpu_bfyx_f16", 6],
+        "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4859984633862705344": ["convolution_gpu_bfyx_f16", 6],
+        "6643541161570220487": ["convolution_gpu_bfyx_f16", 5],
+        "4771606875232577147": ["convolution_gpu_bfyx_f16", 7],
+        "10197214218719989238": ["convolution_gpu_bfyx_f16", 4],
+        "9021222698443352890": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13650156111024145576": ["convolution_gpu_bfyx_f16", 3],
+        "1823355198957173511": ["convolution_gpu_bfyx_f16", 2],
+        "18224832115562649218": ["convolution_gpu_bfyx_f16", 3],
+        "17989075751697734530": ["convolution_gpu_bfyx_f16", 2],
+        "11065625785330976824": ["convolution_gpu_bfyx_f16", 1],
+        "16508598944144723038": ["convolution_gpu_bfyx_f16", 1],
+        "18207737700426780826": ["convolution_gpu_bfyx_f16", 6],
+        "5739556172667922404": ["convolution_gpu_bfyx_f16", 2],
+        "5648963558592113654": ["convolution_gpu_bfyx_f16", 2],
+        "12700332665217178557": ["convolution_gpu_bfyx_f16", 5],
+        "2049835121645334394": ["convolution_gpu_bfyx_f16", 4],
+        "3127350206986146597": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "413340907950386667": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12169959708985325397": ["convolution_gpu_bfyx_f16", 8],
+        "15591038406687270109": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10924767171232495386": ["convolution_gpu_bfyx_f16", 8],
+        "13999571841387221249": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "5672447484681958632": ["convolution_gpu_bfyx_f16", 5],
+        "9409310337267359456": ["convolution_gpu_bfyx_f16", 8],
+        "12167511024377353453": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15893208324896471495": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "12208132924820452372": ["convolution_gpu_bfyx_f16", 8],
+        "3293314982357251400": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10021818023939833096": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "2473005239224541536": ["convolution_gpu_bfyx_f16", 8],
+        "6856119608423875377": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18255457417918645346": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "16665418645769386939": ["convolution_gpu_bfyx_f16", 8],
+        "10430922762683242901": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15110089333676343949": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "9475257013522373650": ["convolution_gpu_bfyx_f16", 8],
+        "10610536157845815072": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5557066335410910062": ["convolution_gpu_bfyx_f16", 6],
+        "18146293782255442927": ["convolution_gpu_bfyx_f16", 2],
+        "11852944538668620269": ["convolution_gpu_bfyx_f16", 7],
+        "14937682075916905713": ["convolution_gpu_bfyx_f16", 7],
+        "12792249796816770204": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17312037326967676576": ["convolution_gpu_bfyx_f16", 2],
+        "11909045540447457308": ["convolution_gpu_bfyx_f16", 6],
+        "10510946825189206241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6124992432121468125": ["convolution_gpu_bfyx_f16", 8],
+        "14190999291985701693": ["convolution_gpu_bfyx_f16", 8],
+        "13598178470968135338": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9448354674053762309": ["convolution_gpu_bfyx_f16", 2],
+        "12777599919231312068": ["convolution_gpu_bfyx_f16", 7],
+        "9337614078096106084": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16578631161511759035": ["convolution_gpu_bfyx_f16", 1],
+        "6322333494387087177": ["convolution_gpu_bfyx_f16", 7],
+        "9051299669421439712": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6900406474100422151": ["convolution_gpu_bfyx_f16", 5],
+        "1770678726875883309": ["convolution_gpu_bfyx_f16", 7],
+        "9884897216756697592": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5833785867675066644": ["convolution_gpu_bfyx_f16", 2],
+        "16500610465961551242": ["convolution_gpu_bfyx_f16", 8],
+        "1390142483294581487": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10267854415205002238": ["convolution_gpu_bfyx_f16", 4],
+        "15623187792779892835": ["convolution_gpu_bfyx_f16", 1],
+        "13148059837896884273": ["convolution_gpu_bfyx_f16", 3],
+        "171559638613408493": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10278640368905105405": ["convolution_gpu_bfyx_f16", 4],
+        "10760404678801561747": ["convolution_gpu_bfyx_f16", 2],
+        "8548473413394744544": ["convolution_gpu_bfyx_f16", 3],
+        "16729171964149306867": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3716706098703025358": ["convolution_gpu_bfyx_f16", 3],
+        "129949756464977129": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "3542071367019145965": ["fused_conv_eltwise_gpu_ref", 2],
+        "6421891780685569059": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "12397973858549014447": ["convolution_gpu_bfyx_f16", 8],
+        "147576342753084622": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9006679435677596041": ["convolution_gpu_bfyx_f16", 7],
+        "375607190849326617": ["convolution_gpu_bfyx_f16", 8],
+        "12465373696424446749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3330746708867776870": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "175810741723366131": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1393228887151888661": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2791644023635315729": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14953047924930959040": ["convolution_gpu_bfyx_f16", 8],
+        "13576707834156737134": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17441797654332334591": ["convolution_gpu_bfyx_f16", 8],
+        "4368216880157087051": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4527131704372375891": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12532849387017200369": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6143746716136988129": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "229425834968700183": ["convolution_gpu_bfyx_f16", 6],
+        "14611155839967647053": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12220017703888172171": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3017664565974342570": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3919092484794350954": ["convolution_gpu_bfyx_f16", 7],
+        "777704696687372198": ["convolution_gpu_bfyx_f16", 3],
+        "9530089245179389803": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
+        "15407099455173114443": ["convolution_gpu_bfyx_f16", 6],
+        "13830673382612975715": ["convolution_gpu_bfyx_f16", 4],
+        "15418883453881678146": ["convolution_gpu_bfyx_f16", 5],
+        "18181217963990641003": ["convolution_gpu_bfyx_f16", 2],
+        "15712589409011660453": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "1196403115198061647": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "643101540653656807": ["convolution_gpu_bfyx_f16", 1],
+        "13215809871210781323": ["convolution_gpu_bfyx_f16", 1],
+        "18368779848570116967": ["convolution_gpu_bfyx_f16", 1],
+        "3544698174676763847": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "12428007544423412129": ["convolution_gpu_bfyx_f16", 6],
+        "4939325123575119544": ["convolution_gpu_bfyx_f16", 2],
+        "17947736981603570615": ["convolution_gpu_bfyx_f16", 8],
+        "6620861214152396614": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "10018756206737727294": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5830779024517851317": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "7913817244562964901": ["convolution_gpu_bfyx_f16", 8],
+        "11779589567746893119": ["convolution_gpu_bfyx_f16", 8],
+        "5287441936829096354": ["convolution_gpu_bfyx_os_iyx_osv16", 289],
+        "16879635677321458783": ["convolution_gpu_bfyx_f16", 8],
+        "5936894667802097344": ["convolution_gpu_bfyx_f16", 8],
+        "12029555773381953470": ["convolution_gpu_bfyx_f16", 8],
+        "1395714970525756800": ["convolution_gpu_bfyx_f16", 8],
+        "18366381433142273315": ["convolution_gpu_bfyx_f16", 8],
+        "17839315025229585473": ["convolution_gpu_bfyx_f16", 8],
+        "7428339090190576585": ["convolution_gpu_bfyx_f16", 8],
+        "16427721132197847241": ["convolution_gpu_bfyx_f16", 8],
+        "929038963682864275": ["convolution_gpu_bfyx_f16", 8],
+        "6348679735483401866": ["convolution_gpu_bfyx_f16", 7],
+        "17409943223289937333": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10896472785943286419": ["convolution_gpu_bfyx_f16", 8],
+        "8675423965229942895": ["convolution_gpu_bfyx_f16", 8],
+        "15359653790909326580": ["convolution_gpu_bfyx_f16", 4],
+        "937772044105590355": ["convolution_gpu_bfyx_f16", 8],
+        "11630003841984891663": ["convolution_gpu_bfyx_f16", 8],
+        "15721323944762357421": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18032560040713612222": ["convolution_gpu_bfyx_f16", 8],
+        "16185581163541386950": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7296460872108123423": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18375557444371775299": ["convolution_gpu_bfyx_f16", 8],
+        "10922059457537054563": ["convolution_gpu_bfyx_f16", 8],
+        "122295605901184339": ["convolution_gpu_bfyx_f16", 4],
+        "12164250230746861951": ["convolution_gpu_bfyx_f16", 3],
+        "8176114476658865003": ["convolution_gpu_bfyx_os_iyx_osv16", 1049],
+        "7408205445085068145": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11368781584821592726": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13893351700564465666": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3518310626820299509": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14411220648355431920": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16360948136590378689": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12603778068505548164": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7393554260847466099": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11640173157120764930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10391275203444358233": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12900060990097311151": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13949457796213177880": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12229727046452778843": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6356853913935067660": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2057724637751433123": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16951394780935673368": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4362905853733519089": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11437739738725998008": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1166763569766001639": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "13509884479614626207": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13561264673311456568": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13949179271064170300": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9085227279626009353": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "15999251370466034620": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8278218983765546430": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1283216388519834306": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10392839783862963669": ["convolution_gpu_bfyx_gemm_like", 1],
+        "446095524058497778": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "16418977525726114825": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "5073696559530173773": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "8986786677408239490": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "18154134293896237020": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9604863051097029874": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "12931069967038668164": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "6806199908367808607": ["convolution_gpu_bfyx_os_iyx_osv16", 483],
+        "11683146685348965370": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8154297486284619437": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14336744408490491240": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "4571901717343198720": ["convolution_gpu_bfyx_os_iyx_osv16", 562],
+        "6532394816830144120": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2666796249274140911": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "11653606109120321972": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "6204893434840435239": ["convolution_gpu_bfyx_os_iyx_osv16", 876],
+        "13218364348439640168": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10201555771333451359": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "6894773592689372049": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7168438768023636584": ["convolution_gpu_bfyx_f16", 8],
+        "10451355428354516953": ["convolution_gpu_bfyx_f16", 8],
+        "14472734042788843355": ["convolution_gpu_bfyx_f16", 8],
+        "10854104081943494369": ["convolution_gpu_bfyx_f16", 8],
+        "93020906459675429": ["convolution_gpu_bfyx_f16", 6],
+        "18398350909015256408": ["convolution_gpu_bfyx_f16", 8],
+        "4154340122141626612": ["convolution_gpu_bfyx_f16", 8],
+        "18200289027422735061": ["convolution_gpu_bfyx_f16", 7],
+        "5565357052205136958": ["convolution_gpu_bfyx_f16", 4],
+        "15946908544184249774": ["convolution_gpu_bfyx_f16", 8],
+        "14037627422329357174": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14408378031985995049": ["convolution_gpu_bfyx_f16", 4],
+        "13211513495214123892": ["convolution_gpu_bfyx_f16", 6],
+        "1496494589494248203": ["convolution_gpu_bfyx_f16", 8],
+        "17087805036943027743": ["convolution_gpu_bfyx_f16", 8],
+        "13247615789377163390": ["convolution_gpu_bfyx_f16", 3],
+        "5098352369763200627": ["convolution_gpu_bfyx_f16", 8],
+        "7557421223834089733": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10657042057899091892": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2064129679519084519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13149626711154707837": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2450247775784772609": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "9349162934459662079": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13032204489661886072": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9915338154088450212": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2204239160621715211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449351266437601922": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "1155876454105658452": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10367977997774504988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9837317326715221119": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "10725269803461677890": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "2877965337998085379": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5765037690630152391": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2731214798095843918": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "4432212871967601555": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "13674833960992369491": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2407729796226002219": ["convolution_gpu_bfyx_f16", 8],
+        "8712233195607754052": ["convolution_gpu_bfyx_f16", 8],
+        "18376338036643391330": ["convolution_gpu_bfyx_f16", 7],
+        "15000057703375682508": ["convolution_gpu_bfyx_f16", 5],
+        "11538380796610598086": ["convolution_gpu_bfyx_f16", 8],
+        "8807959414103299339": ["convolution_gpu_bfyx_f16", 8],
+        "8923709952861619751": ["convolution_gpu_bfyx_f16", 8],
+        "2281723979610106495": ["convolution_gpu_bfyx_f16", 6],
+        "7003402145984308994": ["convolution_gpu_bfyx_f16", 8],
+        "8802871881972169446": ["convolution_gpu_bfyx_f16", 8],
+        "17672255854769914684": ["convolution_gpu_bfyx_f16", 7],
+        "15989515952156087492": ["convolution_gpu_bfyx_f16", 8],
+        "12613916101209377956": ["convolution_gpu_bfyx_f16", 8],
+        "8926372099361990033": ["convolution_gpu_bfyx_f16", 7],
+        "13134908817293730842": ["convolution_gpu_bfyx_f16", 8],
+        "14010406343040661271": ["convolution_gpu_bfyx_f16", 8],
+        "2325094934617563483": ["convolution_gpu_bfyx_f16", 8],
+        "11756769107875909669": ["convolution_gpu_bfyx_f16", 3],
+        "8711172943068374489": ["convolution_gpu_bfyx_f16", 8],
+        "1909118584082415877": ["convolution_gpu_bfyx_f16", 5],
+        "12040023093627702264": ["convolution_gpu_bfyx_f16", 8],
+        "6474623094910171017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6747189810752747337": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "12216744913496272224": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4342399258032747578": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5084349834068342816": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "12786257902562938666": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "8284243114775216351": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17588749900110806571": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15418915313718368321": ["convolution_gpu_bfyx_os_iyx_osv16", 721],
+        "763194125654617818": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5660634357872541998": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16012873046323424192": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "3722402584962183950": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9761723873626289438": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4696863372127622823": ["convolution_gpu_bfyx_os_iyx_osv16", 341],
+        "3364509432107392704": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "17187804634689894363": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10681521954706351183": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4147438820393951383": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "11120743380724204067": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9774801800070756895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1998618394547230268": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "4933328578946081154": ["convolution_gpu_bfyx_os_iyx_osv16", 860],
+        "13882747247011638614": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "814582084353022226": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "4844820846457555156": ["convolution_gpu_bfyx_os_iyx_osv16", 509],
+        "6607603202773469786": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "15439502814859116813": ["convolution_gpu_bfyx_os_iyx_osv16", 167],
+        "15777107988701235428": ["convolution_gpu_bfyx_os_iyx_osv16", 499],
+        "12832042711454018844": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "6099745418702030715": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "4230880085403638923": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "62516450676185117": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "5638081054417809107": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16704551377771794086": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11414353004383751891": ["convolution_gpu_bfyx_f16", 8],
+        "13826353934358977360": ["convolution_gpu_bfyx_f16", 5],
+        "12571951090832825431": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "12750018695410865011": ["convolution_gpu_bfyx_f16", 8],
+        "6036780184043053863": ["convolution_gpu_bfyx_f16", 8],
+        "6704445240879304751": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6059617597062194696": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17195686088514144017": ["convolution_gpu_bfyx_f16", 6],
+        "6228695761133876306": ["convolution_gpu_bfyx_f16", 8],
+        "1875177778795651060": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1020688071038165625": ["convolution_gpu_bfyx_f16", 6],
+        "11609278929695762477": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "11618496013484392127": ["convolution_gpu_bfyx_f16", 7],
+        "9467068612251977759": ["convolution_gpu_bfyx_f16", 5],
+        "11965876788458629557": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1968426148563107280": ["convolution_gpu_bfyx_f16", 4],
+        "5809259008840872032": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16891389262193208125": ["convolution_gpu_bfyx_f16", 8],
+        "17789658392895927080": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13756435969613742897": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3851698237626497000": ["convolution_gpu_bfyx_os_iyx_osv16", 498],
+        "13501391260376277367": ["convolution_gpu_bfyx_os_iyx_osv16", 123],
+        "1765482196017051011": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12502159939277602017": ["convolution_gpu_bfyx_f16", 3],
+        "16051024745177409774": ["convolution_gpu_bfyx_f16", 4],
+        "13606942804997151903": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4351281046292319725": ["convolution_gpu_bfyx_f16", 4],
+        "6443607999496148234": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13547488471348547459": ["convolution_gpu_bfyx_f16", 4],
+        "18238745366827633559": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "11158062361663031443": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "9206861055140649226": ["convolution_gpu_bfyx_f16", 8],
+        "9426001650092504798": ["convolution_gpu_bfyx_f16", 2],
+        "13181672943699248834": ["convolution_gpu_bfyx_os_iyx_osv16", 383],
+        "6339523663850142246": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "1471109004832880586": ["convolution_gpu_bfyx_f16", 3],
+        "17107284393334082714": ["convolution_gpu_bfyx_f16", 6],
+        "13719359892110227962": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "9464351599302771690": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "2096653216949318450": ["convolution_gpu_bfyx_f16", 3],
+        "13025913519962707885": ["convolution_gpu_bfyx_f16", 7],
+        "17728310140731150226": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "5737189353417573057": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "3213984700185874261": ["convolution_gpu_bfyx_f16", 3],
+        "10740106091021667886": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "13362856801855126628": ["convolution_gpu_bfyx_os_iyx_osv16", 442],
+        "5477965717233241895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13669762279828807941": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11383807956757990177": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660099130061496863": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17151683028720387864": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1859914910272455189": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7396998153023492339": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2008700175670389343": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16827869183124732303": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13120889385491477637": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18305507733019922935": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4387964680811897490": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9490382148010824252": ["convolution_gpu_bfyx_os_iyx_osv16", 689],
+        "7607585452987307694": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6647358668213164168": ["convolution_gpu_bfyx_os_iyx_osv16", 693],
+        "3269426835760928022": ["convolution_gpu_bfyx_os_iyx_osv16", 690],
+        "8407302923973070317": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7392260165026897157": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17129583679506972654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15394113208725741887": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2232515974555590822": ["convolution_gpu_bfyx_os_iyx_osv16", 8],
+        "17180103562901495937": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16817205245313896299": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2495268194877370173": ["convolution_gpu_bfyx_f16", 7],
+        "12476976926994223419": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "837759583632984386": ["convolution_gpu_bfyx_f16", 8],
+        "15704905077262309915": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15294932718062276977": ["convolution_gpu_bfyx_f16", 8],
+        "4080044423867161503": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12782915336639648289": ["convolution_gpu_bfyx_f16", 5],
+        "6939516498492475263": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689321018957344059": ["convolution_gpu_bfyx_f16", 3],
+        "2757721937742809580": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10786200002789430346": ["convolution_gpu_bfyx_f16", 6],
+        "1941288041804222048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14851676883700287486": ["convolution_gpu_bfyx_f16", 6],
+        "17430311645965116316": ["convolution_gpu_bfyx_f16", 6],
+        "3115685904789548595": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12312218395355058343": ["convolution_gpu_bfyx_f16", 6],
+        "17435783978159028678": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18104511008021666751": ["convolution_gpu_bfyx_f16", 7],
+        "2889130721514872852": ["convolution_gpu_bfyx_f16", 2],
+        "10924517066879469764": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16962109663829219905": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12727830299177939535": ["convolution_gpu_bfyx_f16", 8],
+        "14199062222704041939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10076860909609577057": ["convolution_gpu_bfyx_f16", 8],
+        "1776138842548256617": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12080107273581243331": ["convolution_gpu_bfyx_f16", 7],
+        "17797545214985482309": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1422959599890390628": ["convolution_gpu_bfyx_f16", 8],
+        "9838313987238017367": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5055964951388373312": ["convolution_gpu_bfyx_f16", 8],
+        "9954422981575375090": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6900498541045596449": ["convolution_gpu_bfyx_f16", 8],
+        "10947987508463792407": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8011212857567850331": ["convolution_gpu_bfyx_f16", 8],
+        "12066306068956923073": ["convolution_gpu_bfyx_f16", 8],
+        "10402257611113721897": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16298426629186155976": ["convolution_gpu_bfyx_f16", 6],
+        "5849544612077982343": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7486977943442929227": ["convolution_gpu_bfyx_f16", 6],
+        "4669548232510373224": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11178064889018543448": ["convolution_gpu_bfyx_os_iyx_osv16", 1042],
+        "14338047015194840420": ["convolution_gpu_bfyx_os_iyx_osv16", 290],
+        "11113611129372516159": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7571325526315806090": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "16051125771881231197": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "5500003724328450643": ["convolution_gpu_bfyx_os_iyx_osv16", 501],
+        "6990517414810688521": ["fully_connected_gpu_bf_io_gemm", 2],
+        "3365158575268504690": ["convolution_gpu_bfyx_os_iyx_osv16", 98],
+        "15532688375958629736": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "7172667569652614272": ["convolution_gpu_bfyx_os_iyx_osv16", 605],
+        "9852378413482765633": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "17891867756237002865": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "8518413618774363848": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "3035058890807107503": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10522964111588366077": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5091533143160590449": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1111],
+        "18403842741213451915": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "16363667733973120518": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17885073348446455401": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1110],
+        "11022756012642936369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11139267075730841649": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11229901418407413996": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5222025157174261438": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 730],
+        "17449182536559459768": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6297704420477135889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11108691276983929466": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 735],
+        "17214808446370850848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10308273010954959421": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15889539072687412294": ["convolution_gpu_bfyx_gemm_like", 2],
+        "543890610580810398": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 662],
+        "10792988210112094339": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "7408203620228473987": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16676023485427668788": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 286],
+        "947940965229080670": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1038],
+        "13790640092608885830": ["convolution_gpu_bfyx_os_iyx_osv16", 861],
+        "6545311138362761303": ["convolution_gpu_bfyx_os_iyx_osv16", 885],
+        "5305325292949121227": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "12206315739377842316": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15794321689897308881": ["convolution_gpu_bfyx_f16", 5],
+        "5484125953239615763": ["convolution_gpu_bfyx_f16", 6],
+        "2757551509240446139": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18162571474251370775": ["convolution_gpu_bfyx_f16", 7],
+        "18166598730010472057": ["convolution_gpu_bfyx_f16", 7],
+        "9693459623757611016": ["convolution_gpu_bfyx_f16", 5],
+        "5728119660273315956": ["convolution_gpu_bfyx_f16", 4],
+        "11239914102833617438": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12459704794510442759": ["convolution_gpu_bfyx_f16", 5],
+        "10085932287585840621": ["convolution_gpu_bfyx_f16", 6],
+        "4683959402324362591": ["convolution_gpu_bfyx_f16", 6],
+        "601777369358795451": ["convolution_gpu_bfyx_f16", 1],
+        "18164526837814844607": ["convolution_gpu_bfyx_f16", 7],
+        "10891482236028483911": ["convolution_gpu_bfyx_f16", 8],
+        "2288431871961311886": ["convolution_gpu_bfyx_f16", 8],
+        "8042721734241214802": ["convolution_gpu_bfyx_f16", 2],
+        "2099403897129551255": ["convolution_gpu_bfyx_f16", 6],
+        "13249070386604821754": ["convolution_gpu_bfyx_f16", 2],
+        "13094552025197588032": ["convolution_gpu_bfyx_f16", 7],
+        "3033228150494649847": ["convolution_gpu_bfyx_f16", 3],
+        "1886675028572526491": ["convolution_gpu_bfyx_f16", 8],
+        "13297546803430310514": ["convolution_gpu_bfyx_f16", 6],
+        "7841875474696309399": ["convolution_gpu_bfyx_f16", 8],
+        "18152244993328643321": ["convolution_gpu_bfyx_f16", 2],
+        "6360926220193053423": ["convolution_gpu_bfyx_f16", 8],
+        "17119700657499960250": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "10899267078041093597": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "11509503516680870396": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3553844546517243430": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "11739050017164389431": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "14683616789766294266": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1178443422000627700": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 741],
+        "3959894501921049830": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "6268257722565030993": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8104007721367839894": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11004242349744689661": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "18331651243656907622": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "165832937834890614": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13820132527548818114": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "11494973886338256684": ["convolution_gpu_bfyx_os_iyx_osv16", 598],
+        "9562717353252171645": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15182874743616431755": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "11923231799522030843": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "7212742683076043022": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "1535659774314187616": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9077124630226762093": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "10707439442194349922": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "13670707208998927662": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11898738546265963886": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "7218310781442328740": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 213],
+        "17307988793370069255": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "3159313229944494871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2202381460552007272": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 740],
+        "4539543204582046751": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "2922645767583925625": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "11165701472241951833": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "3582634693373659847": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "3334026180071867610": ["convolution_gpu_bfyx_os_iyx_osv16", 92],
+        "5443310231181579928": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "17203265678149575116": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "18001153514387944483": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "14678448066677992909": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1472673738079022921": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "9210929274479838540": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "14052560267577031250": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "11761558075765102945": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2994573423350313291": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "6446696801960621776": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "4993668527725303377": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16681164889734441913": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "5870803719794486347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "5252877195442523975": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "17366351824112539739": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4000739627265205773": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "3021897915458395756": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "3587400134318800957": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "899884405480315978": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "16109177282570031068": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 94],
+        "4102196194477012012": ["convolution_gpu_bfyx_os_iyx_osv16", 84],
+        "6443971566937312874": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5730812864956211386": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 471],
+        "10903630002990314118": ["convolution_gpu_bfyx_f16", 7],
+        "14792133935314535772": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "134764196422339946": ["convolution_gpu_bfyx_f16", 8],
+        "7649098040464263012": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "8261353883745708993": ["convolution_gpu_bfyx_f16", 8],
+        "14729283399254215184": ["convolution_gpu_bfyx_f16", 8],
+        "15972034366129164791": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "6089665236185789777": ["convolution_gpu_bfyx_f16", 7],
+        "13871746223287309461": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10063803553810811685": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1872584393135018560": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5428450090197909187": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16371170442503065678": ["convolution_gpu_bfyx_f16", 8],
+        "4237307788889339587": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6443689845617564164": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1311581305426450842": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12519308309976060263": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9615782627992922213": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8051063619232397665": ["convolution_gpu_bfyx_f16", 8],
+        "10987585104127812498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7131640551183167105": ["convolution_gpu_bfyx_f16", 3],
+        "17528260968382789267": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3692483328113186067": ["convolution_gpu_bfyx_f16", 8],
+        "7694255321069379488": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3545269441923145336": ["convolution_gpu_bfyx_f16", 6],
+        "10643380013749923489": ["convolution_gpu_bfyx_f16", 4],
+        "13821946704646192935": ["convolution_gpu_bfyx_f16", 6],
+        "7150971004919685584": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7555796481960570354": ["convolution_gpu_bfyx_f16", 1],
+        "1802080211194796745": ["convolution_gpu_bfyx_f16", 5],
+        "11278616463993391107": ["convolution_gpu_bfyx_f16", 1],
+        "4522486456498017325": ["convolution_gpu_bfyx_f16", 1],
+        "10839722921299529226": ["convolution_gpu_bfyx_f16", 5],
+        "12049793935704273778": ["convolution_gpu_bfyx_f16", 2],
+        "11167394660860618324": ["convolution_gpu_bfyx_f16", 2],
+        "10589914405539478974": ["convolution_gpu_bfyx_f16", 0],
+        "1873986292070678779": ["convolution_gpu_bfyx_f16", 6],
+        "2954053167638478731": ["convolution_gpu_bfyx_f16", 3],
+        "4151068961170605556": ["convolution_gpu_bfyx_f16", 2],
+        "430695072683807402": ["convolution_gpu_bfyx_os_iyx_osv16", 756],
+        "2475680330312153399": ["convolution_gpu_bfyx_os_iyx_osv16", 380],
+        "4814090476154320391": ["convolution_gpu_bfyx_gemm_like", 1],
+        "401208792095350972": ["convolution_gpu_bfyx_os_iyx_osv16", 140],
+        "12980211839763569977": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "6011669866574390388": ["fully_connected_gpu_fb_oi_ref", 0],
+        "4568334008414745667": ["fully_connected_gpu_fb_oi_ref", 1],
+        "11395215181578068623": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14616145871710456304": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "5168719682914827724": ["convolution_gpu_bfyx_os_iyx_osv16", 495],
+        "9473263513191498949": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10720631808458688474": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5434387853485184980": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "2668670046934680180": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6784038318046980185": ["convolution_gpu_bfyx_gemm_like", 0],
+        "6248879028648699716": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "1436424324238684653": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1852843918994539642": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1199836165181399413": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "609944608610496003": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14740129361300854586": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5500102903434438965": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "7297288884568452370": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5136459381906620211": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17411381157694639837": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "2491010747718166234": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8553537608760917592": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18310729590270667665": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18061582718156557458": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3957386760515436702": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "18218313235608627889": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "16069469614549557651": ["convolution_gpu_bfyx_gemm_like", 2],
+        "706526643700857104": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14937087468947592213": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10242452169628899571": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13635064319608016375": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "16629319403227634487": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5089311900051393846": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12955977963529216714": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9000599407449073799": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "4538238288532448191": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "5772569803234537608": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "12841353805697309892": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14614506535270942373": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3934913926529554178": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "5041391468298673889": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7553664247542433501": ["convolution_gpu_bfyx_f16", 3],
+        "11605857135211514409": ["convolution_gpu_bfyx_f16", 5],
+        "444971365656194402": ["convolution_gpu_bfyx_f16", 5],
+        "5717483566010225296": ["convolution_gpu_bfyx_f16", 5],
+        "4288000530029758806": ["convolution_gpu_bfyx_f16", 6],
+        "10127890223685122341": ["convolution_gpu_bfyx_f16", 1],
+        "9968769246349627476": ["convolution_gpu_bfyx_f16", 3],
+        "6199309548387501256": ["convolution_gpu_bfyx_f16", 5],
+        "12620136462430164778": ["convolution_gpu_bfyx_f16", 2],
+        "4030102215743087748": ["convolution_gpu_bfyx_f16", 4],
+        "5207630762737842899": ["convolution_gpu_bfyx_f16", 5],
+        "13353279607627735162": ["convolution_gpu_bfyx_f16", 2],
+        "12545074411559266651": ["convolution_gpu_bfyx_os_iyx_osv16", 14],
+        "11750405629109652478": ["convolution_gpu_bfyx_os_iyx_osv16", 451],
+        "4119964432511449865": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "13746113667444417879": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "10795302670177759469": ["convolution_gpu_bfyx_os_iyx_osv16", 77],
+        "13013373169763193744": ["convolution_gpu_bfyx_os_iyx_osv16", 77],
+        "4192778340765412918": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "1376358627098743825": ["fully_connected_gpu_bf_io_gemm", 2],
+        "1657296775356261285": ["fully_connected_gpu_bf_io_gemm", 0],
+        "5000052273967689626": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3550687136164360833": ["convolution_gpu_bfyx_os_iyx_osv16", 506],
+        "11872436735917473629": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15213766544697966206": ["convolution_gpu_bfyx_os_iyx_osv16", 138],
+        "2360564651185435605": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "8722771796488042004": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "8089184198607217332": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "4031469107004893821": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "16899210497921809352": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "2002591318101502434": ["fully_connected_gpu_bf_io_gemm", 1],
+        "8508119169246513026": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13461678175466315866": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1580848418974169308": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13189391944650202330": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5850612837647497531": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12734736056404146766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3072344987020666532": ["convolution_gpu_bfyx_os_iyx_osv16", 241],
+        "5932710369376133446": ["convolution_gpu_bfyx_os_iyx_osv16", 241],
+        "15493383292734604744": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8721087995946196075": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "16124622994105864663": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "1289727743091243002": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "13254721852483301327": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14370151670822727933": ["convolution_gpu_bfyx_f16", 8],
+        "15924942580474924301": ["convolution_gpu_bfyx_f16", 8],
+        "8623363904987921339": ["convolution_gpu_bfyx_f16", 8],
+        "14279463126817989625": ["convolution_gpu_bfyx_f16", 8],
+        "5771041403997282348": ["convolution_gpu_bfyx_f16", 7],
+        "13460126563546214581": ["convolution_gpu_bfyx_f16", 8],
+        "12781821122129489865": ["convolution_gpu_bfyx_f16", 8],
+        "5488105527272322800": ["convolution_gpu_bfyx_f16", 6],
+        "17828931984028343371": ["convolution_gpu_bfyx_f16", 8],
+        "12277484678078733815": ["convolution_gpu_bfyx_f16", 8],
+        "4379377123702223052": ["convolution_gpu_bfyx_f16", 6],
+        "13844007082423168759": ["convolution_gpu_bfyx_f16", 8],
+        "3449674399921465807": ["convolution_gpu_bfyx_f16", 8],
+        "8965134305331582692": ["convolution_gpu_bfyx_f16", 6],
+        "15610672058730770735": ["convolution_gpu_bfyx_f16", 8],
+        "5896690968165005425": ["convolution_gpu_bfyx_f16", 8],
+        "3524820477574731101": ["convolution_gpu_bfyx_f16", 8],
+        "18408976645775193874": ["convolution_gpu_bfyx_f16", 3],
+        "153117019091512087": ["convolution_gpu_bfyx_f16", 5],
+        "8416875419376211043": ["convolution_gpu_bfyx_f16", 8],
+        "8583589102830838750": ["convolution_gpu_bfyx_f16", 4],
+        "3120895120217288923": ["convolution_gpu_bfyx_f16", 8],
+        "15703787067071726819": ["convolution_gpu_bfyx_f16", 8],
+        "15071840228430286000": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10604301655933773079": ["convolution_gpu_bfyx_f16", 8],
+        "5325610794742202015": ["convolution_gpu_bfyx_f16", 8],
+        "17964375317877714914": ["convolution_gpu_bfyx_f16", 3],
+        "12053395569113050851": ["convolution_gpu_bfyx_f16", 5],
+        "5324438133148786955": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11947915845132871271": ["convolution_gpu_bfyx_f16", 6],
+        "7174430747851522735": ["convolution_gpu_bfyx_f16", 8],
+        "10817252888446193325": ["convolution_gpu_bfyx_f16", 7],
+        "17227836941611747083": ["convolution_gpu_bfyx_f16", 7],
+        "4263250141898433551": ["convolution_gpu_bfyx_f16", 8],
+        "15197397105147240256": ["convolution_gpu_bfyx_f16", 8],
+        "16826153050492358220": ["convolution_gpu_bfyx_f16", 8],
+        "6280191601341007993": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14088402405545946752": ["convolution_gpu_bfyx_f16", 8],
+        "11924657120112219404": ["convolution_gpu_bfyx_f16", 8],
+        "2913643747915656322": ["convolution_gpu_bfyx_f16", 7],
+        "5972050422651513452": ["convolution_gpu_bfyx_f16", 8],
+        "594987933637320500": ["convolution_gpu_bfyx_f16", 8],
+        "9117780867293066941": ["convolution_gpu_bfyx_f16", 7],
+        "10537265908618264344": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "516889867886123061": ["convolution_gpu_bfyx_f16", 8],
+        "10843546687065559705": ["convolution_gpu_bfyx_f16", 8],
+        "3218921047283180399": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4228809976880203196": ["convolution_gpu_bfyx_f16", 8],
+        "1254993062868768184": ["convolution_gpu_bfyx_f16", 8],
+        "9020025051393195609": ["convolution_gpu_bfyx_f16", 6],
+        "3651025785330045688": ["convolution_gpu_bfyx_f16", 8],
+        "7338126575707982952": ["convolution_gpu_bfyx_f16", 8],
+        "8920269652980704805": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11886205472834821311": ["convolution_gpu_bfyx_f16", 8],
+        "16386706804437469983": ["convolution_gpu_bfyx_f16", 8],
+        "10891126431353150021": ["convolution_gpu_bfyx_f16", 4],
+        "1543362854403350459": ["convolution_gpu_bfyx_f16", 4],
+        "355779126755706267": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5058842932401709044": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3474787752627590416": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "11397785525222803208": ["convolution_gpu_bfyx_f16", 8],
+        "17189570066626092769": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5416759191132692795": ["convolution_gpu_bfyx_f16", 8],
+        "15779223980784666571": ["convolution_gpu_bfyx_f16", 8],
+        "9876867732461890358": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4395456194321195850": ["convolution_gpu_bfyx_f16", 8],
+        "17305875411294121419": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14177925973191748560": ["convolution_gpu_bfyx_f16", 8],
+        "11226417030784059608": ["convolution_gpu_bfyx_f16", 8],
+        "13701064960014710666": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7578986390590629232": ["convolution_gpu_bfyx_f16", 6],
+        "18249884212717189127": ["convolution_gpu_bfyx_f16", 8],
+        "15067786896746843371": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9401123449386398137": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16048844891250863961": ["convolution_gpu_bfyx_f16", 8],
+        "6728497031007384694": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2516044816386401962": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16648925195162114604": ["convolution_gpu_bfyx_f16", 7],
+        "5525235637767568664": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1390363802044780888": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4660585027739905927": ["convolution_gpu_bfyx_f16", 6],
+        "13129095945962624168": ["convolution_gpu_bfyx_f16", 8],
+        "12788331791832647513": ["convolution_gpu_bfyx_f16", 8],
+        "14679708279498263758": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11429404684477518332": ["convolution_gpu_bfyx_f16", 8],
+        "14865242305011765347": ["convolution_gpu_bfyx_f16", 8],
+        "3336471572478290650": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11500634857451727324": ["convolution_gpu_bfyx_f16", 6],
+        "11595683408837949967": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3687032763857590951": ["convolution_gpu_bfyx_f16", 4],
+        "6217128641000145091": ["convolution_gpu_bfyx_f16", 4],
+        "16821549364716957301": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15302171095409526152": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11864034994554724984": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14870500658715304450": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17166137878933261586": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4747051200454897627": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "6103297927282359131": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12354577265502528987": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5629707522150533470": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "4669341675413375412": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5824689503286977960": ["convolution_gpu_bfyx_os_iyx_osv16", 1089],
+        "1546913508270024508": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12481630413997884765": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12369473519508202883": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "11638732022457475943": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12849186218218397773": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "6223786629523546158": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "16409268476361727461": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5980922898957821133": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "7460501275697392871": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "18201105716376505767": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12988798514363758096": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7520277356070624225": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "16428007036508355221": ["convolution_gpu_bfyx_os_iyx_osv16", 673],
+        "13507669105800739346": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6145360746375638990": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1961091377024077375": ["convolution_gpu_bfyx_os_iyx_osv16", 724],
+        "10172928736166564721": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "17782881999244653954": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "6247677935035843769": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1313412961572873590": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8815917103337680437": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "2588641648834517674": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13375988416811654681": ["convolution_gpu_bfyx_f16", 8],
+        "639248689874473989": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "18230323277806031379": ["convolution_gpu_bfyx_f16", 8],
+        "9840337783900152749": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "9675828062182624375": ["convolution_gpu_bfyx_f16", 8],
+        "9377492070079649297": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7112970727457201985": ["convolution_gpu_bfyx_f16", 7],
+        "11374310483937359607": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10352635863591448343": ["convolution_gpu_bfyx_f16", 7],
+        "6800759166076075555": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16002017704446988989": ["convolution_gpu_bfyx_f16", 7],
+        "18425882095713222772": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17369116353245315394": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "7965652103961413110": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "541252306015518029": ["convolution_gpu_bfyx_f16", 6],
+        "1060404712024230017": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3993499882812656917": ["convolution_gpu_bfyx_f16", 7],
+        "17846701225707639413": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13408643537227938026": ["convolution_gpu_bfyx_f16", 7],
+        "9522850710190578404": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12574188065500921524": ["convolution_gpu_bfyx_f16", 8],
+        "3789554453839587972": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5299625387052100099": ["convolution_gpu_bfyx_f16", 6],
+        "336892846225020589": ["convolution_gpu_bfyx_f16", 6],
+        "5185829229139389046": ["convolution_gpu_bfyx_f16", 5],
+        "2831646144717760351": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18424570657159231491": ["convolution_gpu_bfyx_f16", 5],
+        "9205364888756552960": ["convolution_gpu_bfyx_f16", 3],
+        "3887640479401316139": ["convolution_gpu_bfyx_os_iyx_osv16", 48],
+        "6656241698352770423": ["convolution_gpu_bfyx_f16", 3],
+        "7811986603236019243": ["convolution_gpu_bfyx_f16", 2],
+        "9876053612488794566": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16267089649659533695": ["convolution_gpu_bfyx_f16", 2],
+        "16995874808271476239": ["convolution_gpu_bfyx_f16", 2],
+        "9563406721506757343": ["convolution_gpu_bfyx_os_iyx_osv16", 13],
+        "5267299357355370554": ["convolution_gpu_bfyx_f16", 1],
+        "6658762342071792814": ["convolution_gpu_bfyx_f16", 2],
+        "3847156373714139957": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "2382572392955625224": ["convolution_gpu_bfyx_f16", 1],
+        "15376846894059882538": ["convolution_gpu_bfyx_f16", 6],
+        "4522481920910455569": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "13437481345646491476": ["convolution_gpu_bfyx_f16", 1],
+        "903637112880415287": ["convolution_gpu_bfyx_f16", 2],
+        "11379365004693699817": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "4598588079263356267": ["convolution_gpu_bfyx_f16", 7],
+        "3642481197610192005": ["convolution_gpu_bfyx_f16", 3],
+        "12423446124851974206": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "11020339094339633876": ["convolution_gpu_bfyx_f16", 8],
+        "1366222232036504221": ["convolution_gpu_bfyx_f16", 5],
+        "7837288500475798381": ["convolution_gpu_bfyx_os_iyx_osv16", 64],
+        "2779831597589397721": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14888498856025675875": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13008816286946828339": ["convolution_gpu_bfyx_os_iyx_osv16", 131],
+        "14472562307183930494": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "12260051528344627305": ["convolution_gpu_bfyx_os_iyx_osv16", 877],
+        "12237139830764526217": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "12839904859734107448": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "2557331839687658350": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "14711934417369240383": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "7324956106181658437": ["convolution_gpu_bfyx_f16", 8],
+        "11516100376069186015": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "75988560390265531": ["convolution_gpu_bfyx_f16", 8],
+        "7993548757830399994": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13514240768023629554": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13043388032264307920": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3870546364113237300": ["convolution_gpu_bfyx_f16", 7],
+        "260191733589958832": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15013159908977981805": ["convolution_gpu_bfyx_f16", 6],
+        "9337772532306485903": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2586645227127931947": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10949794786261718674": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4075981715729743261": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5260474776491928924": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1199570205321806135": ["convolution_gpu_bfyx_f16", 6],
+        "11375048135247711028": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2566520237302171109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3394745723753563598": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9187084522252003753": ["convolution_gpu_bfyx_f16", 8],
+        "5354859258229104455": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6459996129125419168": ["convolution_gpu_bfyx_f16", 7],
+        "1480958967678326823": ["convolution_gpu_bfyx_os_iyx_osv16", 804],
+        "15068007241112743131": ["convolution_gpu_bfyx_f16", 7],
+        "618975323495168026": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9529518231093074440": ["convolution_gpu_bfyx_f16", 3],
+        "15305384015295940803": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "5539082047551617378": ["convolution_gpu_bfyx_f16", 4],
+        "12707656392447062040": ["convolution_gpu_bfyx_f16", 1],
+        "11606382700107557730": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "12969132519312136058": ["convolution_gpu_bfyx_f16", 7],
+        "842687355344268246": ["convolution_gpu_bfyx_f16", 3],
+        "2656665532089288876": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "2150959290870604234": ["convolution_gpu_bfyx_os_iyx_osv16", 5],
+        "1544327503165399517": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2039872499448437447": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3991348982014278143": ["convolution_gpu_bfyx_f16", 8],
+        "7827718823084060727": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3681376157677527214": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16091816004313541827": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "8758769511439480688": ["convolution_gpu_bfyx_f16", 8],
+        "5753897782370339077": ["convolution_gpu_bfyx_f16", 8],
+        "776679759950033458": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15392321417589373062": ["convolution_gpu_bfyx_os_iyx_osv16", 93],
+        "16658067151111824217": ["convolution_gpu_bfyx_f16", 8],
+        "4205857284855499968": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16586084939410219119": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "6469574179117123167": ["convolution_gpu_bfyx_f16", 8],
+        "16376979276035825608": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8269248435817479295": ["convolution_gpu_bfyx_os_iyx_osv16", 55],
+        "1395225722502733977": ["convolution_gpu_bfyx_f16", 8],
+        "10013178580860124936": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10371314197191515": ["convolution_gpu_bfyx_os_iyx_osv16", 805],
+        "16689758532561396949": ["convolution_gpu_bfyx_f16", 8],
+        "6406311807883630817": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13420164150295884514": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17427856786317070333": ["convolution_gpu_bfyx_f16", 6],
+        "13045564637680284253": ["convolution_gpu_bfyx_f16", 1],
+        "10240575152538862347": ["convolution_gpu_bfyx_f16", 8],
+        "350106201615562244": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2408882959031808890": ["convolution_gpu_bfyx_f16", 8],
+        "13535264758398237992": ["convolution_gpu_bfyx_f16", 8],
+        "8019110633749314726": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17143135464144572440": ["convolution_gpu_bfyx_f16", 6],
+        "7164571433420538604": ["convolution_gpu_bfyx_f16", 8],
+        "759843595724886461": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8349697007955129262": ["convolution_gpu_bfyx_f16", 7],
+        "4612125500221158849": ["convolution_gpu_bfyx_f16", 7],
+        "1613453471546288707": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18082453876418923256": ["convolution_gpu_bfyx_f16", 7],
+        "2573494879369122465": ["convolution_gpu_bfyx_f16", 8],
+        "13178243377999862677": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14420748195672461101": ["convolution_gpu_bfyx_f16", 8],
+        "3780553431140772654": ["convolution_gpu_bfyx_f16", 8],
+        "7022912357125866065": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "21159126741870541": ["convolution_gpu_bfyx_f16", 8],
+        "6041100151446003929": ["convolution_gpu_bfyx_f16", 8],
+        "13214853558757669358": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12243903767779308254": ["convolution_gpu_bfyx_f16", 4],
+        "7684654778898882658": ["convolution_gpu_bfyx_f16", 8],
+        "10861634955820547836": ["convolution_gpu_bfyx_f16", 7],
+        "10937719522646877794": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3996245434452465017": ["convolution_gpu_bfyx_f16", 3],
+        "3949209947065694155": ["convolution_gpu_bfyx_f16", 6],
+        "7904844949382399644": ["convolution_gpu_bfyx_f16", 7],
+        "15791093795050497196": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1904667993331130850": ["convolution_gpu_bfyx_f16", 7],
+        "2007864734836080416": ["convolution_gpu_bfyx_f16", 7],
+        "1504595473072178549": ["fused_conv_eltwise_gpu_ref", 1],
+        "5321304003293302339": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6391803599307981783": ["convolution_gpu_bfyx_f16", 7],
+        "12753622180051769374": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1418471439817460893": ["convolution_gpu_bfyx_f16", 8],
+        "14007410751094259041": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6000600601103213217": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15944256561905190998": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9477177759691465931": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6061863826293001749": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5656695882306435761": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2754361133104409608": ["convolution_gpu_bfyx_f16", 6],
+        "16573836003993562922": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14361485468636686919": ["convolution_gpu_bfyx_f16", 7],
+        "13290181949279279819": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11573991700888599299": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3211944296604564565": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15416497136172272973": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13727677413762125787": ["convolution_gpu_bfyx_f16", 6],
+        "223582465360062033": ["convolution_gpu_bfyx_os_iyx_osv16", 203],
+        "4556622765359080875": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8367602301888142902": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5551657483299070298": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16580040324898643414": ["convolution_gpu_bfyx_f16", 3],
+        "3400482857331511907": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "12745087061597212091": ["convolution_gpu_bfyx_f16", 6],
+        "2427753259358876751": ["convolution_gpu_bfyx_f16", 8],
+        "14928919742046692806": ["convolution_gpu_bfyx_f16", 3],
+        "1086083190921173575": ["convolution_gpu_bfyx_f16", 1],
+        "7864788493798910665": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "419863291071220231": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18341088182899934803": ["convolution_gpu_bfyx_f16", 1],
+        "13936757228914648511": ["convolution_gpu_bfyx_f16", 1],
+        "3620847974583296219": ["convolution_gpu_bfyx_f16", 1],
+        "17043718605372433771": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "9491918828755162849": ["convolution_gpu_bfyx_f16", 8],
+        "5589151165477337040": ["convolution_gpu_bfyx_f16", 2],
+        "10228757350181746895": ["convolution_gpu_bfyx_f16", 1],
+        "1162872816262341718": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "16649287498467160559": ["convolution_gpu_bfyx_os_iyx_osv16", 494],
+        "7586799959717044009": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "9084083435358099350": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "9376423029944831246": ["convolution_gpu_bfyx_os_iyx_osv16", 119],
+        "705554998369361805": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "6245817051936572651": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "1323301183115767024": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "2507934590678243268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14525834002536696135": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "5474206322525908485": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "11515796160198158378": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "3132722606109144321": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14710881631609824591": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "11775792831795740823": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "5638724202264796275": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6886956365972144464": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "7665666161071576188": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 740],
+        "5417472746527799111": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "2594584321222202684": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "922877204324354246": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "7350126920223776235": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16303836867197711105": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7867449406763358779": ["convolution_gpu_bfyx_os_iyx_osv16", 237],
+        "15073897412776684512": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7483651542476843520": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15903567343998302316": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "15583443924198345750": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "17843088220627815484": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4632062586003457136": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "10545129526795036329": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "8799068442641712278": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14112582871403839539": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "1537120727711441803": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "6319422805741168410": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "4932803782521646509": ["convolution_gpu_bfyx_os_iyx_osv16", 1108],
+        "693915964507763961": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "12231852642666528690": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "6327608958004075948": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10481749345430191494": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "3465618418555443152": ["convolution_gpu_bfyx_os_iyx_osv16", 119],
+        "6220132353152696371": ["convolution_gpu_bfyx_os_iyx_osv16", 487],
+        "767822057476164981": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9047957325396112699": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "4356441299961129632": ["convolution_gpu_bfyx_os_iyx_osv16", 191],
+        "10144632434338007132": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "15158722447225497040": ["convolution_gpu_bfyx_os_iyx_osv16", 994],
+        "14636891429613595743": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "10686925946858146532": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "8212789547545225423": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "11769756626318373236": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5110309993577022127": ["convolution_gpu_bfyx_os_iyx_osv16", 83],
+        "2562131945197556573": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10704041599214066504": ["convolution_gpu_bfyx_f16", 8],
+        "10613229998051250501": ["convolution_gpu_bfyx_f16", 8],
+        "11371787826925681911": ["convolution_gpu_bfyx_f16", 8],
+        "1813150318517555729": ["convolution_gpu_bfyx_f16", 8],
+        "2771555413518577061": ["convolution_gpu_bfyx_f16", 7],
+        "5185490410687016716": ["convolution_gpu_bfyx_f16", 8],
+        "7950736292930841432": ["convolution_gpu_bfyx_f16", 8],
+        "8261743217235812905": ["convolution_gpu_bfyx_f16", 6],
+        "8477837540026813338": ["convolution_gpu_bfyx_f16", 7],
+        "7870792155742596714": ["convolution_gpu_bfyx_f16", 7],
+        "877301692476873394": ["convolution_gpu_bfyx_f16", 7],
+        "13056385937425838233": ["convolution_gpu_bfyx_f16", 8],
+        "8845096601815863972": ["convolution_gpu_bfyx_f16", 6],
+        "14676936757685089287": ["convolution_gpu_bfyx_f16", 8],
+        "5137720027289968571": ["convolution_gpu_bfyx_f16", 8],
+        "16357238101987779826": ["convolution_gpu_bfyx_f16", 7],
+        "9042736284060217631": ["convolution_gpu_bfyx_f16", 8],
+        "1962817966750882229": ["convolution_gpu_bfyx_gemm_like", 2],
+        "757414390636970088": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "4392731931266884279": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14949235924854278221": ["convolution_gpu_bfyx_os_iyx_osv16", 229],
+        "8109779592266289481": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "12865143364214858603": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "15067445793956191132": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5644043280715935432": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "15401918726121762363": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "9239825223929080442": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "16388921203426413956": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "6777503252254723020": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "11128087433951850083": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "5141881043179760550": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "12802295324029349931": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "15431628897951881935": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "3589251091292907354": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "6054584798362533079": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "2027062613896109334": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "2494989528221736054": ["convolution_gpu_bfyx_f16", 3],
+        "10481457184081052557": ["convolution_gpu_bfyx_f16", 3],
+        "17843566914419305583": ["convolution_gpu_bfyx_f16", 8],
+        "10440359951914302042": ["convolution_gpu_bfyx_f16", 5],
+        "12355534646291322950": ["convolution_gpu_bfyx_f16", 3],
+        "1312046147551402733": ["convolution_gpu_bfyx_f16", 4],
+        "17747064821498992452": ["convolution_gpu_bfyx_f16", 1],
+        "15727623554601964014": ["convolution_gpu_bfyx_f16", 4],
+        "1123438482147655288": ["convolution_gpu_bfyx_f16", 1],
+        "7126696940487701707": ["convolution_gpu_bfyx_f16", 3],
+        "3872390202906772826": ["convolution_gpu_bfyx_f16", 6],
+        "2880589787553789663": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "4631844879520026809": ["convolution_gpu_bfyx_os_iyx_osv16", 155],
+        "13844000686044797469": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "9270121824344599740": ["convolution_gpu_bfyx_os_iyx_osv16", 159],
+        "16709277754916245782": ["convolution_gpu_bfyx_os_iyx_osv16", 529],
+        "17921065014385217728": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "13953277739655839946": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "365791512696267923": ["convolution_gpu_bfyx_os_iyx_osv16", 157],
+        "1141261355712926031": ["convolution_gpu_bfyx_os_iyx_osv16", 529],
+        "8813719449277469033": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "6092477671894277230": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8722892772000291602": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11520633390649939176": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "12980812349323846110": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7188357493962808046": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "11762345626350030823": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "5428672297616140288": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "10600353264973098791": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "15588761039208349307": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "16123871259057596631": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "14991936861614608527": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "5843416316523596635": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8982180163543777584": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "9165342275276264623": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "8236012626172975207": ["convolution_gpu_bfyx_f16", 8],
+        "15754415694628496024": ["convolution_gpu_bfyx_f16", 8],
+        "17406936341866296662": ["convolution_gpu_bfyx_f16", 8],
+        "6529037726400888172": ["convolution_gpu_bfyx_f16", 7],
+        "9846172794997922278": ["convolution_gpu_bfyx_f16", 8],
+        "9493935898386112535": ["convolution_gpu_bfyx_f16", 8],
+        "7306292618362586227": ["convolution_gpu_bfyx_f16", 8],
+        "14967562562496852635": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9383921996276611774": ["convolution_gpu_bfyx_f16", 8],
+        "11168147969470549122": ["convolution_gpu_bfyx_f16", 8],
+        "14053664169544819760": ["convolution_gpu_bfyx_f16", 6],
+        "9184733790339562280": ["convolution_gpu_bfyx_f16", 8],
+        "10136778987522491016": ["convolution_gpu_bfyx_f16", 8],
+        "17355459669917546629": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14199620063459632318": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9059694442320091375": ["convolution_gpu_bfyx_f16", 8],
+        "3777808646279316435": ["convolution_gpu_bfyx_f16", 8],
+        "5077764668758006577": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15528598957598089701": ["convolution_gpu_bfyx_f16", 8],
+        "4910602340026479089": ["convolution_gpu_bfyx_f16", 8],
+        "3460615852926593636": ["convolution_gpu_bfyx_f16", 8],
+        "7051704960834828963": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "5849502570947855625": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17320230733736402509": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "14376448497282593859": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11059091112167439040": ["convolution_gpu_bfyx_os_iyx_osv16", 220],
+        "11073613812342958769": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4154541958145867375": ["convolution_gpu_bfyx_os_iyx_osv16", 595],
+        "11497596156215746295": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7989457597882264703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11224449857742374449": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8019330764912846895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8298488609133255406": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14393217564854520848": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "3141906957984957990": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8411633870815503324": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "17289238208820562994": ["convolution_gpu_bfyx_os_iyx_osv16", 993],
+        "17376882838565917025": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "376447867595880925": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9223591734176279618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2123481240130017671": ["convolution_gpu_bfyx_gemm_like", 0],
+        "60262519627721258": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5919114362027813213": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1357304910509750335": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2624254602965505549": ["convolution_gpu_bfyx_f16", 8],
+        "5577742374711315791": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "578315994260636114": ["convolution_gpu_bfyx_f16", 8],
+        "1262880924315152695": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6168533266847660009": ["convolution_gpu_bfyx_f16", 8],
+        "14627313247209797163": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1270860549971294137": ["convolution_gpu_bfyx_f16", 8],
+        "4422458267180761143": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16820926361172105951": ["convolution_gpu_bfyx_f16", 8],
+        "7270466581298144020": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17886363415956316754": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "1392628448770002052": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6733088214815340670": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9311722977080169500": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17741687009005052531": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "16599775094194414107": ["convolution_gpu_bfyx_f16", 7],
+        "17406888356387369802": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14665993929606055479": ["convolution_gpu_bfyx_f16", 6],
+        "1257358912309769908": ["convolution_gpu_bfyx_f16", 1],
+        "10136222189601190652": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3326350735262959593": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2567573000230960427": ["convolution_gpu_bfyx_f16", 6],
+        "1291883454136679475": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4612488935509382461": ["convolution_gpu_bfyx_f16", 8],
+        "44873969645629501": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2578924800298320995": ["convolution_gpu_bfyx_f16", 8],
+        "13431627645625703425": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3411924982644761856": ["convolution_gpu_bfyx_f16", 8],
+        "8768537636114686671": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7896309749934295024": ["convolution_gpu_bfyx_f16", 8],
+        "8325953452731944450": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "429170609380225181": ["convolution_gpu_bfyx_f16", 8],
+        "2114765225420794471": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18249809691677461763": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3183599956647450025": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9082014942562277789": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10836200599982993668": ["convolution_gpu_bfyx_f16", 7],
+        "12864139447025655415": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12523250500313140847": ["convolution_gpu_bfyx_f16", 6],
+        "12000695135118665982": ["convolution_gpu_bfyx_os_iyx_osv16", 724],
+        "742817882542885943": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4002541142367187136": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18334702187170194234": ["convolution_gpu_bfyx_f16", 7],
+        "3948406171098526504": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6872860675057100662": ["convolution_gpu_bfyx_f16", 8],
+        "17681970169299053286": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9324781373847684086": ["convolution_gpu_bfyx_f16", 8],
+        "4773059056534245515": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6177289206979472775": ["convolution_gpu_bfyx_f16", 8],
+        "13523041584984452151": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15908438282639350074": ["convolution_gpu_bfyx_f16", 8],
+        "6449849483344573800": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "874369470058074151": ["convolution_gpu_bfyx_f16", 8],
+        "9693986962988023660": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7166692858921939993": ["convolution_gpu_bfyx_f16", 8],
+        "16550351471125114158": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1398899063819018467": ["convolution_gpu_bfyx_f16", 8],
+        "18211571181565238164": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "6787622700852474159": ["convolution_gpu_bfyx_f16", 8],
+        "9330332380446446861": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3352821985265666302": ["convolution_gpu_bfyx_f16", 7],
+        "13446420473387679707": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10671502596789907716": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "10272839156464101832": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "2437761452427288852": ["convolution_gpu_bfyx_os_iyx_osv16", 172],
+        "10328358317722308811": ["convolution_gpu_bfyx_os_iyx_osv16", 242],
+        "8161893965853155550": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "10517584909517952169": ["convolution_gpu_bfyx_os_iyx_osv16", 997],
+        "16089431087164898643": ["convolution_gpu_bfyx_os_iyx_osv16", 841],
+        "18446632461258501693": ["fully_connected_gpu_bf_io_gemm", 2],
+        "16241580627391428048": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "12772269695217889469": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "11842608083285317080": ["convolution_gpu_bfyx_f16", 8],
+        "47254024987636917": ["convolution_gpu_bfyx_f16", 7],
+        "12203188887109457648": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10190268798161660806": ["convolution_gpu_bfyx_f16", 6],
+        "4893322057900198637": ["convolution_gpu_bfyx_f16", 8],
+        "14205587855693463012": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17828689021236060785": ["convolution_gpu_bfyx_f16", 8],
+        "5304616674637388104": ["convolution_gpu_bfyx_f16", 8],
+        "13774415976138800103": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16917839753648589507": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14219521195638380227": ["convolution_gpu_bfyx_f16", 8],
+        "1124640128461683757": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3648821283586419588": ["convolution_gpu_bfyx_f16", 8],
+        "8994225239524823748": ["convolution_gpu_bfyx_f16", 8],
+        "5364969049701663909": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10800673441196722081": ["convolution_gpu_bfyx_f16", 8],
+        "8758440761491268201": ["convolution_gpu_bfyx_f16", 8],
+        "2681801788585835365": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10429066432771360180": ["convolution_gpu_bfyx_f16", 8],
+        "13396899929422166121": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14329490371610928743": ["convolution_gpu_bfyx_f16", 8],
+        "10157392835347093325": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16379903423760197202": ["convolution_gpu_bfyx_f16", 8],
+        "6051540118588283350": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12727529778630405959": ["convolution_gpu_bfyx_f16", 8],
+        "18203336007027481684": ["convolution_gpu_bfyx_f16", 7],
+        "13735637322201614021": ["convolution_gpu_bfyx_f16", 8],
+        "16203649874514419110": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4970939826398189012": ["convolution_gpu_bfyx_f16", 7],
+        "8872133219292246457": ["convolution_gpu_bfyx_f16", 6],
+        "674741146618474055": ["convolution_gpu_bfyx_f16", 8],
+        "2192109161538544571": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9459049194486919395": ["convolution_gpu_bfyx_f16", 8],
+        "10883566778144231615": ["convolution_gpu_bfyx_f16", 3],
+        "13142382655510339647": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15651803299019465587": ["convolution_gpu_bfyx_f16", 7],
+        "4098800631750270081": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1700646036054230529": ["convolution_gpu_bfyx_f16", 3],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16", 489],
+        "17663469192304546280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "16672038432561840773": ["convolution_gpu_bfyx_gemm_like", 2],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8328046766891245727": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "2862029728492027826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10094608033766589665": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "4938427667130309532": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "37017760060253822": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16044646335477470657": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "571521463360043149": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3579916582911190192": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3771003491521695667": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "15514370342945522276": ["convolution_gpu_bfyx_os_iyx_osv16", 979],
+        "14438262965335231630": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "18041311106624909689": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5471430682416582179": ["convolution_gpu_bfyx_os_iyx_osv16", 235],
+        "18264290105582283647": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17555564884839598291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "15463465056816958579": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "614603377985036814": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "17657484186971431467": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "16966946384436994988": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8451212914744825089": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "5131348852069018593": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "13619081494170885939": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "14814906622813306907": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "10390270859807723238": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9729987752669765456": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "11111492998730881451": ["convolution_gpu_bfyx_os_iyx_osv16", 1122],
+        "8751030381556349657": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "3416294810798281053": ["convolution_gpu_bfyx_os_iyx_osv16", 1035],
+        "12066560812164094695": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "9332596500956923556": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "15067550526427941795": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "15428062440621131394": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "12832357598114345067": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1372767468794397354": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "14109435279166116002": ["convolution_gpu_bfyx_f16", 7],
+        "13459688909495870984": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13140258746301602394": ["convolution_gpu_bfyx_f16", 8],
+        "17781214375438792660": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "10287714400844285017": ["convolution_gpu_bfyx_f16", 8],
+        "1565634623724172264": ["convolution_gpu_bfyx_f16", 8],
+        "3176458788783865475": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16205415802333521877": ["convolution_gpu_bfyx_f16", 8],
+        "9501682982876002973": ["convolution_gpu_bfyx_f16", 6],
+        "6632679386692958385": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3606200006594557304": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9426719661295147907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9327423854596846454": ["convolution_gpu_bfyx_f16", 6],
+        "12461826750063163499": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10919680708143692288": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13973910769569755022": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "16651330026533439491": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14221540347396094429": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16715505791872304993": ["convolution_gpu_bfyx_f16", 6],
+        "18216528544556604342": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16146697325761324781": ["convolution_gpu_bfyx_f16", 6],
+        "9431016105508711343": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "2035553893876765347": ["convolution_gpu_bfyx_f16", 8],
+        "15357486621038352160": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5090963959865039880": ["convolution_gpu_bfyx_f16", 6],
+        "10683642935643589149": ["convolution_gpu_bfyx_f16", 4],
+        "17035059025727718755": ["convolution_gpu_bfyx_f16", 6],
+        "17686802379348903240": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "1168634671898399586": ["convolution_gpu_bfyx_f16", 5],
+        "10871963505418141901": ["convolution_gpu_bfyx_f16", 4],
+        "12780308533167351871": ["convolution_gpu_bfyx_f16", 1],
+        "17814572351621240649": ["convolution_gpu_bfyx_f16", 1],
+        "8199659032184139406": ["convolution_gpu_bfyx_f16", 1],
+        "17292147847795515942": ["convolution_gpu_bfyx_f16", 2],
+        "13050546314117448472": ["convolution_gpu_bfyx_f16", 1],
+        "15661919785407152450": ["convolution_gpu_bfyx_f16", 0],
+        "13361476452589900091": ["convolution_gpu_bfyx_f16", 8],
+        "6082362633323240591": ["convolution_gpu_bfyx_f16", 3],
+        "8046441445847114800": ["convolution_gpu_bfyx_f16", 8],
+        "2833200599027391230": ["convolution_gpu_bfyx_os_iyx_osv16", 381],
+        "12087098149473149843": ["convolution_gpu_bfyx_os_iyx_osv16", 5],
+        "10797803634452978403": ["convolution_gpu_bfyx_os_iyx_osv16", 470],
+        "1460198641822920308": ["convolution_gpu_bfyx_os_iyx_osv16", 139],
+        "7541313557160970669": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "11956882275779755540": ["fully_connected_gpu_fb_io_ref", 2],
+        "16260999652061745783": ["fully_connected_gpu_bfyx_ref", 2],
+        "8746885602394700190": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "9829405698318443184": ["convolution_gpu_bfyx_f16", 8],
+        "11076033670880910796": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16821706771740161698": ["convolution_gpu_bfyx_f16", 8],
+        "9820952616161226599": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8413361786588130499": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1954817953089924262": ["convolution_gpu_bfyx_f16", 8],
+        "1911974730574689700": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12327794035459404641": ["convolution_gpu_bfyx_f16", 8],
+        "13508363642895529597": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "18212917731579404065": ["convolution_gpu_bfyx_f16", 3],
+        "149782878608295661": ["convolution_gpu_bfyx_f16", 8],
+        "18278189975456459234": ["convolution_gpu_bfyx_f16", 4],
+        "5103958102100938512": ["convolution_gpu_bfyx_f16", 4],
+        "8327836810944774590": ["convolution_gpu_bfyx_f16", 6],
+        "17957554514796196053": ["convolution_gpu_bfyx_f16", 3],
+        "11235079901248304624": ["convolution_gpu_bfyx_f16", 4],
+        "12525888646035586976": ["convolution_gpu_bfyx_f16", 8],
+        "4250146685122778746": ["convolution_gpu_bfyx_f16", 4],
+        "345518063251891244": ["convolution_gpu_bfyx_f16", 3],
+        "4150442044954827851": ["convolution_gpu_bfyx_f16", 2],
+        "6010542147949689482": ["convolution_gpu_bfyx_f16", 3],
+        "3511884127716721063": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "17902687769380768374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5123488908996247917": ["convolution_gpu_bfyx_os_iyx_osv16", 128],
+        "3854084472651875897": ["convolution_gpu_bfyx_os_iyx_osv16", 547],
+        "10803004054574179414": ["convolution_gpu_bfyx_os_iyx_osv16", 884],
+        "8761961047097571733": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "3227291961704566512": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "15502623218270238644": ["convolution_gpu_bfyx_os_iyx_osv16", 541],
+        "13533336063700080325": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8595156989254845134": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14493123117003003092": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15727110405754725012": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10890620280807224744": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "16079792265815446547": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "15384055407657760803": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2464531851392092325": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "5613964218561759893": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11460648773146310189": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6593870431636005244": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "11529036254499853035": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "17010201596936918243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17881013712456488163": ["convolution_gpu_bfyx_os_iyx_osv16", 68],
+        "9336215801757107337": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "4972222030950072866": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "18113157997465675692": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1472667774257971884": ["convolution_gpu_bfyx_os_iyx_osv16", 828],
+        "7480855342650290772": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "17244746622354078542": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "251775001146378096": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "1264200731459756446": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "6968087469917482002": ["convolution_gpu_bfyx_os_iyx_osv16", 77],
+        "1607381610581485984": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "8919164618663601566": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "16853448010512574338": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "3010644722195354051": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "17062011653598617580": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 619],
+        "1377210419756613502": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1514213112647467874": ["convolution_gpu_bfyx_os_iyx_osv16", 437],
+        "17268633106022870055": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 997],
+        "13787398748724798340": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "9739119866883611322": ["convolution_gpu_bfyx_os_iyx_osv16", 815],
+        "7151167803631697120": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "8870164706606458004": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "9269498023794081940": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 477],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 871],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "5728070995112243570": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "5381496395266530071": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "9712640406795417230": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 58],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 808],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 56],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "13821372148587948765": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "4727004015814244856": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 460],
+        "3470176432841342662": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "8950283515337670839": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "3995072673238444396": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "5665180797552893949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7180904384828396567": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17041465029020839746": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8648502659728489503": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "2007359338465363037": ["convolution_gpu_bfyx_os_iyx_osv16", 110],
+        "16300204511212928772": ["convolution_gpu_bfyx_os_iyx_osv16", 589],
+        "10636266218009746496": ["convolution_gpu_bfyx_os_iyx_osv16", 591],
+        "17502734572225953539": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "9266211532252099402": ["fully_connected_gpu_fb_oi_ref", 0],
+        "6763848192987176713": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6123737429963241103": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "10102406370623883494": ["convolution_gpu_bfyx_os_iyx_osv16", 861],
+        "16125206369312086947": ["convolution_gpu_bfyx_os_iyx_osv16", 485],
+        "16927483709629289661": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "3196823812655863240": ["convolution_gpu_bfyx_os_iyx_osv16", 861],
+        "7968691295772769464": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "6100031133333761315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "4055514200737135942": ["fully_connected_gpu_bfyx_ref", 1],
+        "18141581865855554514": ["convolution_gpu_bfyx_os_iyx_osv16", 485],
+        "16956102699411887521": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "11526253915485637934": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "15696872908795836832": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "15332512198621601617": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "5702206454207934253": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 730],
+        "15414564531144316178": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "386448290084824203": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15390537225231495870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10038180349007230302": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 330],
+        "6817180081986948843": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1527649565538821618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7004336584711849988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2157468701794819044": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "15920115680945815097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1034],
+        "17778554668592635168": ["convolution_gpu_bfyx_os_iyx_osv16", 466],
+        "6999571050665340986": ["convolution_gpu_bfyx_os_iyx_osv16", 840],
+        "9879436330613366129": ["convolution_gpu_bfyx_gemm_like", 1],
+        "726019095679197164": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1865317677339946921": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12018933315566840474": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "7267651931396380072": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12324657364444167791": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 373],
+        "17351243519367619322": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17026338651868178077": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "4114184149613179671": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "9038567144062573854": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 1114],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "6418222853479731432": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7539191242110313918": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 976],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "5717588912072437191": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "10766144770072425534": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "6442062011017461761": ["convolution_gpu_bfyx_os_iyx_osv16", 208],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "3892512749863226006": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4970240836537468609": ["convolution_gpu_bfyx_os_iyx_osv16", 962],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 0],
+        "2017817372328795772": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18312913026696855515": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "1323873987880062206": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3828289925836476678": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "10112041311060264798": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "7966725359592006848": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "2213697863012348994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5200128826708487987": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910238486908592807": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7110283028091835342": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "16035239784731081694": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8190708817382075098": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "14088072670684726938": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "4594156436010043898": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "11599404585487705575": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "12238796233133147488": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "16062641979970268785": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "17970835612618431265": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "2793976170555467399": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "5268998395189523109": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "10247076603819003292": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "10411646581372174184": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "3783590807023839590": ["convolution_gpu_bfyx_os_iyx_osv16", 712],
+        "13040613656895011417": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "3426085674061936062": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "18191480673111859449": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "3168817659922190247": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "18315877695535348266": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "12547634427503359071": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "16329007163840646462": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "10029877845127663589": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "2314415797696124986": ["convolution_gpu_bfyx_os_iyx_osv16", 1088],
+        "16980380685273501504": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "3178865432099367094": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "14025615946937229331": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "9213611800089847066": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "16929122365386190391": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "2135878993442720196": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "9676824536524126662": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "920276615573431782": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "14160730014298968824": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17736530310730065811": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "2980714886349866400": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16634588113528268855": ["convolution_gpu_bfyx_os_iyx_osv16", 981],
+        "11974061312537998708": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "16035580169248458433": ["convolution_gpu_bfyx_os_iyx_osv16", 617],
+        "9866780121729912726": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "9774829335571618473": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12220806137793480020": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18351615003377381150": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "5523604552813225273": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "7679309022130741323": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "5318931986270088360": ["convolution_gpu_bfyx_gemm_like", 1],
+        "515117191459385744": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8719869282082754142": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "7982863980065943223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11226945962148431484": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4241838582334505669": ["convolution_gpu_bfyx_gemm_like", 2],
+        "377042666741080260": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18145274589954906463": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6999860230736815298": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "16857606646270000245": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12338108420996610172": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10159450328554854004": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9170293267334520501": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "566685987437510322": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3194003345823695583": ["convolution_gpu_bfyx_os_iyx_osv16", 746],
+        "12107562407862382766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7161737091607459281": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9553813691004246971": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "10335630215626781232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660045223846569448": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14844074799300904420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5366152766029340057": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "8299878919282539563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18146920703695658789": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "9019625678983697946": ["convolution_gpu_bfyx_os_iyx_osv16", 689],
+        "10578264750808095350": ["convolution_gpu_bfyx_os_iyx_osv16", 1070],
+        "17553228602707603911": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "11544029240137241864": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "3625681568469091400": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "8849298369373186729": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "10796031718453810929": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9053983956770697828": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "6446557539680352152": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1642704598828904520": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "8319779172385327650": ["convolution_gpu_bfyx_os_iyx_osv16", 625],
+        "11579387987720364831": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12754351323109225715": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1114],
+        "7903220569487431556": ["convolution_gpu_bfyx_os_iyx_osv16", 950],
+        "3905190080706902824": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8296759260312471619": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17301520533084822859": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "14740238736074743734": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "11837023395630571569": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18200031323963616161": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 364],
+        "4125453719396313232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3653945386031463537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "290357754290893078": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3852245179144851596": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13731852935536160843": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "8777588932609025138": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1316118918790851994": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "11178580933542373407": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 360],
+        "17878041282431477247": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18049861144026923516": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2141454343831534876": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9144400494257163130": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13190119938630028553": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "4903536862079845135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "15066104804156933222": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13457620264718125011": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "16436525035845780373": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 732],
+        "11501452337228727462": ["convolution_gpu_bfyx_os_iyx_osv16", 199],
+        "14843223893923209210": ["convolution_gpu_bfyx_os_iyx_osv16", 199],
+        "3403065541792865347": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "5747468958285466504": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17552192746313035704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4855884888715402777": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "6932556634380539441": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "9400396209180747044": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "10431774409348875623": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "9495099584417616887": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "9115704215611322151": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "11735107098356940998": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15204384674852423405": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16866113149488400688": ["convolution_gpu_bfyx_os_iyx_osv16", 147],
+        "15389774302738715375": ["convolution_gpu_bfyx_os_iyx_osv16", 627],
+        "8101177730804364242": ["convolution_gpu_bfyx_os_iyx_osv16", 148],
+        "10149791427786334512": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "11053198857132396443": ["convolution_gpu_bfyx_os_iyx_osv16", 283],
+        "3963577328998759824": ["fully_connected_gpu_fb_oi_ref", 2],
+        "800184023925596362": ["convolution_gpu_bfyx_os_iyx_osv16", 493],
+        "13839532421033004873": ["convolution_gpu_bfyx_os_iyx_osv16", 566],
+        "8262487256974801864": ["convolution_gpu_bfyx_os_iyx_osv16", 148],
+        "3693217331248996607": ["convolution_gpu_bfyx_os_iyx_osv16", 898],
+        "10388555096612441710": ["convolution_gpu_bfyx_os_iyx_osv16", 526],
+        "8892698757722619628": ["convolution_gpu_bfyx_os_iyx_osv16", 146],
+        "9606108204575763003": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "8449999818915991236": ["fully_connected_gpu_fb_io_ref", 2],
+        "7933040116770016066": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1919536721555752974": ["convolution_gpu_bfyx_os_iyx_osv16", 1088],
+        "10686800639842865597": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8687217977804450176": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3954066703109036822": ["convolution_gpu_bfyx_gemm_like", 1],
+        "723914723460931977": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11198516910049713685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1635320120115967164": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15344790681368521678": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "12844169781725567332": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17741034184665639196": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15923530138304858829": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10444674910548414627": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10302498589531075361": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4924266705550545296": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18358817826057771246": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5814292023792160102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11190351855453911732": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9686754964115262880": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10699818671891976144": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11629568560686145289": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2754112975365662883": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14572211541644991947": ["convolution_gpu_bfyx_os_iyx_osv16", 49],
+        "15460159349027866277": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "1933120851078072002": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "15544724104656453486": ["convolution_gpu_bfyx_os_iyx_osv16", 359],
+        "9953946296788154289": ["convolution_gpu_bfyx_os_iyx_osv16", 592],
+        "5949275355217152112": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "9953648472305845286": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 366],
+        "4585615709600143734": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "5688607327240251933": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17872945111265083716": ["convolution_gpu_bfyx_os_iyx_osv16", 599],
+        "7002575346587056029": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "4053858347143322566": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "15684381282886192452": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9172655573618628060": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "10794126133490266436": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "13850228162972171575": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "129286539782466549": ["convolution_gpu_bfyx_os_iyx_osv16", 595],
+        "405864173902226347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "11446357246069900060": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11612145813762780082": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15323010740285064115": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "9782042377801038578": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "15340106601175659588": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3000754961057044652": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13882543862049484032": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "459319667430150397": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12757674875116871887": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16119575123089076330": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "17015151842140598799": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "634038212244146017": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16", 8],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 5],
+        "967593872851912083": ["convolution_gpu_bfyx_f16", 8],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 5],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16", 8],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 5],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16", 8],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 5],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 5],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16", 8],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 5],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 5],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 5],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 5],
+        "357806365552700839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 5],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 5],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 5],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16", 7],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 4],
+        "346998321908284784": ["convolution_gpu_bfyx_f16", 6],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 3],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16", 6],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 4],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16", 7],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 3],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16", 7],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 3],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16", 6],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 4],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16", 7],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 4],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16", 6],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 3],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16", 7],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 4],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16", 7],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 3],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16", 6],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 4],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16", 6],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 5],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16", 7],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 3],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16", 6],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 4],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16", 6],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 3],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16", 7],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 3],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16", 7],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 3],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16", 7],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 4],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16", 6],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 3],
+        "260499864874634958": ["convolution_gpu_bfyx_f16", 7],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 3],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16", 6],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 4],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16", 6],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 4],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16", 7],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 4],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16", 7],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 3],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 3],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 2],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 4],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 1],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 3],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 1],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 4],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 2],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 4],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 1],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 3],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 2],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 4],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 2],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 3],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 2],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 5],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 2],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 4],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 1],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 3],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 2],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 4],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 2],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 4],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 2],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 4],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 2],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 5],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 1],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 3],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 1],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 6],
+        "11479153223948565455": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15137118881649312407": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7380413826069265610": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16535858081334660130": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3621905235571219180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15946837476334836670": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "245178301664812042": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11536204967390696799": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13202661087717766278": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17082033214052891239": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10972993149458384549": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13266975232886004160": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5239323177752135143": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13950458285304028472": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1153656272296563651": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15832393447136864275": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449769853632530": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16481491209623188639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16355932574879498582": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9885117015102902622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17948745397003387421": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6169584310346033045": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11946156629252758613": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8766639290602892682": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4124732995953832580": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14120940518810838558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15477415938111847293": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7899374704077099747": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1738224818674864374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4675498016268563894": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11678653628752466495": ["convolution_gpu_bfyx_gemm_like", 2],
+        "823094503720427089": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6268238156027633260": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12067387912557140291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14700484317091478179": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5093753362153705304": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7185731190256343440": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7147929965532955967": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "11272978444176415320": ["convolution_gpu_bfyx_os_iyx_osv16", 707],
+        "3664831747298375482": ["convolution_gpu_bfyx_os_iyx_osv16", 1122],
+        "5055315246446375474": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "11248871352103466387": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "14138271699174946769": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "11248138620600796041": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "8218608499996018829": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "492405382055839338": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "13627463949725014842": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "10442692749607465731": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "5257716983547940732": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "4531738938698034182": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "4103900860372048770": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "1763848406836981250": ["convolution_gpu_bfyx_os_iyx_osv16", 748],
+        "13050289716763141821": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "5246872552943832761": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "8103482664263052993": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "4890599355418453618": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "13440603011986281192": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "7470027005329223304": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "10193635775409684341": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "9727214793392528330": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10481905734789810461": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "17748868035178556381": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "1557394183568627973": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "1431347831018127681": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "11729412526159852880": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "4899105740108544338": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "8050406060207298909": ["convolution_gpu_bfyx_os_iyx_osv16", 1127],
+        "7380902367877842940": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "12400142005537988277": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "7084726217254409262": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "13881126705282937733": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "3268450385258447029": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "3315969006703902437": ["convolution_gpu_bfyx_os_iyx_osv16", 373],
+        "7995430380267318045": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "13355664807789465988": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "1814690350132893834": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "10977798741323641518": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "16290685659520662243": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "14814993085047057124": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "16036211705705298060": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3314627126439576532": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "5397150622881607923": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "4417629288282219686": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "2593493324630665553": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "2115136697391853510": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "3903972756038760641": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "18309964708787622418": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "10898709444676724488": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "3114718546872961667": ["convolution_gpu_bfyx_os_iyx_osv16", 212],
+        "4116817191288103322": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "5759507923877307269": ["convolution_gpu_bfyx_os_iyx_osv16", 591],
+        "13521523772245595449": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "7025699501997365179": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "17325198932789845471": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "1929216390450946038": ["convolution_gpu_bfyx_os_iyx_osv16", 587],
+        "9359713794448163515": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "1064765432017421754": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "17903113127620271097": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "955947984048164651": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "6871124717336911723": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "17054742656500024833": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "8735118147118298928": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "7689370938722443575": ["convolution_gpu_bfyx_os_iyx_osv16", 589],
+        "7389433284327478008": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "6352588504037946062": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "3420065266906936372": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "5158493429539582334": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "8584667522373731666": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "16628885743804758299": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "9979885527081183609": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "11585377068025763798": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "270198976247871883": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "14806119107242947719": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "6237698548794601324": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "16586342221264661586": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "8378911742901238960": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "8878591357527094058": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16", 403],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16", 8],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16", 8],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16", 8],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16", 7],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 6],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 7],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 7],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 6],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 7],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 7],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 6],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 6],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 7],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 7],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 7],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 7],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16", 6],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 4],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16", 6],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 4],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16", 7],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 3],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16", 7],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 4],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16", 7],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 4],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16", 7],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 3],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16", 7],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 3],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16", 6],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 4],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16", 7],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 4],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16", 6],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 4],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16", 6],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 4],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16", 6],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 4],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16", 6],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 4],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16", 7],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 3],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16", 7],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 3],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16", 7],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 4],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16", 6],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 3],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16", 7],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 3],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16", 6],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 3],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16", 6],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 3],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16", 6],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 3],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16", 6],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 3],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16", 7],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 4],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16", 7],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 4],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 3],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 5],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 3],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 4],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 4],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 5],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 4],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 4],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 4],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 4],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 3],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 5],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 3],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 4],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 3],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 5],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 4],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 5],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 3],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 3],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 3],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 4],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 4],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 5],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 3],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 4],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 3],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 5],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 4],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 3],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 4],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 5],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 6],
+        "4652102901251847499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8552605555461651066": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4821707856043228388": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like", 2],
+        "867673900353092030": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9400507072890048966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13391871893495885313": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10371076921125171059": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10315090439844489700": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "671453551040072499": ["convolution_gpu_bfyx_os_iyx_osv16", 119],
+        "7957019749780783255": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "14034525799882831106": ["convolution_gpu_bfyx_os_iyx_osv16", 869],
+        "3916912615549949771": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5115007207028125638": ["convolution_gpu_bfyx_os_iyx_osv16", 870],
+        "3702373232430988630": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7913076120244203725": ["convolution_gpu_bfyx_os_iyx_osv16", 866],
+        "17778091287904736965": ["convolution_gpu_bfyx_os_iyx_osv16", 868],
+        "16866405531619284081": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10645625090439446714": ["convolution_gpu_bfyx_os_iyx_osv16", 114],
+        "3118240332710616352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7450417963648518926": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "18271341717679165017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1520529227443340435": ["convolution_gpu_bfyx_os_iyx_osv16", 866],
+        "6547588888976666790": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2920840796593281126": ["convolution_gpu_bfyx_os_iyx_osv16", 867],
+        "3243287355593359731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15289152041466330689": ["convolution_gpu_bfyx_os_iyx_osv16", 117],
+        "11745487821055710420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10330180429524641331": ["convolution_gpu_bfyx_os_iyx_osv16", 114],
+        "2413743706626149595": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17228810554159747400": ["convolution_gpu_bfyx_os_iyx_osv16", 118],
+        "2891977832675907820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5140042030231193807": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "12362834244136780846": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "12975331316527510995": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "11296280342006832013": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "2729382724566640622": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "1973144337799131575": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "11237620198863831646": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "13522230668952002294": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "1878253869657286717": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "15537416934472628620": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "1614676161640914325": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "7887122837178625925": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "13932612600851474669": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "108336648992892440": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "10684345634354913297": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "10916615896929712681": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "6401617291202138329": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "14418145752469985573": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "10000629948062903268": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "17913158947435785150": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3683538222536942924": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "3497309410275654168": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "5849203144808104114": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "6079947803671938062": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "10023464714622430341": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "10125169683435871224": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "16183189414217717282": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "4778769961736466493": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "14240807033488944743": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "9611215430798915107": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "13082046205786468713": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "12207197008210652563": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "10581403540319621428": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "8583043839495629208": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "14447820502121172060": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "16001364310945493562": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "8482359546526573989": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "1012101590389722479": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "14078917033502693044": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "3484370445244910200": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16", 9],
+        "2438463778071005693": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10241616750018729197": ["convolution_gpu_bfyx_os_iyx_osv16", 257],
+        "16093736249698386830": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "15577855965797137317": ["fully_connected_gpu_fb_oi_ref", 1],
+        "2793239401424346732": ["fully_connected_gpu_fb_oi_ref", 1],
+        "1090168454685651958": ["fully_connected_gpu_fb_oi_ref", 2],
+        "8694043970360551765": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8773350383870039461": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "18102285308171488538": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "4504463103561729721": ["fully_connected_gpu_fb_io_ref", 1],
+        "13026398103046869012": ["fully_connected_gpu_fb_oi_ref", 2],
+        "4938053383542014494": ["fully_connected_gpu_fb_oi_ref", 2],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16", 1100],
+        "10292349730148518173": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "17322754821646330275": ["convolution_gpu_bfyx_os_iyx_osv16", 564],
+        "9463001223908267526": ["convolution_gpu_bfyx_os_iyx_osv16", 673],
+        "2819993544283340217": ["convolution_gpu_bfyx_os_iyx_osv16", 149],
+        "4891941794728322149": ["convolution_gpu_bfyx_os_iyx_osv16", 294],
+        "17966409116732724850": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "16009549743559486766": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14591935906857802585": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "111424963409848995": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2739383731123097925": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17284261626529871462": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "5668693380660004839": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "12579230945548766456": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5167557197439368430": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1540552565663233708": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "16096568902203474447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5028262864972382565": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1974417291828577": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "2599172922002088957": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11660160310320618383": ["convolution_gpu_bfyx_gemm_like", 0],
+        "6321333724966975926": ["convolution_gpu_bfyx_os_iyx_osv16", 342],
+        "13769852278335802471": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "79817180213970569": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7224734161984848733": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2597920881875761524": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "17470658487460623535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3304768856579090475": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13876951907579147655": ["convolution_gpu_bfyx_os_iyx_osv16", 261],
+        "10366703264083184092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7178492718471026756": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2607889212984737257": ["convolution_gpu_bfyx_os_iyx_osv16", 298],
+        "6528945595038330865": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9831986499172731633": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18440050172847926353": ["convolution_gpu_bfyx_os_iyx_osv16", 257],
+        "6512088599266777589": ["convolution_gpu_bfyx_os_iyx_osv16", 640],
+        "10631671892805059138": ["convolution_gpu_bfyx_os_iyx_osv16", 154],
+        "13517627553690454113": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "17864904691465978047": ["convolution_gpu_bfyx_os_iyx_osv16", 158],
+        "7688613129211669281": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "15276587352894128846": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "16583642152876546031": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "1540351396976309640": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "12018060391889249406": ["convolution_gpu_bfyx_os_iyx_osv16", 529],
+        "1703594828023385832": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "16671415101494484639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4660166087476681397": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "824242000358871449": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "9894766303335506733": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17266480567140619519": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "13198159541095771298": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "3117760785038488579": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "1476464784116064433": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4485934013026623941": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "16673650204498772920": ["convolution_gpu_bfyx_os_iyx_osv16", 219],
+        "8328912827514946731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15425046562310745575": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9559768114277499815": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "8904325051665606784": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2193347488577584488": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6699483770041820657": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "789366296550494453": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14473138580870542149": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "1428800094127546021": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "14142504888572786665": ["convolution_gpu_bfyx_os_iyx_osv16", 271],
+        "1984025014517619256": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7957167898986800985": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "412995552853553524": ["convolution_gpu_bfyx_f16", 7],
+        "7058232330882130703": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15549425900373079382": ["convolution_gpu_bfyx_f16", 8],
+        "2713038204741622907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1878980012173918209": ["convolution_gpu_bfyx_f16", 8],
+        "12468208151780727122": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6674575974748163031": ["convolution_gpu_bfyx_f16", 8],
+        "5591111867402032949": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3413916493145831316": ["convolution_gpu_bfyx_f16", 8],
+        "12421615174911349736": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689084255978323672": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12474210147973914830": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14174889288973953645": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18224887830367116006": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16049847963625476676": ["convolution_gpu_bfyx_os_iyx_osv16", 499],
+        "3817623781909159313": ["convolution_gpu_bfyx_f16", 7],
+        "3004968067582685285": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6876765637331622545": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6802301901709446085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13245964863324091195": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "953254263392356310": ["convolution_gpu_bfyx_f16", 2],
+        "5388858533648189105": ["convolution_gpu_bfyx_f16", 4],
+        "3226238265868290723": ["convolution_gpu_bfyx_f16", 7],
+        "10098858620420134682": ["convolution_gpu_bfyx_f16", 1],
+        "18308172581381789101": ["convolution_gpu_bfyx_f16", 2],
+        "12846183737006963638": ["convolution_gpu_bfyx_f16", 3],
+        "8746233054079242877": ["convolution_gpu_bfyx_f16", 2],
+        "7516276889336424671": ["convolution_gpu_bfyx_f16", 4],
+        "8240661672477348007": ["convolution_gpu_bfyx_f16", 2],
+        "7421142512620741721": ["convolution_gpu_bfyx_f16", 2],
+        "17095633565672192085": ["convolution_gpu_bfyx_f16", 5],
+        "7381046541836362634": ["convolution_gpu_bfyx_f16", 1],
+        "7006663637645720459": ["convolution_gpu_bfyx_f16", 3],
+        "554667746487334145": ["convolution_gpu_bfyx_f16", 7],
+        "1899794088311416867": ["convolution_gpu_bfyx_f16", 6],
+        "4461871297663195464": ["convolution_gpu_bfyx_f16", 1],
+        "845238018552466931": ["convolution_gpu_bfyx_f16", 1],
+        "1588946175550138318": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15493305609986974083": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18266429764179335648": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4773783671939023015": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4841057875316789358": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10434845132440395347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4971104866692187809": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3918510119122483722": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10511458406494047485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4104477639131772427": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14619253124444303162": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "2303241947828987936": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "15440788136860909526": ["convolution_gpu_bfyx_os_iyx_osv16", 1028],
+        "5886674354741908134": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8121822626577551399": ["convolution_gpu_bfyx_os_iyx_osv16", 653],
+        "6561450336890348030": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16", 686],
+        "6864098212683093769": ["convolution_gpu_bfyx_os_iyx_osv16", 1067],
+        "12286768317527546407": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15803888689432429483": ["convolution_gpu_bfyx_os_iyx_osv16", 653],
+        "2969163284049372725": ["convolution_gpu_bfyx_os_iyx_osv16", 288],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16", 294],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16", 1026],
+        "13943934495343791315": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "2864116308996401112": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16", 133],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "12840351521230542751": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "10101063893937511289": ["convolution_gpu_bfyx_os_iyx_osv16", 479],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16", 479],
+        "16052212361531923323": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "14021819955559248258": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "18183296320499063227": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16", 426],
+        "13889057206654080908": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2813710942447372241": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "13633232435632839044": ["convolution_gpu_bfyx_f16", 7],
+        "2883172178329270363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9432546329737888706": ["convolution_gpu_bfyx_f16", 8],
+        "12985746913235154779": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17940668702908419725": ["convolution_gpu_bfyx_f16", 8],
+        "2064000219100642226": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5833649709217830223": ["convolution_gpu_bfyx_f16", 8],
+        "10849235794440642481": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6321445979984216128": ["convolution_gpu_bfyx_f16", 7],
+        "14697315322325185660": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "36079357617783912": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4063865474431180498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13167503358764278233": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17498603449428007802": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6304136029727027056": ["convolution_gpu_bfyx_os_iyx_osv16", 166],
+        "1754448782405089213": ["convolution_gpu_bfyx_f16", 7],
+        "15489166244290113065": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5756918986564223629": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8035545676843269497": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17042017278300937839": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11260048813076144906": ["convolution_gpu_bfyx_f16", 1],
+        "6873924247641352061": ["convolution_gpu_bfyx_f16", 3],
+        "6474957215284027135": ["convolution_gpu_bfyx_f16", 6],
+        "16573724507496129614": ["convolution_gpu_bfyx_f16", 7],
+        "11210971373278055121": ["convolution_gpu_bfyx_f16", 4],
+        "185717560970701618": ["convolution_gpu_bfyx_f16", 5],
+        "11817410866221484993": ["convolution_gpu_bfyx_f16", 3],
+        "9765519004693711463": ["convolution_gpu_bfyx_f16", 6],
+        "14300671725579588671": ["convolution_gpu_bfyx_f16", 5],
+        "1297549572559338433": ["convolution_gpu_bfyx_f16", 4],
+        "4346210823986581329": ["convolution_gpu_bfyx_f16", 6],
+        "2750608965765787878": ["convolution_gpu_bfyx_f16", 5],
+        "14245442283142381063": ["convolution_gpu_bfyx_f16", 7],
+        "2942593456597250269": ["convolution_gpu_bfyx_f16", 5],
+        "14807774261203767931": ["convolution_gpu_bfyx_f16", 8],
+        "2024891861044519704": ["convolution_gpu_bfyx_f16", 8],
+        "12988352411577718659": ["convolution_gpu_bfyx_f16", 1],
+        "7546167886043158750": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12777758044198094011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17999895886988202252": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7284204319739516687": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11574916930945966662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12181953262469206135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11001131415959768285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11516255774873880270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17905472119711952421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3708423242842748011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16735610121492345646": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10393786933242452104": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8593006729492614006": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "8080047256092430454": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5827132729840694911": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2862262622518056270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7592655788466931007": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "10751447918697845967": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "14327549932088763609": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "9139350052341521235": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2268155498775258271": ["convolution_gpu_bfyx_os_iyx_osv16", 1002],
+        "9252995576301318377": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "16131094933895726474": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "2390813972238809739": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "6575286116803785519": ["convolution_gpu_bfyx_os_iyx_osv16", 911],
+        "9509860212160444680": ["convolution_gpu_bfyx_os_iyx_osv16", 142],
+        "2025729513014515133": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "7012386443457106080": ["convolution_gpu_bfyx_os_iyx_osv16", 519],
+        "10807317048120773939": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "13800264518247731721": ["convolution_gpu_bfyx_os_iyx_osv16", 162],
+        "10381956671421182115": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "4874673523117573787": ["convolution_gpu_bfyx_os_iyx_osv16", 918],
+        "18140414399325733479": ["convolution_gpu_bfyx_os_iyx_osv16", 426],
+        "5854165399605633326": ["convolution_gpu_bfyx_os_iyx_osv16", 423],
+        "17238058461587589303": ["convolution_gpu_bfyx_os_iyx_osv16", 797],
+        "4101383449947395379": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "2697043651083211983": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "1196153439884178828": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "1408371298472575421": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "9614122272772797675": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7199567766573336359": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13573164884579883011": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15482728985931330311": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4607650298345740971": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16086873164128770879": ["convolution_gpu_bfyx_gemm_like", 2],
+        "105926781977700977": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11591232422517503119": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11582016741808877197": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16914574072145986060": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6022176855777948587": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8941858845051007302": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9357675997524716463": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3521176117120705338": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12045093589986262223": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "470065517654323782": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16734161909350784601": ["convolution_gpu_bfyx_os_iyx_osv16", 973],
+        "11121230809258677064": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6349024748484491361": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9689224985169331447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3750053020466161808": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "15788948623626667459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13291988829313422545": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 2],
+        "17375427967226537519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "356011965155211999": ["convolution_gpu_bfyx_os_iyx_osv16", 285],
+        "10249443290070223207": ["convolution_gpu_bfyx_os_iyx_osv16", 548],
+        "11731131619682311119": ["convolution_gpu_bfyx_os_iyx_osv16", 283],
+        "499465197159774125": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "6713136765330410003": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "10482500982261483441": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12868046747643626115": ["convolution_gpu_bfyx_os_iyx_osv16", 286],
+        "3118940652855466279": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "9133477146144263621": ["convolution_gpu_bfyx_os_iyx_osv16", 1038],
+        "6014658843738581344": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2254000832500315403": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "2201913047888029571": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "6765174963106729735": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6860612036193780126": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4053722516029644812": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "3872902814632377403": ["convolution_gpu_bfyx_os_iyx_osv16", 175],
+        "11807558788154880902": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11052363375504603312": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "5704480811160976661": ["convolution_gpu_bfyx_os_iyx_osv16", 179],
+        "2631038501229053001": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11448877892018743111": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10504809699083269708": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 582],
+        "221686752427251764": ["convolution_gpu_bfyx_os_iyx_osv16", 541],
+        "8099629938775512387": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5641577920984461497": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 542],
+        "12153763576335891417": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "10715707282679913174": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15195978022706554558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4702145645721143238": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10415281487218000500": ["convolution_gpu_bfyx_gemm_like", 2],
+        "680533894953795110": ["convolution_gpu_bfyx_os_iyx_osv16", 1025],
+        "1524996376386486665": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "2180727313291426024": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "13865408769089368168": ["convolution_gpu_bfyx_os_iyx_osv16", 530],
+        "17729561573161674389": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "14102092207521274159": ["convolution_gpu_bfyx_os_iyx_osv16", 874],
+        "14601505600623942303": ["convolution_gpu_bfyx_os_iyx_osv16", 499],
+        "93092162022748986": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15895053123520992434": ["convolution_gpu_bfyx_os_iyx_osv16", 923],
+        "14005851072926998714": ["convolution_gpu_bfyx_os_iyx_osv16", 544],
+        "13582287631171243512": ["convolution_gpu_bfyx_os_iyx_osv16", 597],
+        "10982128848228134282": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "7236965443679023925": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "1267627207431132628": ["convolution_gpu_bfyx_os_iyx_osv16", 137],
+        "2427481818567622188": ["convolution_gpu_bfyx_os_iyx_osv16", 532],
+        "9499169226931836849": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "14841135939793901331": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "13877129322236450083": ["convolution_gpu_bfyx_os_iyx_osv16", 875],
+        "6772340882401465511": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11743064882436041973": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10729082617196359413": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8688603561602716375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6216329929003742144": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5858568936289863149": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1258577325908211211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10990147603320054495": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13137659893098575291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2695989423525253829": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3932955531996129807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14834765532454121330": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11205075769094656704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10056755067893619842": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8845972204063781512": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "5286686388506198758": ["convolution_gpu_bfyx_os_iyx_osv16", 670],
+        "17044347247573802405": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16931221552471635881": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3375470456077799802": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14332199338789934423": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16759785658634382018": ["convolution_gpu_bfyx_os_iyx_osv16", 385],
+        "9833509391965801955": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "853439126393091889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10335429769666812841": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "9424664012357101635": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5860372371921305416": ["convolution_gpu_bfyx_os_iyx_osv16", 970],
+        "3503193615625158929": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14744249132822614079": ["convolution_gpu_bfyx_os_iyx_osv16", 593],
+        "5243045977966841351": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12657769780794263187": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "7005710331306745857": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17732714197816812919": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2691481290737970286": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16758724324099838132": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "13321275573521697498": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17371402188380900420": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "15857411657993741130": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "10824769165318760081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1670508622389791801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6601005881101223654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3767953997999748671": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12193543332391207302": ["convolution_gpu_bfyx_os_iyx_osv16", 818],
+        "16644569811401857265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14644196187730386778": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 242],
+        "15997330269289678741": ["convolution_gpu_fs_byx_fsv32", 20],
+        "9456547817322301854": ["convolution_gpu_fs_byx_fsv32", 14],
+        "14503081204981089589": ["convolution_gpu_fs_byx_fsv32", 8],
+        "17459372555428323405": ["convolution_gpu_fs_byx_fsv32", 1],
+        "11045313080354230499": ["convolution_gpu_fs_byx_fsv32_1x1", 1],
+        "11891736161858320688": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "3837179970761308107": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "1599135987505067413": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "88960405449779079": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "3983071771155729815": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "4686928543634340294": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "9500201961536063781": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "5626617363814193337": ["convolution_gpu_bfyx_os_iyx_osv16", 41],
+        "9493629616033946504": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "9142997105687030758": ["convolution_gpu_bfyx_os_iyx_osv16", 755],
+        "3565303211593767799": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "1638619072790951553": ["convolution_gpu_bfyx_os_iyx_osv16", 285],
+        "16187579575395923193": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14768404566434004921": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "4439755580616372110": ["convolution_gpu_bfyx_os_iyx_osv16", 158],
+        "12082385141539179745": ["convolution_gpu_bfyx_os_iyx_osv16", 472],
+        "80211457682233943": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "2281851137797618536": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "4306482192283599644": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "7438079994024163367": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "11505611789014119307": ["convolution_gpu_bfyx_gemm_like", 2],
+        "437815073846842580": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1950316744853763835": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2101440743856834523": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11177728104020690382": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11070046570645256268": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12619772485618838435": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2751512607890114618": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15183698566691504656": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "12653721467536263212": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "13194232160397919757": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "204538163378003996": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "6149494643008538957": ["convolution_gpu_bfyx_os_iyx_osv16", 377],
+        "11290368603402236066": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1323592601201034234": ["convolution_gpu_bfyx_f16", 7],
+        "14798486770850675841": ["convolution_gpu_bfyx_f16", 7],
+        "11673314628747753691": ["convolution_gpu_bfyx_f16", 4],
+        "7021961511624638678": ["convolution_gpu_bfyx_f16", 4],
+        "5676198353742450430": ["convolution_gpu_bfyx_f16", 3],
+        "4929819810689803833": ["convolution_gpu_bfyx_f16", 6],
+        "240316590146675808": ["convolution_gpu_bfyx_f16", 5],
+        "17625565940895057722": ["convolution_gpu_bfyx_f16", 5],
+        "8688075088415087060": ["convolution_gpu_bfyx_f16", 3],
+        "3109943868702160503": ["convolution_gpu_bfyx_f16", 5],
+        "15650217867869430450": ["convolution_gpu_bfyx_f16", 2],
+        "17908144598228512507": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "12413306519886846795": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3485465952750021220": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16729621401445513163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5488147296483022703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8710473738514939538": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9147606392761848284": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "5087291643342132199": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11774085137209016046": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "8929841836974581600": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "7073670312468097760": ["convolution_gpu_bfyx_os_iyx_osv16", 843],
+        "14911211495772743601": ["convolution_gpu_bfyx_os_iyx_osv16", 802],
+        "3856389350154673872": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "14587774878993352201": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "17294259290189185308": ["convolution_gpu_bfyx_os_iyx_osv16", 794],
+        "3125709089627137774": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10167929165359894539": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "9467011207416095285": ["convolution_gpu_bfyx_f16", 8],
+        "18110939720141818253": ["convolution_gpu_bfyx_f16", 8],
+        "10809361044654815291": ["convolution_gpu_bfyx_f16", 8],
+        "8729970019383795569": ["convolution_gpu_bfyx_f16", 8],
+        "17827231016257521940": ["convolution_gpu_bfyx_f16", 8],
+        "5828304029836199809": ["convolution_gpu_bfyx_f16", 8],
+        "8608098673678326421": ["convolution_gpu_bfyx_f16", 8],
+        "4799951889015766408": ["convolution_gpu_bfyx_f16", 7],
+        "11256132692380923779": ["convolution_gpu_bfyx_f16", 8],
+        "5227747889064386895": ["convolution_gpu_bfyx_f16", 8],
+        "12435125610964522916": ["convolution_gpu_bfyx_f16", 6],
+        "16247711996185304739": ["convolution_gpu_bfyx_f16", 8],
+        "18419015006676659067": ["convolution_gpu_bfyx_f16", 8],
+        "14816607493224983644": ["convolution_gpu_bfyx_f16", 6],
+        "18080751562346851399": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7358709983309335021": ["convolution_gpu_bfyx_f16", 8],
+        "5951975369177447577": ["convolution_gpu_bfyx_f16", 8],
+        "4605643396574334390": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "568067246719517739": ["convolution_gpu_bfyx_f16", 8],
+        "5442649671500173287": ["convolution_gpu_bfyx_f16", 8],
+        "8903826140874560178": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9628142105100134135": ["convolution_gpu_bfyx_f16", 8],
+        "1232826373801366623": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13694514357024954788": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11391493057056286699": ["convolution_gpu_bfyx_f16", 8],
+        "2928262304708248227": ["convolution_gpu_bfyx_f16", 8],
+        "17499908409183932218": ["convolution_gpu_bfyx_f16", 4],
+        "14859658448955499179": ["convolution_gpu_bfyx_f16", 4],
+        "15311505436254873919": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5960462069119450195": ["convolution_gpu_bfyx_f16", 8],
+        "9149564669418564479": ["convolution_gpu_bfyx_f16", 8],
+        "18163755369052842813": ["convolution_gpu_bfyx_f16", 8],
+        "8275135719522690527": ["convolution_gpu_bfyx_f16", 8],
+        "2298529747054199771": ["convolution_gpu_bfyx_f16", 8],
+        "15976944134096292352": ["convolution_gpu_bfyx_f16", 8],
+        "547454211762819596": ["convolution_gpu_bfyx_f16", 8],
+        "17906950174330335301": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13031644522906269724": ["convolution_gpu_bfyx_f16", 8],
+        "18288385173121209976": ["convolution_gpu_bfyx_f16", 8],
+        "17061924482046716630": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11361932739710301784": ["convolution_gpu_bfyx_f16", 8],
+        "16458889125940546368": ["convolution_gpu_bfyx_f16", 8],
+        "12450622764005143821": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2903571673767001640": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13347455981832054533": ["convolution_gpu_bfyx_f16", 8],
+        "12538241775315086953": ["convolution_gpu_bfyx_f16", 8],
+        "8126691205953344871": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16780977023488631804": ["convolution_gpu_bfyx_f16", 8],
+        "17136478867951302904": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13212207942275182881": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14495079358949370120": ["convolution_gpu_bfyx_f16", 8],
+        "4382417733451183736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "288372190616771633": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11617333447192485775": ["convolution_gpu_bfyx_f16", 8],
+        "6974470789123895023": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18231804735191273525": ["convolution_gpu_bfyx_f16", 6],
+        "4874987389971025643": ["convolution_gpu_bfyx_f16", 3],
+        "3058493226413875038": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10596520518408393633": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "425449143970914526": ["convolution_gpu_bfyx_f16", 6],
+        "10926709262435197853": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15825916789772519361": ["convolution_gpu_bfyx_f16", 8],
+        "11344323188244299942": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10578113800829774986": ["convolution_gpu_bfyx_f16", 6],
+        "2375997562748751746": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2516065028198850400": ["convolution_gpu_bfyx_f16", 8],
+        "6409750149360913230": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10271647104505566706": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8508694183870500179": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5818470431977263086": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5193770703407776933": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17521329019150924840": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14014285669562955732": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6671379525619868817": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4953425498352128032": ["convolution_gpu_bfyx_f16", 8],
+        "17554861470547691900": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10785511833112040704": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17842238305349560675": ["convolution_gpu_bfyx_f16", 8],
+        "9045432887458921887": ["convolution_gpu_bfyx_f16", 8],
+        "2317068120382359117": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13198591357050450583": ["convolution_gpu_bfyx_f16", 6],
+        "16630820631092662612": ["convolution_gpu_bfyx_f16", 8],
+        "3304799458897138796": ["convolution_gpu_bfyx_f16", 8],
+        "12719370525372818641": ["convolution_gpu_bfyx_f16", 8],
+        "13033843297334845893": ["convolution_gpu_bfyx_f16", 8],
+        "3008335290324080979": ["convolution_gpu_bfyx_f16", 8],
+        "12990586786235599879": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7414503846719526880": ["convolution_gpu_bfyx_f16", 7],
+        "5220110435197346795": ["convolution_gpu_bfyx_f16", 8],
+        "2541237388942804073": ["convolution_gpu_bfyx_os_iyx_osv16", 195],
+        "16216088294429058352": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6548372255564207516": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "7504803815827679458": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7890404193762842042": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1983441240707747971": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "4803645476690765923": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "5462029271954491399": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11800826711545841506": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "15561297213164683600": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16248626680766522088": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "3284821679004727180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16599055682441641193": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "153264675527724419": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "17075015317341671432": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 333],
+        "632977374237919309": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "14450344739436846034": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9209250404182507365": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8415669066784458057": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "185843586737457998": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1085],
+        "766969822241155083": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "16155506862324955163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11978741458254664288": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11586230734901512993": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "10087695833233777190": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1083],
+        "12128657061255822293": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "17939537883510630662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18423168447470112214": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5547921846568118067": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "5296811001677778360": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 333],
+        "10607242950204268733": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9582149845440579162": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7551236304447290573": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "11927599447031406206": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "8913537827155053321": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "2694403950208867995": ["convolution_gpu_bfyx_f16", 8],
+        "8480814485407284385": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5659725884804864884": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17247202229862821831": ["convolution_gpu_bfyx_f16", 8],
+        "4591189240532371990": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11298211715109572712": ["convolution_gpu_bfyx_f16", 8],
+        "11060719795922800502": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4024014996080705374": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6324164991095466646": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2028232780476695855": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5660036914950333671": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13834181765190936016": ["convolution_gpu_bfyx_f16", 8],
+        "4422700514049031042": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9588011604564305874": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7215636441499808004": ["convolution_gpu_bfyx_f16", 8],
+        "17474693423719248969": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7554680558347192287": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3170336071769787200": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2846411667168152725": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11181943097674741934": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11639140799979401650": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16307198282569581614": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17680917278609527441": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13721389244734206054": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4859340216439264067": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14901974015744200645": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15824748596078822917": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13975002992350723907": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1856369619298816871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15980250964613837766": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "11188535023427121544": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6357407299587334303": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "1637111553324908623": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "17491572290683222099": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "276041414724056329": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "14980338802512175932": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1556522723879235440": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "16417146101754878208": ["convolution_gpu_bfyx_os_iyx_osv16", 88],
+        "15277806782771093230": ["convolution_gpu_bfyx_f16", 8],
+        "9169923259147715426": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9970341118085992354": ["convolution_gpu_bfyx_f16", 8],
+        "12104455606397742751": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "402419791784302832": ["convolution_gpu_bfyx_f16", 8],
+        "13972467430568459775": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4129442598734369883": ["convolution_gpu_bfyx_f16", 8],
+        "77779308553690261": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7805353658967212897": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17523151407361362563": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2925909029337163642": ["convolution_gpu_bfyx_f16", 8],
+        "9734910403081363436": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17920847151501327807": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12704616994417458301": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18154520535225399083": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12277279619939618291": ["convolution_gpu_bfyx_f16", 8],
+        "9784651392759875534": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2028955242295927382": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "796719947114218328": ["convolution_gpu_bfyx_f16", 4],
+        "7199374877403948402": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "7332724365602008957": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13826441040132777454": ["convolution_gpu_bfyx_f16", 8],
+        "845707625293752103": ["convolution_gpu_bfyx_f16", 6],
+        "17141580666144953740": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4767393715835846841": ["convolution_gpu_bfyx_f16", 6],
+        "11299501537291400011": ["convolution_gpu_bfyx_f16", 8],
+        "11125333100440185901": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10739033895708157022": ["convolution_gpu_bfyx_f16", 8],
+        "13410695662993340019": ["convolution_gpu_bfyx_f16", 6],
+        "9449557005968240342": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "13636060167487404399": ["convolution_gpu_bfyx_f16", 8],
+        "17615292268108897451": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "15275200239982983006": ["convolution_gpu_bfyx_f16", 8],
+        "6933005158879333442": ["convolution_gpu_bfyx_f16", 7],
+        "17404976166399004281": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15546952240223018544": ["convolution_gpu_bfyx_f16", 8],
+        "9221818348726873589": ["convolution_gpu_bfyx_f16", 8],
+        "4394722210952321875": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "6306121426740325003": ["convolution_gpu_bfyx_f16", 6],
+        "8909598090821234392": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8323852925946594550": ["convolution_gpu_bfyx_f16", 8],
+        "15387142572620537292": ["convolution_gpu_bfyx_f16", 8],
+        "7224633322280261797": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4152667642459353093": ["convolution_gpu_bfyx_f16", 7],
+        "6850677395681499432": ["convolution_gpu_bfyx_f16", 7],
+        "9650348888387532125": ["convolution_gpu_bfyx_f16", 8],
+        "6081667869975438905": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12327336773475034198": ["convolution_gpu_bfyx_f16", 7],
+        "1948599748319101030": ["convolution_gpu_bfyx_f16", 7],
+        "98134405895903266": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "14701329763611994976": ["convolution_gpu_bfyx_f16", 8],
+        "6716883556122508073": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16273453628106476710": ["convolution_gpu_bfyx_f16", 7],
+        "8720949360157265320": ["convolution_gpu_bfyx_f16", 8],
+        "7198010523779834541": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "2836223913370318262": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8745298918616517419": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14153070039524436781": ["convolution_gpu_bfyx_f16", 8],
+        "13942373222048813039": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2520658429967845372": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "3842241205837995725": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "13045259089446188773": ["convolution_gpu_bfyx_f16", 2],
+        "1946851578807856544": ["convolution_gpu_bfyx_f16", 5],
+        "16189255500363260553": ["convolution_gpu_bfyx_f16", 5],
+        "16375480181817879443": ["convolution_gpu_bfyx_f16", 4],
+        "1731805218150075474": ["convolution_gpu_bfyx_f16", 2],
+        "11489413392122514570": ["convolution_gpu_bfyx_f16", 3],
+        "10034218261019628269": ["convolution_gpu_bfyx_f16", 2],
+        "3242010181507046540": ["convolution_gpu_bfyx_f16", 1],
+        "17874779808897661223": ["convolution_gpu_bfyx_f16", 5],
+        "8285149767406178221": ["convolution_gpu_bfyx_f16", 1],
+        "14312011904722537311": ["convolution_gpu_bfyx_f16", 1],
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10247418143396239693": ["fused_conv_eltwise_gpu_ref", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "3025829117046314851": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9151746755060262640": ["convolution_gpu_winograd_2x3_s1_fused", 0],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "482233531247606412": ["convolution_gpu_winograd_2x3_s1_fused", 0],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17167052658616496904": ["convolution_gpu_bfyx_gemm_like", 0],
+        "758159154291645307": ["fully_connected_gpu_bfyx_ref", 0],
         "15320845027635796583": ["convolution_gpu_bfyx_gemm_like", 2],
         "10569376024770516176": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
         "2438374917504708831": ["convolution_gpu_bfyx_gemm_like", 2],
         "13762162740325518554": ["convolution_gpu_bfyx_os_iyx_osv16", 87]
     },
     "72": {
+        "3057483147285040704": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4460662214292495759": ["convolution_gpu_bfyx_f16", 8],
+        "17632851940131114495": ["convolution_gpu_bfyx_f16", 8],
+        "7945867532035693686": ["convolution_gpu_bfyx_f16", 8],
+        "10798155343477437060": ["convolution_gpu_bfyx_f16", 8],
+        "14191150640021059705": ["convolution_gpu_bfyx_f16", 8],
+        "14593228968660512118": ["convolution_gpu_bfyx_f16", 8],
+        "5573515532668433114": ["convolution_gpu_bfyx_f16", 8],
+        "11642934660277782628": ["convolution_gpu_bfyx_f16", 8],
+        "4825553592910970555": ["convolution_gpu_bfyx_f16", 8],
+        "17245530055973419690": ["convolution_gpu_bfyx_f16", 8],
+        "14644519840111409049": ["convolution_gpu_bfyx_f16", 8],
+        "15093112872571669071": ["convolution_gpu_bfyx_f16", 8],
+        "6172925429706792586": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16888914688498810916": ["convolution_gpu_bfyx_f16", 8],
+        "7094210524110336636": ["convolution_gpu_bfyx_f16", 8],
+        "1102719880087191972": ["convolution_gpu_bfyx_f16", 8],
+        "17635368969132641763": ["convolution_gpu_bfyx_f16", 8],
+        "6444855710931944326": ["convolution_gpu_bfyx_f16", 8],
+        "3685203889040861337": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8983142397488339162": ["convolution_gpu_bfyx_f16", 8],
+        "2942771097961823034": ["convolution_gpu_bfyx_f16", 8],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 1040],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16", 6],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 668],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 7],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 7],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 6],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 8],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 8],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16", 8],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 3],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 8],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 8],
+        "338313831905889757": ["convolution_gpu_bfyx_f16", 4],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 8],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 8],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16", 4],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 8],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 8],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 4],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 4],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 75],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 39],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 391],
+        "570493430126610249": ["fully_connected_gpu_bf_io_gemm", 2],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 1],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5689213766720451736": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11153755804932874939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13074790088623248655": ["convolution_gpu_bfyx_f16", 6],
+        "14552950763379636885": ["convolution_gpu_bfyx_f16", 7],
+        "1094600023872583173": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16827633753490728058": ["convolution_gpu_bfyx_f16", 8],
+        "6130516122331504865": ["convolution_gpu_bfyx_f16", 8],
+        "7670629548971090825": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5029322578170351026": ["convolution_gpu_bfyx_f16", 8],
+        "11682717086936489649": ["convolution_gpu_bfyx_f16", 8],
+        "9372644596618467274": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14183733053550126939": ["convolution_gpu_bfyx_f16", 7],
+        "5642981720905097704": ["convolution_gpu_bfyx_f16", 8],
+        "3924580903671169312": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17700105511171786728": ["convolution_gpu_bfyx_f16", 6],
+        "14998223809620050073": ["convolution_gpu_bfyx_f16", 8],
+        "419201770890811765": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7063350782589593425": ["convolution_gpu_bfyx_f16", 6],
+        "10687898799916833174": ["convolution_gpu_bfyx_f16", 6],
+        "5341504900604548311": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8082311845702095517": ["convolution_gpu_bfyx_f16", 7],
+        "5769891345892528049": ["convolution_gpu_bfyx_f16", 6],
+        "5034821474694053994": ["convolution_gpu_bfyx_f16", 8],
+        "2717532297792072749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "368628635269408785": ["convolution_gpu_bfyx_f16", 7],
+        "10159612784755046280": ["convolution_gpu_bfyx_f16", 8],
+        "15051374440521170869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17031332595095892437": ["convolution_gpu_bfyx_f16", 4],
+        "6938198718430530942": ["convolution_gpu_bfyx_f16", 8],
+        "2358029178760210430": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16131007134197705525": ["convolution_gpu_bfyx_f16", 4],
+        "6612035874395100788": ["convolution_gpu_bfyx_f16", 6],
+        "15022677981959490269": ["convolution_gpu_bfyx_f16", 7],
+        "11900509609879947992": ["convolution_gpu_bfyx_f16", 5],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3968445701280656378": ["convolution_gpu_bfyx_f16", 8],
+        "7463742252314920613": ["convolution_gpu_bfyx_f16", 8],
+        "17747065651432157057": ["convolution_gpu_bfyx_f16", 8],
+        "2951437417233062866": ["convolution_gpu_bfyx_f16", 8],
+        "4695031178096669813": ["convolution_gpu_bfyx_f16", 8],
+        "13200791011072363046": ["convolution_gpu_bfyx_f16", 8],
+        "7702483443698911725": ["convolution_gpu_bfyx_f16", 8],
+        "3225276687886679210": ["convolution_gpu_bfyx_f16", 8],
+        "8406061878298060171": ["convolution_gpu_bfyx_f16", 8],
+        "11861948300376902542": ["convolution_gpu_bfyx_f16", 8],
+        "18047654118875021903": ["convolution_gpu_bfyx_f16", 8],
+        "3876838946012690078": ["convolution_gpu_bfyx_f16", 7],
+        "11532811324432477051": ["convolution_gpu_bfyx_f16", 7],
+        "16482627014547828135": ["convolution_gpu_bfyx_f16", 6],
+        "4565106422618308814": ["convolution_gpu_bfyx_f16", 7],
+        "16991433003318725315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "16286683168753184722": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7074368169815304627": ["convolution_gpu_bfyx_os_iyx_osv16", 118],
+        "10702490327714920783": ["convolution_gpu_bfyx_gemm_like", 2],
+        "964168479107166949": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6252510766878541979": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1012052068628903875": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15499166167392043521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14327383763442344255": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18371627210590255356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13185859115957551268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15052792752810689842": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17918808521142517830": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1644157325342654261": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12198018126650448419": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9714393675511550323": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4928366179227934688": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15361605271135812199": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10267714663732575502": ["convolution_gpu_bfyx_1x1", 1],
+        "9990965405769569785": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10493403039286551634": ["convolution_gpu_bfyx_1x1", 1],
+        "18324310183763016728": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "6002923098500991259": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3429780644945779272": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6005067060818453503": ["convolution_gpu_bfyx_f16", 8],
+        "3676547304316346974": ["convolution_gpu_bfyx_f16", 8],
+        "8412675332215210248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14477382651380138146": ["convolution_gpu_bfyx_f16", 8],
+        "15899888589766240554": ["convolution_gpu_bfyx_f16", 8],
+        "4529376177404929890": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "7210896246223636810": ["convolution_gpu_bfyx_f16", 8],
+        "2775471071662652034": ["convolution_gpu_bfyx_f16", 8],
+        "17132456912135683375": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15563691660506818555": ["convolution_gpu_bfyx_f16", 6],
+        "9997402509928965207": ["convolution_gpu_bfyx_f16", 8],
+        "7793754164423097155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4639865771698877244": ["convolution_gpu_bfyx_f16", 6],
+        "1766192115208251594": ["convolution_gpu_bfyx_f16", 8],
+        "2015853414727933068": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10306264176864957825": ["convolution_gpu_bfyx_f16", 7],
+        "4871044181497936479": ["convolution_gpu_bfyx_f16", 6],
+        "8396548857016837452": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12714653556587252941": ["convolution_gpu_bfyx_f16", 6],
+        "1967886437456544865": ["convolution_gpu_bfyx_f16", 6],
+        "11350907923254547441": ["convolution_gpu_bfyx_f16", 7],
+        "12282274184666824734": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16866941685634953173": ["convolution_gpu_bfyx_f16", 6],
+        "6312283149621718315": ["convolution_gpu_bfyx_f16", 7],
+        "9795822066940245604": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7256380059517365529": ["convolution_gpu_bfyx_f16", 3],
+        "11966909558503849515": ["convolution_gpu_bfyx_f16", 8],
+        "11277466712159791917": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4992371260504969141": ["convolution_gpu_bfyx_f16", 6],
+        "15043181455492553716": ["convolution_gpu_bfyx_f16", 7],
+        "8399107263382557054": ["convolution_gpu_bfyx_f16", 8],
+        "6350452055467384023": ["convolution_gpu_bfyx_f16", 6],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16", 856],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like", 2],
+        "641417817126876622": ["convolution_gpu_bfyx_os_iyx_osv16", 104],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16", 717],
+        "4917807560042671575": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "44341776758472069": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3584869801682702110": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "13032463401326344281": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "12074020528214820344": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "10792368710075698135": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 906],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "15248304664655540462": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "8737603244374483727": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "6375630142791083064": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 616],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18191060893922845906": ["convolution_gpu_bfyx_os_iyx_osv16", 248],
+        "4914314319075651246": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2995522243104361971": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 330],
+        "3260693384502698965": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8021852643758937492": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 329],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1085],
+        "4309855944835724499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14741878965259218163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180612484034524170": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13300287078635373813": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13378751364754764186": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6449257695177020930": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "17627392788011440461": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "13831493475156855535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 734],
+        "3860080842190932938": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12349486511618981663": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17142061595610833587": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "413520381980740601": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 661],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "6526747512277607691": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16117940336643166742": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5991582579063082343": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "3294597200237228703": ["convolution_gpu_bfyx_os_iyx_osv16", 663],
+        "16191151963860109032": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4092109744625924274": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3411824370004173602": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 8],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16", 8],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 5],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "499215221217528434": ["convolution_gpu_bfyx_f16", 8],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 8],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 7],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16", 8],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 8],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 7],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16", 8],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 6],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8155797389244290087": ["convolution_gpu_bfyx_f16", 5],
+        "16706121580364790904": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "5495776091407365966": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5673972310424776040": ["convolution_gpu_bfyx_os_iyx_osv16", 238],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16", 483],
+        "4305463771822108179": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "2079318718874681198": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "17439941375453858836": ["convolution_gpu_bfyx_os_iyx_osv16", 1042],
+        "12467583825605788345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9058857190661793339": ["fused_conv_eltwise_gpu_ref", 2],
+        "11620974866622716017": ["fused_conv_eltwise_gpu_ref", 0],
+        "8857009061371774666": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5756084360647965669": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3975438095352877013": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3345987020362642539": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16755500582498207386": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1636861132129961823": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9793091808041097161": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5981205170754513046": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3025829117046314851": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "14600403613863348033": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9466249274834206569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17167052658616496904": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "758159154291645307": ["fully_connected_gpu_bf_io_gemm", 2],
+        "14555191501995137081": ["fully_connected_gpu_bf_io_gemm", 1],
+        "2080318501154291605": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "13813582937323882369": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2653651564133701304": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "2877876834438717783": ["convolution_gpu_bfyx_os_iyx_osv16", 199],
+        "9156649014297448284": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "13660470643303663441": ["convolution_gpu_bfyx_os_iyx_osv16", 907],
+        "8081997809574506331": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "8199400320947837516": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "5643924526605879168": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_os_iyx_osv16", 694],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 8],
+        "13332579082252874358": ["convolution_gpu_bfyx_os_iyx_osv16", 348],
+        "10148956417804060854": ["convolution_gpu_bfyx_f16", 8],
+        "16052199780545784176": ["convolution_gpu_bfyx_f16", 8],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 1102],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 8],
+        "3080843366919845836": ["convolution_gpu_bfyx_os_iyx_osv16", 1067],
+        "16898206352994894714": ["convolution_gpu_bfyx_f16", 8],
+        "17502393571772755646": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "12982233543299343225": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "5609871805820255743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "7971259885907841252": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15956442448148612253": ["convolution_gpu_bfyx_os_iyx_osv16", 686],
+        "7600980811977404651": ["convolution_gpu_bfyx_os_iyx_osv16", 751],
+        "6051578359778554994": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "14591236937522474591": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "380671738106280681": ["convolution_gpu_bfyx_os_iyx_osv16", 746],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8595156989254845134": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14493123117003003092": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 89],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15727110405754725012": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10890620280807224744": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "16079792265815446547": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "15384055407657760803": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2464531851392092325": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "5613964218561759893": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11460648773146310189": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "6593870431636005244": ["convolution_gpu_bfyx_os_iyx_osv16", 191],
+        "11529036254499853035": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 458],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "17010201596936918243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17881013712456488163": ["convolution_gpu_bfyx_os_iyx_osv16", 436],
+        "9336215801757107337": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "4972222030950072866": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "18113157997465675692": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1472667774257971884": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "7480855342650290772": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "17244746622354078542": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "251775001146378096": ["convolution_gpu_winograd_6x3_s1_fused", 1],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "1264200731459756446": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "6968087469917482002": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "1607381610581485984": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 497],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "8919164618663601566": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "16853448010512574338": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "3010644722195354051": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17062011653598617580": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 619],
+        "1377210419756613502": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1514213112647467874": ["convolution_gpu_bfyx_os_iyx_osv16", 502],
+        "17268633106022870055": ["convolution_gpu_bfyx_os_iyx_osv16", 621],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 996],
+        "13787398748724798340": ["convolution_gpu_bfyx_os_iyx_osv16", 282],
+        "9739119866883611322": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "7151167803631697120": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "8870164706606458004": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "9269498023794081940": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 853],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "5728070995112243570": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "5381496395266530071": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "9712640406795417230": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 853],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 493],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 877],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 876],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 435],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 134],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 503],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 811],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 121],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 810],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 422],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "13821372148587948765": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "4727004015814244856": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 502],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 460],
+        "3470176432841342662": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "8950283515337670839": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "3995072673238444396": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 952],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "5665180797552893949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7180904384828396567": ["convolution_gpu_bfyx_os_iyx_osv16", 106],
+        "17041465029020839746": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "8648502659728489503": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "2007359338465363037": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "16300204511212928772": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "10636266218009746496": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "17502734572225953539": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "9266211532252099402": ["fully_connected_gpu_fb_oi_ref", 0],
+        "6763848192987176713": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "6123737429963241103": ["convolution_gpu_bfyx_os_iyx_osv16", 483],
+        "10102406370623883494": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "16125206369312086947": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "16927483709629289661": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "3196823812655863240": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "7968691295772769464": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "6100031133333761315": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "4055514200737135942": ["fully_connected_gpu_bfyx_ref", 1],
+        "8508119169246513026": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14616145871710456304": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5168719682914827724": ["convolution_gpu_bfyx_os_iyx_osv16", 906],
+        "9473263513191498949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13461678175466315866": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5434387853485184980": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1580848418974169308": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6784038318046980185": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6248879028648699716": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "1436424324238684653": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13189391944650202330": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1199836165181399413": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5850612837647497531": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14740129361300854586": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5500102903434438965": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "7297288884568452370": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5136459381906620211": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17411381157694639837": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8553537608760917592": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12734736056404146766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "706526643700857104": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14937087468947592213": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "10242452169628899571": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16629319403227634487": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3072344987020666532": ["convolution_gpu_bfyx_os_iyx_osv16", 840],
+        "5932710369376133446": ["convolution_gpu_bfyx_os_iyx_osv16", 465],
+        "15493383292734604744": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5089311900051393846": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8721087995946196075": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14614506535270942373": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1289727743091243002": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "18141581865855554514": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 905],
+        "16956102699411887521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 711],
+        "11526253915485637934": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "15696872908795836832": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15332512198621601617": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5702206454207934253": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1111],
+        "15414564531144316178": ["convolution_gpu_bfyx_gemm_like", 2],
+        "386448290084824203": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15390537225231495870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10038180349007230302": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "3260693384502698965": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 335],
+        "6817180081986948843": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "1527649565538821618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7004336584711849988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2157468701794819044": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "12349486511618981663": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 734],
+        "15920115680945815097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 663],
+        "17778554668592635168": ["convolution_gpu_bfyx_os_iyx_osv16", 468],
+        "6999571050665340986": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "9879436330613366129": ["convolution_gpu_bfyx_os_iyx_osv16", 669],
+        "16191151963860109032": ["convolution_gpu_bfyx_os_iyx_osv16", 669],
+        "726019095679197164": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 1102],
+        "7267651931396380072": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12324657364444167791": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 363],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 372],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 363],
+        "17351243519367619322": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17026338651868178077": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 1113],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "4114184149613179671": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 362],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 348],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 362],
+        "9038567144062573854": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 362],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 1114],
+        "6418222853479731432": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7539191242110313918": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "5717588912072437191": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 598],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "10766144770072425534": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "6442062011017461761": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "3892512749863226006": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "4970240836537468609": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "2017817372328795772": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18312913026696855515": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1323873987880062206": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "3828289925836476678": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10112041311060264798": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7966725359592006848": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "2213697863012348994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5200128826708487987": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910238486908592807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7110283028091835342": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16035239784731081694": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8190708817382075098": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14088072670684726938": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "4594156436010043898": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "11599404585487705575": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "12238796233133147488": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "16062641979970268785": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "17970835612618431265": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "2793976170555467399": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "5268998395189523109": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "10247076603819003292": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "10411646581372174184": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "3783590807023839590": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "13040613656895011417": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "3426085674061936062": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "18191480673111859449": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3168817659922190247": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "18315877695535348266": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "12547634427503359071": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "16329007163840646462": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "10029877845127663589": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "2314415797696124986": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "16980380685273501504": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "3178865432099367094": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "14025615946937229331": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "9213611800089847066": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "16929122365386190391": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "2135878993442720196": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "9676824536524126662": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "920276615573431782": ["convolution_gpu_bfyx_os_iyx_osv16", 202],
+        "14160730014298968824": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17736530310730065811": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "2980714886349866400": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16634588113528268855": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "11974061312537998708": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "16035580169248458433": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "9866780121729912726": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "9774829335571618473": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12220806137793480020": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18351615003377381150": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "5523604552813225273": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "7679309022130741323": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "5318931986270088360": ["convolution_gpu_bfyx_gemm_like", 1],
+        "515117191459385744": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8719869282082754142": ["convolution_gpu_bfyx_os_iyx_osv16", 1031],
+        "7982863980065943223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11226945962148431484": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4241838582334505669": ["convolution_gpu_bfyx_gemm_like", 2],
+        "377042666741080260": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18145274589954906463": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6999860230736815298": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "16857606646270000245": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12338108420996610172": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10159450328554854004": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9170293267334520501": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "566685987437510322": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3194003345823695583": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "12107562407862382766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7161737091607459281": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9553813691004246971": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10335630215626781232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660045223846569448": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14844074799300904420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5366152766029340057": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "8299878919282539563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18146920703695658789": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "9019625678983697946": ["convolution_gpu_bfyx_os_iyx_osv16", 695],
+        "10578264750808095350": ["convolution_gpu_bfyx_os_iyx_osv16", 314],
+        "17553228602707603911": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "11544029240137241864": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "3625681568469091400": ["convolution_gpu_bfyx_os_iyx_osv16", 748],
+        "8849298369373186729": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "10796031718453810929": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9053983956770697828": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "1865317677339946921": ["convolution_gpu_bfyx_os_iyx_osv16", 671],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12018933315566840474": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "6446557539680352152": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "1642704598828904520": ["convolution_gpu_bfyx_os_iyx_osv16", 551],
+        "8319779172385327650": ["convolution_gpu_bfyx_os_iyx_osv16", 155],
+        "11579387987720364831": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "12754351323109225715": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 740],
+        "7903220569487431556": ["convolution_gpu_bfyx_os_iyx_osv16", 900],
+        "3905190080706902824": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8296759260312471619": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17301520533084822859": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "14740238736074743734": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "11837023395630571569": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18200031323963616161": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "4125453719396313232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3653945386031463537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "290357754290893078": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3852245179144851596": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13731852935536160843": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8777588932609025138": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1316118918790851994": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "11178580933542373407": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1117],
+        "17878041282431477247": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18049861144026923516": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2141454343831534876": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9144400494257163130": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "13190119938630028553": ["convolution_gpu_bfyx_os_iyx_osv16", 1108],
+        "4903536862079845135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15066104804156933222": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13457620264718125011": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "16436525035845780373": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "11501452337228727462": ["convolution_gpu_bfyx_os_iyx_osv16", 237],
+        "14843223893923209210": ["convolution_gpu_bfyx_os_iyx_osv16", 239],
+        "3403065541792865347": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "5747468958285466504": ["convolution_gpu_bfyx_os_iyx_osv16", 359],
+        "17552192746313035704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4855884888715402777": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "6932556634380539441": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9400396209180747044": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "16124622994105864663": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "10431774409348875623": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "9495099584417616887": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "9115704215611322151": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "11735107098356940998": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15204384674852423405": ["convolution_gpu_bfyx_os_iyx_osv16", 109],
+        "16866113149488400688": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "15389774302738715375": ["convolution_gpu_bfyx_os_iyx_osv16", 587],
+        "8101177730804364242": ["convolution_gpu_bfyx_os_iyx_osv16", 211],
+        "10149791427786334512": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "11053198857132396443": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "3963577328998759824": ["fully_connected_gpu_fb_oi_ref", 1],
+        "800184023925596362": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13839532421033004873": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "8262487256974801864": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "3693217331248996607": ["convolution_gpu_bfyx_os_iyx_osv16", 516],
+        "10388555096612441710": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "8892698757722619628": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "9606108204575763003": ["convolution_gpu_bfyx_os_iyx_osv16", 963],
+        "8449999818915991236": ["fully_connected_gpu_fb_oi_ref", 2],
+        "6954046921635466236": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12133573113666871990": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18286924901612269315": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "16168987643236739114": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17573344121250212662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8792004303945144557": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "6055054188657886157": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16692293796070898202": ["convolution_gpu_bfyx_gemm_like", 1],
+        "18377591093081814522": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "7171735046681228890": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2461164836823254208": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14430129165479757357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14698972830975282413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3479216436904445131": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5269956004669551826": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "13594976208424418204": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12373590460058087695": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "4405236452109167503": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14132900527730577142": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1349033639465657142": ["convolution_gpu_bfyx_gemm_like", 1],
+        "812985719328060901": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "12407276986845062239": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9170373506597510005": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1389904024718949479": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "7933040116770016066": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1919536721555752974": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10686800639842865597": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8687217977804450176": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3954066703109036822": ["convolution_gpu_bfyx_gemm_like", 2],
+        "723914723460931977": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11198516910049713685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1635320120115967164": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15344790681368521678": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12844169781725567332": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17741034184665639196": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15923530138304858829": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10444674910548414627": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10302498589531075361": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4924266705550545296": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18358817826057771246": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5814292023792160102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11190351855453911732": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9686754964115262880": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10699818671891976144": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11629568560686145289": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2754112975365662883": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14572211541644991947": ["convolution_gpu_bfyx_os_iyx_osv16", 42],
+        "15460159349027866277": ["convolution_gpu_bfyx_os_iyx_osv16", 420],
+        "11509503516680870396": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3553844546517243430": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "11739050017164389431": ["convolution_gpu_bfyx_os_iyx_osv16", 851],
+        "14683616789766294266": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1178443422000627700": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "3959894501921049830": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "6268257722565030993": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8104007721367839894": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "11004242349744689661": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "18331651243656907622": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "165832937834890614": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13820132527548818114": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "11494973886338256684": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "9562717353252171645": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15182874743616431755": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "11923231799522030843": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "7212742683076043022": ["convolution_gpu_bfyx_os_iyx_osv16", 955],
+        "1535659774314187616": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9077124630226762093": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "10707439442194349922": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "13670707208998927662": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11898738546265963886": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "7218310781442328740": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "17307988793370069255": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "3159313229944494871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2202381460552007272": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "4539543204582046751": ["convolution_gpu_bfyx_os_iyx_osv16", 43],
+        "2922645767583925625": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "1933120851078072002": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15544724104656453486": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "9953946296788154289": ["convolution_gpu_bfyx_os_iyx_osv16", 216],
+        "5949275355217152112": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "9953648472305845286": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "4585615709600143734": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5688607327240251933": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17872945111265083716": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7002575346587056029": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "4053858347143322566": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "15684381282886192452": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9172655573618628060": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "10794126133490266436": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13850228162972171575": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "129286539782466549": ["convolution_gpu_bfyx_gemm_like", 2],
+        "405864173902226347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "11446357246069900060": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11612145813762780082": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15323010740285064115": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "9782042377801038578": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15340106601175659588": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3000754961057044652": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13882543862049484032": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "459319667430150397": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12757674875116871887": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16119575123089076330": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "17015151842140598799": ["convolution_gpu_bfyx_os_iyx_osv16", 418],
+        "634038212244146017": ["convolution_gpu_bfyx_os_iyx_osv16", 794],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16", 6],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 3],
+        "967593872851912083": ["convolution_gpu_bfyx_f16", 8],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 3],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16", 8],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 3],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16", 6],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 3],
+        "499215221217528434": ["convolution_gpu_bfyx_f16", 6],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 4],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16", 6],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 3],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16", 7],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 3],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16", 6],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 3],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16", 6],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 3],
+        "357806365552700839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 4],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 3],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 4],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16", 8],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 0],
+        "346998321908284784": ["convolution_gpu_bfyx_f16", 7],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 1],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16", 6],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 1],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16", 6],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 1],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16", 7],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 1],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16", 7],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 1],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16", 6],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 1],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16", 7],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 0],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16", 6],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 0],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16", 6],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 1],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16", 7],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 0],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16", 7],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 0],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16", 6],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 1],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16", 7],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 1],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16", 7],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 0],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16", 6],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 2],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 0],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16", 7],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 0],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16", 7],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 1],
+        "260499864874634958": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 2],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 0],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16", 6],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 1],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 2],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16", 6],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 2],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 4],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 0],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 4],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 2],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 5],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 1],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 3],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 0],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 3],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 1],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 3],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 1],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 3],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 1],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 4],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 1],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 3],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 1],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 3],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 2],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 5],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 1],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 4],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 1],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 4],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 1],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 3],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 0],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 5],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 2],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 5],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 1],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 1],
+        "11479153223948565455": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15137118881649312407": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7380413826069265610": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16535858081334660130": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3621905235571219180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15946837476334836670": ["convolution_gpu_bfyx_gemm_like", 2],
+        "245178301664812042": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11536204967390696799": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13202661087717766278": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17082033214052891239": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10972993149458384549": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13266975232886004160": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5239323177752135143": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13950458285304028472": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1153656272296563651": ["convolution_gpu_bfyx_os_iyx_osv16", 727],
+        "15832393447136864275": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449769853632530": ["convolution_gpu_bfyx_os_iyx_osv16", 1103],
+        "16481491209623188639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16355932574879498582": ["convolution_gpu_bfyx_os_iyx_osv16", 723],
+        "9885117015102902622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17948745397003387421": ["convolution_gpu_bfyx_os_iyx_osv16", 1103],
+        "6169584310346033045": ["convolution_gpu_bfyx_os_iyx_osv16", 727],
+        "11946156629252758613": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8766639290602892682": ["convolution_gpu_bfyx_os_iyx_osv16", 1101],
+        "4124732995953832580": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14120940518810838558": ["convolution_gpu_bfyx_os_iyx_osv16", 345],
+        "15477415938111847293": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7899374704077099747": ["convolution_gpu_bfyx_os_iyx_osv16", 726],
+        "1738224818674864374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4675498016268563894": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "11678653628752466495": ["convolution_gpu_bfyx_gemm_like", 2],
+        "823094503720427089": ["convolution_gpu_bfyx_os_iyx_osv16", 726],
+        "6268238156027633260": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12067387912557140291": ["convolution_gpu_bfyx_os_iyx_osv16", 721],
+        "14700484317091478179": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5093753362153705304": ["convolution_gpu_bfyx_os_iyx_osv16", 726],
+        "7185731190256343440": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7147929965532955967": ["convolution_gpu_bfyx_os_iyx_osv16", 1115],
+        "11272978444176415320": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "3664831747298375482": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "5055315246446375474": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "11248871352103466387": ["convolution_gpu_bfyx_os_iyx_osv16", 1113],
+        "14138271699174946769": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "11248138620600796041": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "8218608499996018829": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "492405382055839338": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "13627463949725014842": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "10442692749607465731": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "5257716983547940732": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "4531738938698034182": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "4103900860372048770": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "1763848406836981250": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "13050289716763141821": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "5246872552943832761": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "8103482664263052993": ["convolution_gpu_bfyx_os_iyx_osv16", 738],
+        "4890599355418453618": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "13440603011986281192": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "7470027005329223304": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "10193635775409684341": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "9727214793392528330": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "10481905734789810461": ["convolution_gpu_bfyx_os_iyx_osv16", 362],
+        "17748868035178556381": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "1557394183568627973": ["convolution_gpu_bfyx_os_iyx_osv16", 363],
+        "1431347831018127681": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "11729412526159852880": ["convolution_gpu_bfyx_os_iyx_osv16", 365],
+        "4899105740108544338": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "8050406060207298909": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "7380902367877842940": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "12400142005537988277": ["convolution_gpu_bfyx_os_iyx_osv16", 738],
+        "7084726217254409262": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "13881126705282937733": ["convolution_gpu_bfyx_os_iyx_osv16", 363],
+        "3268450385258447029": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "3315969006703902437": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "7995430380267318045": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "13355664807789465988": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "1814690350132893834": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "10977798741323641518": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "16290685659520662243": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "14814993085047057124": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "16036211705705298060": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "3314627126439576532": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "5397150622881607923": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "4417629288282219686": ["convolution_gpu_bfyx_os_iyx_osv16", 1114],
+        "2593493324630665553": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "2115136697391853510": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "3903972756038760641": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "18309964708787622418": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "10898709444676724488": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "3114718546872961667": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "4116817191288103322": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "5759507923877307269": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "13521523772245595449": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7025699501997365179": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "17325198932789845471": ["convolution_gpu_bfyx_os_iyx_osv16", 271],
+        "1929216390450946038": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "9359713794448163515": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "1064765432017421754": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "17903113127620271097": ["convolution_gpu_bfyx_os_iyx_osv16", 606],
+        "955947984048164651": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "6871124717336911723": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "17054742656500024833": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8735118147118298928": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7689370938722443575": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "7389433284327478008": ["convolution_gpu_bfyx_os_iyx_osv16", 977],
+        "6352588504037946062": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "3420065266906936372": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "5158493429539582334": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8584667522373731666": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "16628885743804758299": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "9979885527081183609": ["convolution_gpu_bfyx_os_iyx_osv16", 269],
+        "11585377068025763798": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "270198976247871883": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "14806119107242947719": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "6237698548794601324": ["convolution_gpu_bfyx_os_iyx_osv16", 977],
+        "16586342221264661586": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8378911742901238960": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "8878591357527094058": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16", 384],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16", 6],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16", 6],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16", 7],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16", 7],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16", 7],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16", 7],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 7],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16", 7],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 6],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16", 7],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 6],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 7],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16", 7],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 7],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 7],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 6],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 7],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 6],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 6],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 6],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 6],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 4],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 4],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 5],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 5],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 4],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 4],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 5],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 3],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 5],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 5],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 5],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 5],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 4],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 4],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 4],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 4],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 4],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 5],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 4],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 4],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 5],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 4],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 5],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 4],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 3],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 1],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 3],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 4],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 4],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 5],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 5],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 0],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 5],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 3],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 4],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 4],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 3],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 5],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 4],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 4],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 3],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 5],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 5],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 4],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 3],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 3],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 4],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 5],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 5],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 5],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 4],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 4],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 5],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 3],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 5],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 4],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 1],
+        "4652102901251847499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8552605555461651066": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4821707856043228388": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like", 2],
+        "867673900353092030": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9400507072890048966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13391871893495885313": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10371076921125171059": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10315090439844489700": ["convolution_gpu_bfyx_gemm_like", 2],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7957019749780783255": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3916912615549949771": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3702373232430988630": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16866405531619284081": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3118240332710616352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18271341717679165017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6547588888976666790": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3243287355593359731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11745487821055710420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2413743706626149595": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2891977832675907820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5140042030231193807": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "12362834244136780846": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "12975331316527510995": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "11296280342006832013": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "2729382724566640622": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "1973144337799131575": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "11237620198863831646": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "13522230668952002294": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "1878253869657286717": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "15537416934472628620": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "1614676161640914325": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "7887122837178625925": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "13932612600851474669": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "108336648992892440": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "10684345634354913297": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "10916615896929712681": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "6401617291202138329": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "14418145752469985573": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "10000629948062903268": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "17913158947435785150": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "3683538222536942924": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "3497309410275654168": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "5849203144808104114": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "6079947803671938062": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "10023464714622430341": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "10125169683435871224": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "16183189414217717282": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "4778769961736466493": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "14240807033488944743": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "9611215430798915107": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "13082046205786468713": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "12207197008210652563": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "10581403540319621428": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "8583043839495629208": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "14447820502121172060": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "16001364310945493562": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "8482359546526573989": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "1012101590389722479": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "14078917033502693044": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "3484370445244910200": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16", 761],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 828],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 783],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 7],
+        "570493430126610249": ["fully_connected_gpu_bf_io_gemm", 1],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 0],
+        "2438463778071005693": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10241616750018729197": ["convolution_gpu_bfyx_os_iyx_osv16", 1002],
+        "16093736249698386830": ["convolution_gpu_bfyx_os_iyx_osv16", 103],
+        "15577855965797137317": ["fully_connected_gpu_fb_oi_ref", 2],
+        "2793239401424346732": ["fully_connected_gpu_fb_oi_ref", 1],
+        "1090168454685651958": ["fully_connected_gpu_fb_oi_ref", 2],
+        "14491983419826529399": ["convolution_gpu_bfyx_os_iyx_osv16", 827],
+        "11866343372130060111": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "3750595711145201146": ["convolution_gpu_bfyx_os_iyx_osv16", 759],
+        "555112033233919049": ["fully_connected_gpu_bf_io_gemm", 1],
+        "9449916193007510499": ["fully_connected_gpu_bf_io_gemm", 0],
+        "821153009898835283": ["fully_connected_gpu_bf_io_gemm", 0],
+        "8694043970360551765": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8773350383870039461": ["convolution_gpu_bfyx_os_iyx_osv16", 621],
+        "18102285308171488538": ["convolution_gpu_bfyx_os_iyx_osv16", 91],
+        "4504463103561729721": ["fully_connected_gpu_fb_io_ref", 2],
+        "13026398103046869012": ["fully_connected_gpu_fb_oi_ref", 1],
+        "4938053383542014494": ["fully_connected_gpu_fb_oi_ref", 1],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16", 1101],
+        "10292349730148518173": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16", 950],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16", 192],
+        "17322754821646330275": ["convolution_gpu_bfyx_os_iyx_osv16", 950],
+        "9463001223908267526": ["convolution_gpu_bfyx_os_iyx_osv16", 950],
+        "2819993544283340217": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4891941794728322149": ["convolution_gpu_bfyx_os_iyx_osv16", 100],
+        "17966409116732724850": ["convolution_gpu_bfyx_os_iyx_osv16", 100],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 7],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 6],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 7],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 6],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 7],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16", 7],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 4],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 5],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 7],
+        "338313831905889757": ["convolution_gpu_bfyx_f16", 3],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 8],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 7],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16", 5],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 8],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 6],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 3],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 3],
+        "16009549743559486766": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14591935906857802585": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "111424963409848995": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2739383731123097925": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17284261626529871462": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "5668693380660004839": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "12579230945548766456": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5167557197439368430": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1540552565663233708": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16096568902203474447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5028262864972382565": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1974417291828577": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2599172922002088957": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11660160310320618383": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6321333724966975926": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "13769852278335802471": ["convolution_gpu_bfyx_gemm_like", 2],
+        "79817180213970569": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7224734161984848733": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2597920881875761524": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17470658487460623535": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3304768856579090475": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13876951907579147655": ["convolution_gpu_bfyx_os_iyx_osv16", 267],
+        "10366703264083184092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7178492718471026756": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2607889212984737257": ["convolution_gpu_bfyx_os_iyx_osv16", 643],
+        "6528945595038330865": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9831986499172731633": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18440050172847926353": ["convolution_gpu_bfyx_os_iyx_osv16", 640],
+        "6512088599266777589": ["convolution_gpu_bfyx_os_iyx_osv16", 640],
+        "10018756206737727294": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5830779024517851317": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "7913817244562964901": ["convolution_gpu_bfyx_f16", 8],
+        "11779589567746893119": ["convolution_gpu_bfyx_f16", 8],
+        "5287441936829096354": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "16879635677321458783": ["convolution_gpu_bfyx_f16", 8],
+        "5936894667802097344": ["convolution_gpu_bfyx_f16", 8],
+        "12029555773381953470": ["convolution_gpu_bfyx_f16", 6],
+        "1395714970525756800": ["convolution_gpu_bfyx_f16", 5],
+        "18366381433142273315": ["convolution_gpu_bfyx_f16", 8],
+        "17839315025229585473": ["convolution_gpu_bfyx_f16", 8],
+        "7428339090190576585": ["convolution_gpu_bfyx_f16", 8],
+        "16427721132197847241": ["convolution_gpu_bfyx_f16", 6],
+        "929038963682864275": ["convolution_gpu_bfyx_f16", 8],
+        "6348679735483401866": ["convolution_gpu_bfyx_f16", 7],
+        "17409943223289937333": ["convolution_gpu_bfyx_f16", 7],
+        "10896472785943286419": ["convolution_gpu_bfyx_f16", 8],
+        "8675423965229942895": ["convolution_gpu_bfyx_f16", 8],
+        "15359653790909326580": ["convolution_gpu_bfyx_f16", 4],
+        "937772044105590355": ["convolution_gpu_bfyx_f16", 6],
+        "11630003841984891663": ["convolution_gpu_bfyx_f16", 8],
+        "15721323944762357421": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18032560040713612222": ["convolution_gpu_bfyx_f16", 8],
+        "16185581163541386950": ["convolution_gpu_bfyx_f16", 8],
+        "7296460872108123423": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "18375557444371775299": ["convolution_gpu_bfyx_f16", 8],
+        "10922059457537054563": ["convolution_gpu_bfyx_f16", 6],
+        "122295605901184339": ["convolution_gpu_bfyx_f16", 3],
+        "12164250230746861951": ["convolution_gpu_bfyx_f16", 3],
+        "10631671892805059138": ["convolution_gpu_bfyx_os_iyx_osv16", 908],
+        "13517627553690454113": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "17864904691465978047": ["convolution_gpu_bfyx_os_iyx_osv16", 532],
+        "7688613129211669281": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15276587352894128846": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "16583642152876546031": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "1540351396976309640": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "12018060391889249406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1703594828023385832": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16671415101494484639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4660166087476681397": ["convolution_gpu_bfyx_gemm_like", 2],
+        "824242000358871449": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9894766303335506733": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17266480567140619519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13198159541095771298": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "3117760785038488579": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1476464784116064433": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4485934013026623941": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16673650204498772920": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8328912827514946731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15425046562310745575": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9559768114277499815": ["convolution_gpu_bfyx_os_iyx_osv16", 290],
+        "8904325051665606784": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2193347488577584488": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6699483770041820657": ["convolution_gpu_bfyx_os_iyx_osv16", 669],
+        "789366296550494453": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14473138580870542149": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1428800094127546021": ["convolution_gpu_bfyx_os_iyx_osv16", 647],
+        "14142504888572786665": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "1984025014517619256": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "7957167898986800985": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "412995552853553524": ["convolution_gpu_bfyx_f16", 8],
+        "7058232330882130703": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15549425900373079382": ["convolution_gpu_bfyx_f16", 8],
+        "2713038204741622907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1878980012173918209": ["convolution_gpu_bfyx_f16", 7],
+        "12468208151780727122": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "6674575974748163031": ["convolution_gpu_bfyx_f16", 8],
+        "5591111867402032949": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3413916493145831316": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12421615174911349736": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689084255978323672": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12474210147973914830": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14174889288973953645": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18224887830367116006": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16049847963625476676": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "3817623781909159313": ["convolution_gpu_bfyx_f16", 7],
+        "3004968067582685285": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6876765637331622545": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6802301901709446085": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13245964863324091195": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "953254263392356310": ["convolution_gpu_bfyx_f16", 2],
+        "5388858533648189105": ["convolution_gpu_bfyx_f16", 4],
+        "3226238265868290723": ["convolution_gpu_bfyx_f16", 8],
+        "10098858620420134682": ["convolution_gpu_bfyx_f16", 1],
+        "18308172581381789101": ["convolution_gpu_bfyx_f16", 2],
+        "12846183737006963638": ["convolution_gpu_bfyx_f16", 4],
+        "8746233054079242877": ["convolution_gpu_bfyx_f16", 2],
+        "7516276889336424671": ["convolution_gpu_bfyx_f16", 4],
+        "8240661672477348007": ["convolution_gpu_bfyx_f16", 1],
+        "7421142512620741721": ["convolution_gpu_bfyx_f16", 2],
+        "17095633565672192085": ["convolution_gpu_bfyx_f16", 2],
+        "7381046541836362634": ["convolution_gpu_bfyx_f16", 5],
+        "7006663637645720459": ["convolution_gpu_bfyx_f16", 5],
+        "554667746487334145": ["convolution_gpu_bfyx_f16", 4],
+        "1899794088311416867": ["convolution_gpu_bfyx_f16", 3],
+        "4461871297663195464": ["convolution_gpu_bfyx_f16", 2],
+        "845238018552466931": ["convolution_gpu_bfyx_f16", 2],
+        "1588946175550138318": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15493305609986974083": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18266429764179335648": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4773783671939023015": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4841057875316789358": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10434845132440395347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4971104866692187809": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3918510119122483722": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10511458406494047485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4104477639131772427": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14619253124444303162": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "2303241947828987936": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15440788136860909526": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5886674354741908134": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8121822626577551399": ["convolution_gpu_bfyx_os_iyx_osv16", 648],
+        "6561450336890348030": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16", 1049],
+        "6864098212683093769": ["convolution_gpu_bfyx_os_iyx_osv16", 318],
+        "12286768317527546407": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15803888689432429483": ["convolution_gpu_bfyx_os_iyx_osv16", 1029],
+        "2969163284049372725": ["convolution_gpu_bfyx_os_iyx_osv16", 1043],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16", 654],
+        "13943934495343791315": ["convolution_gpu_bfyx_os_iyx_osv16", 907],
+        "2864116308996401112": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16", 124],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "12840351521230542751": ["convolution_gpu_bfyx_os_iyx_osv16", 854],
+        "10101063893937511289": ["convolution_gpu_bfyx_os_iyx_osv16", 854],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16", 876],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16", 479],
+        "16052212361531923323": ["convolution_gpu_bfyx_os_iyx_osv16", 55],
+        "14021819955559248258": ["convolution_gpu_bfyx_os_iyx_osv16", 55],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16", 421],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16", 54],
+        "18183296320499063227": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "13889057206654080908": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "2813710942447372241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13633232435632839044": ["convolution_gpu_bfyx_f16", 8],
+        "2883172178329270363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9432546329737888706": ["convolution_gpu_bfyx_f16", 7],
+        "12985746913235154779": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17940668702908419725": ["convolution_gpu_bfyx_f16", 8],
+        "2064000219100642226": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5833649709217830223": ["convolution_gpu_bfyx_f16", 8],
+        "10849235794440642481": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6321445979984216128": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14697315322325185660": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "36079357617783912": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4063865474431180498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13167503358764278233": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17498603449428007802": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "6304136029727027056": ["convolution_gpu_bfyx_os_iyx_osv16", 123],
+        "1754448782405089213": ["convolution_gpu_bfyx_f16", 7],
+        "15489166244290113065": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5756918986564223629": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8035545676843269497": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17042017278300937839": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11260048813076144906": ["convolution_gpu_bfyx_f16", 2],
+        "6873924247641352061": ["convolution_gpu_bfyx_f16", 4],
+        "6474957215284027135": ["convolution_gpu_bfyx_f16", 6],
+        "16573724507496129614": ["convolution_gpu_bfyx_f16", 3],
+        "11210971373278055121": ["convolution_gpu_bfyx_f16", 1],
+        "185717560970701618": ["convolution_gpu_bfyx_f16", 4],
+        "11817410866221484993": ["convolution_gpu_bfyx_f16", 5],
+        "9765519004693711463": ["convolution_gpu_bfyx_f16", 8],
+        "14300671725579588671": ["convolution_gpu_bfyx_f16", 5],
+        "1297549572559338433": ["convolution_gpu_bfyx_f16", 3],
+        "4346210823986581329": ["convolution_gpu_bfyx_f16", 6],
+        "2750608965765787878": ["convolution_gpu_bfyx_f16", 5],
+        "14245442283142381063": ["convolution_gpu_bfyx_f16", 8],
+        "2942593456597250269": ["convolution_gpu_bfyx_f16", 8],
+        "14807774261203767931": ["convolution_gpu_bfyx_f16", 5],
+        "2024891861044519704": ["convolution_gpu_bfyx_f16", 7],
+        "12988352411577718659": ["convolution_gpu_bfyx_f16", 4],
+        "7546167886043158750": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12777758044198094011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17999895886988202252": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7284204319739516687": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11574916930945966662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12181953262469206135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11001131415959768285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11516255774873880270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17905472119711952421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3708423242842748011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16735610121492345646": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10393786933242452104": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8593006729492614006": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8080047256092430454": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5827132729840694911": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2862262622518056270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7592655788466931007": ["convolution_gpu_bfyx_os_iyx_osv16", 1006],
+        "10751447918697845967": ["convolution_gpu_bfyx_os_iyx_osv16", 260],
+        "14327549932088763609": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "9139350052341521235": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2268155498775258271": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "9252995576301318377": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "16131094933895726474": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "2390813972238809739": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6575286116803785519": ["convolution_gpu_bfyx_os_iyx_osv16", 159],
+        "9509860212160444680": ["convolution_gpu_bfyx_os_iyx_osv16", 909],
+        "2025729513014515133": ["convolution_gpu_bfyx_os_iyx_osv16", 498],
+        "7012386443457106080": ["convolution_gpu_bfyx_os_iyx_osv16", 532],
+        "10807317048120773939": ["convolution_gpu_bfyx_os_iyx_osv16", 853],
+        "13800264518247731721": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "10381956671421182115": ["convolution_gpu_bfyx_os_iyx_osv16", 82],
+        "4874673523117573787": ["convolution_gpu_bfyx_os_iyx_osv16", 200],
+        "18140414399325733479": ["convolution_gpu_bfyx_os_iyx_osv16", 803],
+        "5854165399605633326": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "17238058461587589303": ["convolution_gpu_bfyx_os_iyx_osv16", 43],
+        "4101383449947395379": ["convolution_gpu_bfyx_os_iyx_osv16", 420],
+        "2697043651083211983": ["convolution_gpu_bfyx_os_iyx_osv16", 604],
+        "1196153439884178828": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "1408371298472575421": ["convolution_gpu_bfyx_os_iyx_osv16", 756],
+        "2856387545805299627": ["fully_connected_gpu_bf_io_ref", 1],
+        "6931984251726006059": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "10053897550646291639": ["convolution_gpu_bfyx_os_iyx_osv16", 852],
+        "166522152877705111": ["convolution_gpu_bfyx_os_iyx_osv16", 456],
+        "8194080531314571831": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "8462596687449136841": ["convolution_gpu_bfyx_os_iyx_osv16", 102],
+        "16641148739441654579": ["convolution_gpu_bfyx_os_iyx_osv16", 456],
+        "3012332306785177280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1667559253581127345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17950962563816983793": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "15920581282829793263": ["convolution_gpu_bfyx_os_iyx_osv16", 458],
+        "4931844549089354374": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11227326613484178737": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
+        "8926339988827333993": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14947161471102583853": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7959005479751426244": ["convolution_gpu_bfyx_os_iyx_osv16", 543],
+        "13876295120508241721": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "5450799298000231966": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "745049678230480319": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17799305583546345514": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "15448134419455024563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10247046915015701375": ["convolution_gpu_bfyx_os_iyx_osv16", 462],
+        "818326236814735107": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "11621993279519931789": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10879300979808656559": ["fully_connected_gpu_bf_io_gemm", 1],
+        "9614122272772797675": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7199567766573336359": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13573164884579883011": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15482728985931330311": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4607650298345740971": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16086873164128770879": ["convolution_gpu_bfyx_gemm_like", 2],
+        "105926781977700977": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11591232422517503119": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11582016741808877197": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16914574072145986060": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6022176855777948587": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8941858845051007302": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9357675997524716463": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3521176117120705338": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12045093589986262223": ["convolution_gpu_bfyx_os_iyx_osv16", 214],
+        "470065517654323782": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16734161909350784601": ["convolution_gpu_bfyx_os_iyx_osv16", 966],
+        "11121230809258677064": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6349024748484491361": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9689224985169331447": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3750053020466161808": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "15788948623626667459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13291988829313422545": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "8049787711095084959": ["convolution_gpu_bfyx_os_iyx_osv16", 476],
+        "8361191677655973935": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "6455326407035817823": ["convolution_gpu_bfyx_os_iyx_osv16", 88],
+        "4549875381866576113": ["convolution_gpu_bfyx_os_iyx_osv16", 92],
+        "14780479128645572595": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "9221666339438514459": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "17091218700152862273": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9951123692498529061": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "15226633731441516361": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "4453349487216529991": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
+        "17929115705990268026": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6621532750524834097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "16562571407098459049": ["convolution_gpu_bfyx_os_iyx_osv16", 162],
+        "2873284221161386597": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3769897639705493224": ["convolution_gpu_bfyx_os_iyx_osv16", 537],
+        "5447803100312758964": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 538],
+        "9163238347824560017": ["convolution_gpu_bfyx_os_iyx_osv16", 797],
+        "1688979903294911182": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9338092674592431198": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "15522545626077485199": ["convolution_gpu_bfyx_os_iyx_osv16", 162],
+        "1797489112792772811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5478531388148194783": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "3289369122755371980": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 83],
+        "14572382016053496602": ["convolution_gpu_bfyx_os_iyx_osv16", 423],
+        "16841168676076935693": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18407347961782182453": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 839],
+        "8695092335925023399": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "17375427967226537519": ["convolution_gpu_bfyx_os_iyx_osv16", 283],
+        "356011965155211999": ["convolution_gpu_bfyx_os_iyx_osv16", 202],
+        "10249443290070223207": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "11731131619682311119": ["convolution_gpu_bfyx_gemm_like", 2],
+        "499465197159774125": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "6713136765330410003": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "10482500982261483441": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12868046747643626115": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3118940652855466279": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "9133477146144263621": ["convolution_gpu_bfyx_os_iyx_osv16", 663],
+        "6014658843738581344": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2254000832500315403": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "2201913047888029571": ["convolution_gpu_bfyx_os_iyx_osv16", 664],
+        "6765174963106729735": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6860612036193780126": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4053722516029644812": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "3872902814632377403": ["convolution_gpu_bfyx_os_iyx_osv16", 173],
+        "11807558788154880902": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11052363375504603312": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "5704480811160976661": ["convolution_gpu_bfyx_os_iyx_osv16", 935],
+        "2631038501229053001": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11448877892018743111": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10504809699083269708": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 834],
+        "221686752427251764": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "8099629938775512387": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5641577920984461497": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 80],
+        "12153763576335891417": ["fully_connected_gpu_fb_io_b8_f8_vload", 1],
+        "14168685794682021826": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12423218459706339590": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "8734189831526420226": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "14362182205968229036": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "13157476677873103938": ["convolution_gpu_bfyx_os_iyx_osv16", 468],
+        "11940005480315119153": ["convolution_gpu_bfyx_os_iyx_osv16", 93],
+        "1302512649939808216": ["convolution_gpu_bfyx_os_iyx_osv16", 502],
+        "16919811480058643640": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "5208084625746441471": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "8262549900448065079": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "5227665249672396809": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "10715707282679913174": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15195978022706554558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4702145645721143238": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10415281487218000500": ["convolution_gpu_bfyx_gemm_like", 1],
+        "680533894953795110": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1524996376386486665": ["convolution_gpu_bfyx_os_iyx_osv16", 634],
+        "2180727313291426024": ["convolution_gpu_bfyx_os_iyx_osv16", 911],
+        "13865408769089368168": ["convolution_gpu_bfyx_os_iyx_osv16", 895],
+        "17729561573161674389": ["convolution_gpu_bfyx_os_iyx_osv16", 141],
+        "14102092207521274159": ["convolution_gpu_bfyx_os_iyx_osv16", 497],
+        "14601505600623942303": ["convolution_gpu_bfyx_os_iyx_osv16", 126],
+        "4933328578946081154": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "13882747247011638614": ["convolution_gpu_bfyx_os_iyx_osv16", 997],
+        "814582084353022226": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "4844820846457555156": ["convolution_gpu_bfyx_os_iyx_osv16", 876],
+        "6607603202773469786": ["convolution_gpu_bfyx_os_iyx_osv16", 917],
+        "15439502814859116813": ["convolution_gpu_bfyx_os_iyx_osv16", 543],
+        "15777107988701235428": ["convolution_gpu_bfyx_os_iyx_osv16", 877],
+        "12832042711454018844": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "6099745418702030715": ["convolution_gpu_bfyx_os_iyx_osv16", 20],
+        "4230880085403638923": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "62516450676185117": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "93092162022748986": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15895053123520992434": ["convolution_gpu_bfyx_os_iyx_osv16", 186],
+        "14005851072926998714": ["convolution_gpu_bfyx_os_iyx_osv16", 939],
+        "13582287631171243512": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10982128848228134282": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "7236965443679023925": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "1267627207431132628": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "2427481818567622188": ["convolution_gpu_bfyx_os_iyx_osv16", 532],
+        "9499169226931836849": ["convolution_gpu_bfyx_os_iyx_osv16", 535],
+        "14841135939793901331": ["convolution_gpu_bfyx_os_iyx_osv16", 873],
+        "13877129322236450083": ["convolution_gpu_bfyx_os_iyx_osv16", 873],
+        "17180103562901495937": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16817205245313896299": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2495268194877370173": ["convolution_gpu_bfyx_f16", 6],
+        "12476976926994223419": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "837759583632984386": ["convolution_gpu_bfyx_f16", 7],
+        "15704905077262309915": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15294932718062276977": ["convolution_gpu_bfyx_f16", 6],
+        "4080044423867161503": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12782915336639648289": ["convolution_gpu_bfyx_f16", 6],
+        "6939516498492475263": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "16689321018957344059": ["convolution_gpu_bfyx_f16", 4],
+        "2757721937742809580": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "10786200002789430346": ["convolution_gpu_bfyx_f16", 3],
+        "1941288041804222048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14851676883700287486": ["convolution_gpu_bfyx_f16", 4],
+        "17430311645965116316": ["convolution_gpu_bfyx_f16", 3],
+        "3115685904789548595": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12312218395355058343": ["convolution_gpu_bfyx_f16", 7],
+        "17435783978159028678": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18104511008021666751": ["convolution_gpu_bfyx_f16", 6],
+        "2889130721514872852": ["convolution_gpu_bfyx_f16", 2],
+        "6772340882401465511": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11743064882436041973": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10729082617196359413": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8688603561602716375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6216329929003742144": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5858568936289863149": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1258577325908211211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10990147603320054495": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13137659893098575291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2695989423525253829": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "3932955531996129807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14834765532454121330": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11205075769094656704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10056755067893619842": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8845972204063781512": ["convolution_gpu_bfyx_os_iyx_osv16", 292],
+        "5286686388506198758": ["convolution_gpu_bfyx_os_iyx_osv16", 1042],
+        "17044347247573802405": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16931221552471635881": ["convolution_gpu_bfyx_os_iyx_osv16", 909],
+        "3375470456077799802": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14332199338789934423": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "16759785658634382018": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "5919114362027813213": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1357304910509750335": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2624254602965505549": ["convolution_gpu_bfyx_f16", 7],
+        "5577742374711315791": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "578315994260636114": ["convolution_gpu_bfyx_f16", 6],
+        "1262880924315152695": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6168533266847660009": ["convolution_gpu_bfyx_f16", 6],
+        "14627313247209797163": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1270860549971294137": ["convolution_gpu_bfyx_f16", 4],
+        "4422458267180761143": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16820926361172105951": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "7270466581298144020": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17886363415956316754": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "1392628448770002052": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "6733088214815340670": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9311722977080169500": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17741687009005052531": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "16599775094194414107": ["convolution_gpu_bfyx_f16", 7],
+        "17406888356387369802": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14665993929606055479": ["convolution_gpu_bfyx_f16", 7],
+        "1257358912309769908": ["convolution_gpu_bfyx_f16", 8],
+        "9833509391965801955": ["convolution_gpu_bfyx_os_iyx_osv16", 111],
+        "853439126393091889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10335429769666812841": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9424664012357101635": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5860372371921305416": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3503193615625158929": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14744249132822614079": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5243045977966841351": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12657769780794263187": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7005710331306745857": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17732714197816812919": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2691481290737970286": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16758724324099838132": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "13321275573521697498": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17371402188380900420": ["convolution_gpu_bfyx_os_iyx_osv16", 252],
+        "15857411657993741130": ["convolution_gpu_bfyx_os_iyx_osv16", 1004],
+        "10824769165318760081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1670508622389791801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6601005881101223654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "3767953997999748671": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12193543332391207302": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "2779831597589397721": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14888498856025675875": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "13008816286946828339": ["convolution_gpu_bfyx_os_iyx_osv16", 883],
+        "14472562307183930494": ["convolution_gpu_bfyx_os_iyx_osv16", 502],
+        "12260051528344627305": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "12237139830764526217": ["convolution_gpu_bfyx_os_iyx_osv16", 766],
+        "12839904859734107448": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "2557331839687658350": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "14711934417369240383": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "16644569811401857265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14644196187730386778": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 242],
+        "15997330269289678741": ["convolution_gpu_fs_byx_fsv32", 20],
+        "9456547817322301854": ["convolution_gpu_fs_byx_fsv32", 13],
+        "14503081204981089589": ["convolution_gpu_fs_byx_fsv32", 6],
+        "17459372555428323405": ["convolution_gpu_fs_byx_fsv32", 1],
+        "11045313080354230499": ["convolution_gpu_fs_byx_fsv32_1x1", 1],
+        "11891736161858320688": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "3837179970761308107": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "1599135987505067413": ["convolution_gpu_bfyx_os_iyx_osv16", 281],
+        "88960405449779079": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "3983071771155729815": ["convolution_gpu_bfyx_os_iyx_osv16", 133],
+        "4686928543634340294": ["convolution_gpu_bfyx_os_iyx_osv16", 126],
+        "9500201961536063781": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "5626617363814193337": ["convolution_gpu_bfyx_os_iyx_osv16", 377],
+        "9493629616033946504": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "9142997105687030758": ["convolution_gpu_bfyx_os_iyx_osv16", 42],
+        "3565303211593767799": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "1638619072790951553": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16187579575395923193": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14768404566434004921": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "4439755580616372110": ["convolution_gpu_bfyx_os_iyx_osv16", 532],
+        "12082385141539179745": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "80211457682233943": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "2281851137797618536": ["convolution_gpu_bfyx_os_iyx_osv16", 418],
+        "4306482192283599644": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "7438079994024163367": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "2027062613896109334": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "2494989528221736054": ["convolution_gpu_bfyx_f16", 5],
+        "10481457184081052557": ["convolution_gpu_bfyx_f16", 3],
+        "17843566914419305583": ["convolution_gpu_bfyx_f16", 6],
+        "10440359951914302042": ["convolution_gpu_bfyx_f16", 0],
+        "12355534646291322950": ["convolution_gpu_bfyx_f16", 3],
+        "1312046147551402733": ["convolution_gpu_bfyx_f16", 7],
+        "17747064821498992452": ["convolution_gpu_bfyx_f16", 1],
+        "15727623554601964014": ["convolution_gpu_bfyx_f16", 2],
+        "1123438482147655288": ["convolution_gpu_bfyx_f16", 2],
+        "7126696940487701707": ["convolution_gpu_bfyx_f16", 7],
+        "3872390202906772826": ["convolution_gpu_bfyx_f16", 4],
+        "2880589787553789663": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "11505611789014119307": ["convolution_gpu_bfyx_gemm_like", 2],
+        "437815073846842580": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1950316744853763835": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2101440743856834523": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11177728104020690382": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11070046570645256268": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12619772485618838435": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2751512607890114618": ["convolution_gpu_bfyx_os_iyx_osv16", 106],
+        "15183698566691504656": ["convolution_gpu_bfyx_os_iyx_osv16", 485],
+        "12653721467536263212": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "13194232160397919757": ["convolution_gpu_bfyx_os_iyx_osv16", 54],
+        "204538163378003996": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "6149494643008538957": ["convolution_gpu_bfyx_os_iyx_osv16", 754],
+        "11290368603402236066": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "1323592601201034234": ["convolution_gpu_bfyx_f16", 8],
+        "14798486770850675841": ["convolution_gpu_bfyx_f16", 8],
+        "11673314628747753691": ["convolution_gpu_bfyx_f16", 3],
+        "7021961511624638678": ["convolution_gpu_bfyx_f16", 4],
+        "5676198353742450430": ["convolution_gpu_bfyx_f16", 5],
+        "4929819810689803833": ["convolution_gpu_bfyx_f16", 8],
+        "240316590146675808": ["convolution_gpu_bfyx_f16", 3],
+        "17625565940895057722": ["convolution_gpu_bfyx_f16", 4],
+        "8688075088415087060": ["convolution_gpu_bfyx_f16", 8],
+        "3109943868702160503": ["convolution_gpu_bfyx_f16", 3],
+        "15650217867869430450": ["convolution_gpu_bfyx_f16", 1],
+        "17908144598228512507": ["convolution_gpu_bfyx_os_iyx_osv16", 754],
+        "12413306519886846795": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3485465952750021220": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "16729621401445513163": ["convolution_gpu_bfyx_os_iyx_osv16", 859],
+        "5488147296483022703": ["convolution_gpu_bfyx_os_iyx_osv16", 980],
+        "8710473738514939538": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9147606392761848284": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "5087291643342132199": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11774085137209016046": ["convolution_gpu_bfyx_os_iyx_osv16", 860],
+        "8929841836974581600": ["convolution_gpu_bfyx_os_iyx_osv16", 858],
+        "7073670312468097760": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "14911211495772743601": ["convolution_gpu_bfyx_os_iyx_osv16", 427],
+        "3856389350154673872": ["convolution_gpu_bfyx_os_iyx_osv16", 48],
+        "14587774878993352201": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "16581313033870107357": ["convolution_gpu_bfyx_os_iyx_osv16", 695],
+        "15132868076468531540": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4964421818619633295": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "18154134293896237020": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9604863051097029874": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "12931069967038668164": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "6806199908367808607": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "11683146685348965370": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8154297486284619437": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14336744408490491240": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "4571901717343198720": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "6532394816830144120": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2666796249274140911": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11653606109120321972": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "6204893434840435239": ["convolution_gpu_bfyx_os_iyx_osv16", 93],
+        "13218364348439640168": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10201555771333451359": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "6327608958004075948": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10481749345430191494": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "3465618418555443152": ["convolution_gpu_bfyx_os_iyx_osv16", 494],
+        "6220132353152696371": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "767822057476164981": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9047957325396112699": ["convolution_gpu_bfyx_os_iyx_osv16", 240],
+        "4356441299961129632": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "10144632434338007132": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "15158722447225497040": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14636891429613595743": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "10686925946858146532": ["convolution_gpu_bfyx_os_iyx_osv16", 952],
+        "8212789547545225423": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "11769756626318373236": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5110309993577022127": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
         "10298865798559508430": ["convolution_gpu_bfyx_gemm_like", 2],
         "8036745915261696332": ["convolution_gpu_bfyx_gemm_like", 2],
         "10569376024770516176": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
         "7478911643148989038": ["convolution_gpu_bfyx_os_iyx_osv16", 251],
         "14725552910225528458": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
         "15344861463117063737": ["convolution_gpu_bfyx_os_iyx_osv16", 1004],
-               "16131094933895726474": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "16131094933895726474": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
         "7554430797788594700": ["convolution_gpu_bfyx_os_iyx_osv16", 152],
         "1645104669889145065": ["convolution_gpu_bfyx_os_iyx_osv16", 159],
         "18430449754758524246": ["convolution_gpu_bfyx_gemm_like", 2],
         "13762162740325518554": ["convolution_gpu_bfyx_os_iyx_osv16", 87]
     },
     "64": {
+        "910748662803624556": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 81],
+        "4519346022156371606": ["convolution_gpu_fs_byx_fsv32", 20],
+        "3153176811816688992": ["convolution_gpu_fs_byx_fsv32", 23],
+        "18407198563592068406": ["convolution_gpu_fs_byx_fsv32", 23],
+        "7738205255093972719": ["convolution_gpu_fs_byx_fsv32", 23],
+        "10911508970295688995": ["convolution_gpu_fs_byx_fsv32", 23],
+        "11234044950031301008": ["convolution_gpu_fs_byx_fsv32", 20],
+        "18188222607605599542": ["convolution_gpu_fs_byx_fsv32_1x1", 43],
+        "10115505917773209611": ["convolution_gpu_fs_byx_fsv32_1x1", 72],
+        "10227865805836113697": ["convolution_gpu_fs_byx_fsv32", 39],
+        "2049663083507517471": ["convolution_gpu_fs_byx_fsv32", 18],
+        "9623342210945438241": ["convolution_gpu_fs_byx_fsv32_1x1", 44],
+        "10253244041159400554": ["convolution_gpu_fs_byx_fsv32", 20],
+        "15366242457459683634": ["convolution_gpu_fs_byx_fsv32", 20],
+        "1822958298528464910": ["convolution_gpu_fs_byx_fsv32", 19],
+        "16719642333001190073": ["convolution_gpu_fs_byx_fsv32", 41],
+        "12815342516652762277": ["convolution_gpu_fs_byx_fsv32_1x1", 117],
+        "12568556685386421906": ["convolution_gpu_fs_byx_fsv32_1x1", 94],
+        "6022769716879130400": ["convolution_gpu_fs_byx_fsv32", 18],
+        "9581184464360436099": ["convolution_gpu_fs_byx_fsv32_1x1", 118],
+        "1290411699350897368": ["convolution_gpu_fs_byx_fsv32_1x1", 43],
+        "8915313814934838539": ["convolution_gpu_fs_byx_fsv32_1x1", 94],
+        "14574517999399394154": ["convolution_gpu_fs_byx_fsv32", 19],
+        "4121936391568515973": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "12841725653279382824": ["convolution_gpu_fs_byx_fsv32_1x1", 18],
+        "18167583164974728659": ["convolution_gpu_fs_byx_fsv32_1x1", 95],
+        "1193589967034749943": ["convolution_gpu_fs_byx_fsv32", 19],
+        "13383569873729045504": ["convolution_gpu_fs_byx_fsv32_1x1", 44],
+        "12594539202605959855": ["convolution_gpu_fs_byx_fsv32_1x1", 90],
+        "6172767547801205918": ["fully_connected_gpu_fs_byx_fsv32", 2],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 6],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 5],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "499215221217528434": ["convolution_gpu_bfyx_f16", 6],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 7],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16", 6],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16", 7],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 6],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16", 6],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 7],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16", 6],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16", 6],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 6],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 6],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13933912937625580405": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "11922487499552549932": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 83],
+        "12970438081364226523": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "11308694794965721688": ["convolution_gpu_fs_byx_fsv32", 29],
+        "13715005843933350037": ["convolution_gpu_fs_byx_fsv32", 28],
+        "1148413520208339490": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "11716928374969333113": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "16494187389478673128": ["convolution_gpu_fs_byx_fsv32", 18],
+        "357421659267466320": ["convolution_gpu_fs_byx_fsv32_1x1", 68],
+        "696097655686548117": ["convolution_gpu_fs_byx_fsv32_1x1", 19],
+        "14097099709001259929": ["convolution_gpu_fs_byx_fsv32", 39],
+        "15998994292645959709": ["convolution_gpu_fs_byx_fsv32", 18],
+        "2627340562631112021": ["convolution_gpu_fs_byx_fsv32", 41],
+        "17896157411325184870": ["convolution_gpu_fs_byx_fsv32_1x1", 18],
+        "2348059748605323550": ["convolution_gpu_fs_byx_fsv32_1x1", 91],
+        "16631500667355400979": ["convolution_gpu_fs_byx_fsv32", 18],
+        "4916238900958678407": ["convolution_gpu_fs_byx_fsv32", 40],
+        "197968000712296413": ["convolution_gpu_fs_byx_fsv32_1x1", 90],
+        "15020752541808561986": ["convolution_gpu_fs_byx_fsv32", 40],
+        "971357579643802322": ["convolution_gpu_fs_byx_fsv32", 19],
+        "2663936934621310461": ["convolution_gpu_fs_byx_fsv32", 40],
+        "11750493802548491696": ["convolution_gpu_fs_byx_fsv32_1x1", 91],
+        "15114315857226577628": ["convolution_gpu_fs_byx_fsv32_1x1", 18],
+        "14434615608138029428": ["convolution_gpu_fs_byx_fsv32", 13],
+        "4199590769228595358": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 81],
+        "7839563669435475954": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "1141991968771839472": ["convolution_gpu_fs_byx_fsv32", 21],
+        "263509469285411366": ["convolution_gpu_fs_byx_fsv32", 23],
+        "3547839498999305303": ["convolution_gpu_fs_byx_fsv32", 22],
+        "8683685705303303221": ["convolution_gpu_fs_byx_fsv32", 23],
+        "14744643882970884571": ["convolution_gpu_fs_byx_fsv32", 20],
+        "11891194858781881610": ["convolution_gpu_fs_byx_fsv32", 41],
+        "3043080892709481146": ["convolution_gpu_fs_byx_fsv32_1x1", 44],
+        "4805027939746074311": ["convolution_gpu_fs_byx_fsv32_1x1", 118],
+        "16345745202657238024": ["convolution_gpu_fs_byx_fsv32", 27],
+        "5357748083375129607": ["convolution_gpu_fs_byx_fsv32_1x1", 117],
+        "11037961888250452169": ["convolution_gpu_fs_byx_fsv32_1x1", 69],
+        "7637444154433664641": ["convolution_gpu_fs_byx_fsv32_1x1", 42],
+        "17451913917796952961": ["convolution_gpu_fs_byx_fsv32", 19],
+        "16510274807057459913": ["convolution_gpu_fs_byx_fsv32", 39],
+        "14334897511311962579": ["convolution_gpu_fs_byx_fsv32_1x1", 43],
+        "3637707827339403390": ["convolution_gpu_fs_byx_fsv32_1x1", 94],
+        "9898628245664893782": ["convolution_gpu_fs_byx_fsv32", 18],
+        "17986532238858274021": ["convolution_gpu_fs_byx_fsv32", 39],
+        "12758164167408438914": ["convolution_gpu_fs_byx_fsv32_1x1", 118],
+        "12483793724356828710": ["convolution_gpu_fs_byx_fsv32_1x1", 42],
+        "15833518985934734171": ["convolution_gpu_fs_byx_fsv32", 18],
+        "2822668303392012405": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "2990377492527643597": ["convolution_gpu_fs_byx_fsv32_1x1", 18],
+        "14227528573954277193": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "10349597079921511866": ["convolution_gpu_fs_byx_fsv32", 19],
+        "15321256223968882074": ["convolution_gpu_fs_byx_fsv32_1x1", 92],
+        "7637569993382017562": ["convolution_gpu_fs_byx_fsv32_1x1", 18],
+        "4340521982244590395": ["fully_connected_gpu_fs_byx_fsv32", 1],
+        "7612459008355120907": ["convolution_gpu_bfyx_os_iyx_osv16", 760],
+        "2259983618915707691": ["convolution_gpu_bfyx_os_iyx_osv16", 793],
+        "13678741578702922441": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "17310844417517474522": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "1547471890307888038": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "5857101685300045443": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "4492049100194222504": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 0],
+        "11159429929932958728": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "3307687633803839199": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "1081196396912580777": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "748301576795035305": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "13051406650237455505": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "15438530452161762045": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "13171156770366652142": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 1],
+        "7446661399223808792": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "8244276268485406306": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "5305511645030056258": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "3818176283290803023": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "7709128150391636219": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "16127482065413259805": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "12961109385388101976": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "12990695784685939569": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 1],
+        "16855828799826043472": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "16807078095615832147": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "4323079735931570978": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "9975103514236138870": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 1],
+        "13239007898564403022": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "3326691585067800328": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "10718764522366711114": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "5022444343992542447": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 1],
+        "13643973579671217152": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "7685370318437061317": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 1],
+        "15334633238378804940": ["convolution_gpu_yxfb_ref", 2],
+        "5658567026478236676": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "14520461267731870642": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "14478151143114959230": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "8183383667948205424": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "10280103280661141995": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "11396985422513105543": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "3072680381954611079": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "13923749042058740213": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "9035445496715584647": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "16498300259966485293": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "1173136780324694038": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "1946708042002037847": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 0],
+        "15786764202107923723": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "363253754059034577": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 0],
+        "2879408131530417634": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "13764191119738645791": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "10929096538131969056": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "10993107955805947401": ["convolution_gpu_yxfb_yxio_b16", 2],
+        "6214677989814002369": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "17129645303683422987": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 1],
+        "10884202393733523875": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "9211429678133558886": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "13256041685958568718": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "281659747168748090": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "9974238928758614510": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "105055722864217258": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "9775648000771985077": ["convolution_gpu_yxfb_yxio_b16", 0],
+        "2757327701217323530": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "15967893151722576439": ["convolution_gpu_yxfb_yxio_b16", 1],
+        "7913751481527058458": ["fused_conv_eltwise_gpu_yxfb_yxio_b16", 2],
+        "3660499157976278294": ["convolution_gpu_yxfb_ref", 2],
+        "11360155147631826044": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13728914881583145008": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1569671702178904173": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7565147824711814661": ["convolution_gpu_bfyx_os_iyx_osv16", 684],
+        "10841519281995210334": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5814379432380491933": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14136670791850817381": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9962268812119403740": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14363025897042686609": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5219377585459567306": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2635778763059062442": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "4669204329917622837": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6114788124604766283": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10923101966259038130": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14707006736790420809": ["convolution_gpu_bfyx_os_iyx_osv16", 719],
+        "17713633631990639961": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3504771662735847466": ["convolution_gpu_bfyx_os_iyx_osv16", 1121],
+        "15999250494536901225": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "32743373531902454": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "6564126728704461285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11017179702883573226": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "10222460110608531664": ["convolution_gpu_bfyx_os_iyx_osv16", 1127],
+        "989618432925496996": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16725595449660868380": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6576629942543206833": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17732941944414783792": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16020609768341427435": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "876164657126345894": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14944034218263936349": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5682635320360744091": ["convolution_gpu_bfyx_os_iyx_osv16", 470],
+        "11155444222714959508": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "5774841809066688068": ["fully_connected_gpu_fb_io_b8_f8_vload", 1],
+        "7770438611007743835": ["fully_connected_gpu_fb_io_block_fp16", 1],
+        "4047297221972879788": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15450497529345296385": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2205922486581639676": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11745022614090875813": ["convolution_gpu_bfyx_os_iyx_osv16", 310],
+        "17585538293744016566": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8444494906852742421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10885442787023100489": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16110741407146436740": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13563776212202320786": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3624752355820186197": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10145266698938789546": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "4747404796484882161": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16778528373644904167": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6798759075152052854": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3761431225719973581": ["convolution_gpu_bfyx_os_iyx_osv16", 722],
+        "15772258956922304025": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13618397296321270389": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "3193015766533516038": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "7153707305580485910": ["convolution_gpu_bfyx_os_iyx_osv16", 715],
+        "11975467189534635645": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12310086317567578422": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "14791274489873110852": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "9106902859069072460": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7751970175598153868": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3532605721993650980": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14750411915718441545": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8555176305060053627": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "6916665432260806066": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16975075932819772013": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11140174410573547007": ["fully_connected_gpu_fb_io_block_fp16", 0],
+        "12521704453281256747": ["convolution_gpu_fs_byx_fsv32", 24],
+        "1326922608383282653": ["convolution_gpu_fs_byx_fsv32_1x1", 91],
+        "4441511695478461473": ["convolution_gpu_fs_byx_fsv32_1x1", 90],
+        "11055674338836803346": ["convolution_gpu_fs_byx_fsv32", 19],
+        "13146981993912259510": ["convolution_gpu_fs_byx_fsv32_1x1", 90],
+        "12985778394717272800": ["convolution_gpu_fs_byx_fsv32_1x1", 19],
+        "18360220784823492369": ["fully_connected_gpu_bf_io_ref", 1],
+        "10638449107140469730": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 81],
+        "2256189126102740944": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "864855997469383178": ["convolution_gpu_fs_byx_fsv32_1x1", 44],
+        "5715677089026406156": ["convolution_gpu_fs_byx_fsv32", 27],
+        "8818145557038499161": ["convolution_gpu_fs_byx_fsv32", 47],
+        "2918106569079642832": ["convolution_gpu_fs_byx_fsv32", 35],
+        "5178622557342013293": ["convolution_gpu_fs_byx_fsv32", 18],
+        "17579501950743021322": ["convolution_gpu_fs_byx_fsv32", 29],
+        "1177474667726771626": ["convolution_gpu_fs_byx_fsv32_1x1", 68],
+        "6898037081343959843": ["convolution_gpu_fs_byx_fsv32_1x1", 43],
+        "15860405963536185032": ["convolution_gpu_fs_byx_fsv32", 28],
+        "18392332342334572995": ["convolution_gpu_fs_byx_fsv32", 39],
+        "551135788957797393": ["convolution_gpu_fs_byx_fsv32_1x1", 117],
+        "2226859786809443020": ["convolution_gpu_fs_byx_fsv32_1x1", 42],
+        "9820115912054761905": ["convolution_gpu_fs_byx_fsv32", 18],
+        "7569974257643285443": ["convolution_gpu_fs_byx_fsv32", 40],
+        "12275538857633519699": ["convolution_gpu_fs_byx_fsv32_1x1", 90],
+        "7138247722211199808": ["convolution_gpu_fs_byx_fsv32", 39],
+        "17553450983852684500": ["convolution_gpu_fs_byx_fsv32", 19],
+        "3143012304635366687": ["convolution_gpu_fs_byx_fsv32", 39],
+        "9311472148911096060": ["convolution_gpu_fs_byx_fsv32", 40],
+        "3179931836072864925": ["convolution_gpu_fs_byx_fsv32_1x1", 94],
+        "14756887642167915087": ["convolution_gpu_fs_byx_fsv32", 19],
+        "16573970549360490821": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "10723100916297597266": ["convolution_gpu_fs_byx_fsv32_1x1", 90],
+        "16092403278783486497": ["convolution_gpu_fs_byx_fsv32_1x1", 19],
+        "15204524535540014365": ["convolution_gpu_fs_byx_fsv32", 19],
+        "18172326743684695878": ["convolution_gpu_fs_byx_fsv32_1x1", 20],
+        "5785454736574189561": ["convolution_gpu_fs_byx_fsv32_1x1", 19],
+        "12466882202135216455": ["fully_connected_gpu_fs_byx_fsv32", 1],
+        "5950220553148534703": ["convolution_gpu_bfyx_os_iyx_osv16", 505],
+        "2035830808186046507": ["convolution_gpu_bfyx_os_iyx_osv16", 10],
+        "15233068047804647451": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16", 7],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 6],
+        "967593872851912083": ["convolution_gpu_bfyx_f16", 6],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 7],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16", 7],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 6],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16", 7],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 6],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 6],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16", 6],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 6],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16", 7],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 7],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16", 7],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 6],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16", 6],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 6],
+        "357806365552700839": ["convolution_gpu_bfyx_f16", 6],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 7],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16", 6],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 7],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 6],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16", 7],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 5],
+        "346998321908284784": ["convolution_gpu_bfyx_f16", 6],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 3],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16", 6],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 5],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16", 6],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 5],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16", 6],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 5],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16", 7],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 3],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16", 6],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 5],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16", 6],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 4],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16", 7],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 5],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16", 6],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 5],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16", 7],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 3],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16", 7],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 4],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16", 6],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 5],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16", 6],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 5],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16", 7],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 3],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16", 6],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 5],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16", 7],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 5],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16", 7],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 5],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16", 6],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 5],
+        "260499864874634958": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 3],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16", 6],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 5],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16", 7],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 3],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 5],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16", 7],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 3],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 4],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 2],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 3],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 2],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 6],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 2],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 4],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 2],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 5],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 0],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 3],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 0],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 3],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 2],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 3],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 2],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 4],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 2],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 5],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 2],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 4],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 2],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 4],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 1],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 3],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 2],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 3],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 2],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 4],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 2],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 3],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 2],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 4],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11479153223948565455": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15137118881649312407": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7380413826069265610": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16535858081334660130": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3621905235571219180": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15946837476334836670": ["convolution_gpu_bfyx_gemm_like", 1],
+        "245178301664812042": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11536204967390696799": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13202661087717766278": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17082033214052891239": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10972993149458384549": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13266975232886004160": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5239323177752135143": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13950458285304028472": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1153656272296563651": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15832393447136864275": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449769853632530": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16481491209623188639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16355932574879498582": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9885117015102902622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17948745397003387421": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6169584310346033045": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11946156629252758613": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8766639290602892682": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4124732995953832580": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14120940518810838558": ["convolution_gpu_bfyx_os_iyx_osv16", 348],
+        "15477415938111847293": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7899374704077099747": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1738224818674864374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4675498016268563894": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11678653628752466495": ["convolution_gpu_bfyx_gemm_like", 2],
+        "823094503720427089": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6268238156027633260": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12067387912557140291": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14700484317091478179": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5093753362153705304": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7185731190256343440": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7147929965532955967": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "11272978444176415320": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "3664831747298375482": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "5055315246446375474": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "11248871352103466387": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "14138271699174946769": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "11248138620600796041": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "8218608499996018829": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "492405382055839338": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "13627463949725014842": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "10442692749607465731": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "5257716983547940732": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "4531738938698034182": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "4103900860372048770": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "1763848406836981250": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "13050289716763141821": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "5246872552943832761": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "8103482664263052993": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "4890599355418453618": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "13440603011986281192": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "7470027005329223304": ["convolution_gpu_bfyx_os_iyx_osv16", 707],
+        "10193635775409684341": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "9727214793392528330": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "10481905734789810461": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "17748868035178556381": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "1557394183568627973": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "1431347831018127681": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "11729412526159852880": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "4899105740108544338": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "8050406060207298909": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "7380902367877842940": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "12400142005537988277": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "7084726217254409262": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "13881126705282937733": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "3268450385258447029": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "3315969006703902437": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "7995430380267318045": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "13355664807789465988": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "1814690350132893834": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "10977798741323641518": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "16290685659520662243": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "14814993085047057124": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "16036211705705298060": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "3314627126439576532": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "5397150622881607923": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "4417629288282219686": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "2593493324630665553": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "2115136697391853510": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "3903972756038760641": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "18309964708787622418": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "10898709444676724488": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "3114718546872961667": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "4116817191288103322": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "5759507923877307269": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "13521523772245595449": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "7025699501997365179": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "17325198932789845471": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "1929216390450946038": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "9359713794448163515": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "1064765432017421754": ["convolution_gpu_bfyx_os_iyx_osv16", 962],
+        "17903113127620271097": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "955947984048164651": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "6871124717336911723": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "17054742656500024833": ["convolution_gpu_bfyx_os_iyx_osv16", 211],
+        "8735118147118298928": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "7689370938722443575": ["convolution_gpu_bfyx_os_iyx_osv16", 588],
+        "7389433284327478008": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "6352588504037946062": ["convolution_gpu_bfyx_os_iyx_osv16", 591],
+        "3420065266906936372": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "5158493429539582334": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "8584667522373731666": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "16628885743804758299": ["convolution_gpu_bfyx_os_iyx_osv16", 587],
+        "9979885527081183609": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "11585377068025763798": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "270198976247871883": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "14806119107242947719": ["convolution_gpu_bfyx_os_iyx_osv16", 962],
+        "6237698548794601324": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "16586342221264661586": ["convolution_gpu_bfyx_os_iyx_osv16", 586],
+        "8378911742901238960": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "8878591357527094058": ["convolution_gpu_bfyx_os_iyx_osv16", 963],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16", 280],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7560832358324865221": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7382044526960590018": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 172],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3134973665622945888": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "18260030211719729324": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "7416143717989012766": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "12028963907131702705": ["convolution_gpu_bfyx_os_iyx_osv16", 255],
+        "2464531851392092325": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "8181308759455478086": ["convolution_gpu_bfyx_gemm_like", 0],
+        "546062289721803579": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4889405384318695802": ["convolution_gpu_bfyx_os_iyx_osv16", 1108],
+        "12841232643395100314": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "14108361259911144680": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 497],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "8653107899138843872": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17881013712456488163": ["convolution_gpu_bfyx_os_iyx_osv16", 438],
+        "9336215801757107337": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "15762542971370422224": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "8183203099539372914": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "4075343423548891274": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "13264497096898621015": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "679058537775669048": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "3375634256357960999": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "2844616672368585285": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "9861424412782371874": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "607078314875528651": ["convolution_gpu_bfyx_os_iyx_osv16", 954],
+        "6234885984223387670": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "7223570329858821704": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "11312664612825940140": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "14846039494240217143": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "3390376200501119384": ["convolution_gpu_bfyx_os_iyx_osv16", 511],
+        "1113077760071340574": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "897600209505963438": ["convolution_gpu_bfyx_os_iyx_osv16", 621],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1514213112647467874": ["convolution_gpu_bfyx_os_iyx_osv16", 843],
+        "17268633106022870055": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "12844146569641472927": ["convolution_gpu_bfyx_os_iyx_osv16", 618],
+        "13443914015380511668": ["convolution_gpu_bfyx_os_iyx_osv16", 201],
+        "13404457916017756196": ["convolution_gpu_bfyx_os_iyx_osv16", 95],
+        "6402415801415013013": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_gemm", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 1059],
+        "7267651931396380072": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12324657364444167791": ["convolution_gpu_bfyx_gemm_like", 1],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 1114],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 747],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "17351243519367619322": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17026338651868178077": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "4114184149613179671": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 747],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "9038567144062573854": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 707],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "6418222853479731432": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7539191242110313918": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 202],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 606],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 266],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 606],
+        "5717588912072437191": ["convolution_gpu_bfyx_os_iyx_osv16", 980],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 605],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 966],
+        "10766144770072425534": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6442062011017461761": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "3892512749863226006": ["convolution_gpu_bfyx_os_iyx_osv16", 980],
+        "4970240836537468609": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 2],
+        "2205831610731713610": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10156165092711350821": ["convolution_gpu_bfyx_f16", 8],
+        "18247115958647686123": ["convolution_gpu_bfyx_f16", 8],
+        "14753870252970038848": ["convolution_gpu_bfyx_f16", 8],
+        "3444463213706539843": ["convolution_gpu_bfyx_f16", 8],
+        "7877528622084379689": ["convolution_gpu_bfyx_f16", 6],
+        "15866954888467744269": ["convolution_gpu_bfyx_f16", 8],
+        "9539152949072945130": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10310196097041679791": ["convolution_gpu_bfyx_f16", 6],
+        "7007411152105941003": ["convolution_gpu_bfyx_f16", 7],
+        "13521396654536153663": ["convolution_gpu_bfyx_f16", 8],
+        "5627950048473526786": ["convolution_gpu_bfyx_f16", 8],
+        "10848396968350154403": ["convolution_gpu_bfyx_f16", 7],
+        "9228463515255376984": ["convolution_gpu_bfyx_f16", 6],
+        "14859232820755758304": ["convolution_gpu_bfyx_f16", 5],
+        "5649325350130366259": ["convolution_gpu_bfyx_f16", 5],
+        "7088894229907878578": ["convolution_gpu_bfyx_f16", 8],
+        "8358159212440128307": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "8339102927585671378": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12830057480692207716": ["convolution_gpu_bfyx_f16", 8],
+        "1501921300016434749": ["convolution_gpu_bfyx_f16", 8],
+        "11944861211852371818": ["convolution_gpu_bfyx_f16", 8],
+        "12836770593118646486": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9954767978721045608": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17539023048390892941": ["convolution_gpu_bfyx_f16", 8],
+        "6178263188060853409": ["convolution_gpu_bfyx_f16", 8],
+        "3431544695919571646": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7609530639305627533": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10080368511636727495": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3002649893623556991": ["convolution_gpu_bfyx_f16", 6],
+        "4775225581264605710": ["convolution_gpu_bfyx_f16", 8],
+        "8081831811345815679": ["convolution_gpu_bfyx_f16", 3],
+        "16652229407955687369": ["convolution_gpu_bfyx_f16", 6],
+        "608583762758537178": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15072630478788704960": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5484675321245094050": ["convolution_gpu_bfyx_f16", 3],
+        "12380502920181680269": ["convolution_gpu_bfyx_f16", 3],
+        "7058078270078786029": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "1767206266125694492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9041022673626233847": ["fully_connected_gpu_bf_io_gemm", 1],
+        "17189550036105947900": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7167054889777381093": ["convolution_gpu_bfyx_os_iyx_osv16", 1101],
+        "3278181836788028231": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6040360226338233118": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6877955452402826287": ["convolution_gpu_bfyx_os_iyx_osv16", 683],
+        "10864271596740164097": ["convolution_gpu_bfyx_os_iyx_osv16", 1101],
+        "14322983802576638073": ["convolution_gpu_bfyx_gemm_like", 1],
+        "678657374277098506": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8434335101659807351": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15928128327390664485": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "10990480508394584613": ["convolution_gpu_bfyx_os_iyx_osv16", 682],
+        "16504425380504793738": ["convolution_gpu_bfyx_os_iyx_osv16", 309],
+        "6480587375918509253": ["convolution_gpu_bfyx_os_iyx_osv16", 1098],
+        "9751235588096143414": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10160678465371702528": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16866525370343398909": ["convolution_gpu_bfyx_os_iyx_osv16", 310],
+        "17188750289444625186": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4363379197393466424": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16403435599807360704": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9367985410929563457": ["convolution_gpu_bfyx_os_iyx_osv16", 314],
+        "10716232679616746794": ["convolution_gpu_bfyx_gemm_like", 0],
+        "622299920975636640": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10798283054583509534": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14179140464588572277": ["convolution_gpu_bfyx_gemm_like", 2],
+        "351304363117543419": ["convolution_gpu_bfyx_os_iyx_osv16", 1026],
+        "3499106702307464480": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3296098567244638489": ["convolution_gpu_bfyx_gemm_like", 2],
+        "259619428712608645": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6379337678256717737": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13593304587712966846": ["convolution_gpu_bfyx_os_iyx_osv16", 1014],
+        "7572277082530361815": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4513178474272034213": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3390430905253038550": ["convolution_gpu_bfyx_gemm_like", 0],
+        "925607706467451476": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5627536079808515754": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16464493408368412759": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like", 1],
+        "307874768879227632": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10256518219432721891": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "6894773592689372049": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7168438768023636584": ["convolution_gpu_bfyx_f16", 3],
+        "10451355428354516953": ["convolution_gpu_bfyx_f16", 8],
+        "14472734042788843355": ["convolution_gpu_bfyx_f16", 6],
+        "10854104081943494369": ["convolution_gpu_bfyx_f16", 5],
+        "93020906459675429": ["convolution_gpu_bfyx_f16", 6],
+        "18398350909015256408": ["convolution_gpu_bfyx_f16", 6],
+        "4154340122141626612": ["convolution_gpu_bfyx_f16", 6],
+        "18200289027422735061": ["convolution_gpu_bfyx_f16", 6],
+        "5565357052205136958": ["convolution_gpu_bfyx_f16", 8],
+        "15946908544184249774": ["convolution_gpu_bfyx_f16", 5],
+        "14037627422329357174": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14408378031985995049": ["convolution_gpu_bfyx_f16", 6],
+        "13211513495214123892": ["convolution_gpu_bfyx_f16", 7],
+        "1496494589494248203": ["convolution_gpu_bfyx_f16", 6],
+        "17087805036943027743": ["convolution_gpu_bfyx_f16", 6],
+        "13247615789377163390": ["convolution_gpu_bfyx_f16", 6],
+        "5098352369763200627": ["convolution_gpu_bfyx_f16", 6],
+        "18412999191021390737": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5274456170971167904": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9275398105290923887": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11340683391412454009": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8100282867486124965": ["convolution_gpu_bfyx_os_iyx_osv16", 368],
+        "6361758198448370863": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16431503579923509596": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10280282710562383672": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9138345765585313427": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11117326838088757686": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "18222598708685323020": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5198859831430501652": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16644329894881952739": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "9367630847798077790": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "4906856539144714227": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14958085423402252319": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9835535945548454398": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "187589970359123667": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "15535862762283784879": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4449914467423404678": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13544117227994563021": ["convolution_gpu_bfyx_f16", 8],
+        "11429584360303226064": ["convolution_gpu_bfyx_f16", 8],
+        "8686735181567651375": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17317609751556064449": ["convolution_gpu_bfyx_f16", 8],
+        "10173110069079110307": ["convolution_gpu_bfyx_f16", 8],
+        "8256262799100439222": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5973810846638268737": ["convolution_gpu_bfyx_f16", 0],
+        "13027143116701737090": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13734349354191887992": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11968192766732437685": ["convolution_gpu_bfyx_f16", 8],
+        "14370161679324432329": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7767833673089272346": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "1449099748387573827": ["convolution_gpu_bfyx_f16", 8],
+        "6082159681542071766": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8428809871325798647": ["convolution_gpu_bfyx_f16", 6],
+        "3625445927016378048": ["convolution_gpu_bfyx_f16", 8],
+        "16278912754899992029": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5981194994138102303": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6745238291782149547": ["convolution_gpu_bfyx_f16", 6],
+        "17414378313275837028": ["convolution_gpu_bfyx_f16", 6],
+        "8179283465276489091": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12643209862476890693": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "4917105501636288966": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4769454452899072222": ["convolution_gpu_bfyx_f16", 8],
+        "4654031164245313134": ["convolution_gpu_bfyx_os_iyx_osv16", 123],
+        "11983416299502321227": ["convolution_gpu_bfyx_f16", 6],
+        "16922282562815050785": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15447939003958794915": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11609566695649803895": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11111698440090067021": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18164005256179423057": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9655994391583576445": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2583933428959488266": ["convolution_gpu_bfyx_f16", 8],
+        "8751116564564475669": ["convolution_gpu_bfyx_f16", 1],
+        "3115685904789548595": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12312218395355058343": ["convolution_gpu_bfyx_f16", 6],
+        "9471530638390187505": ["convolution_gpu_bfyx_f16", 1],
+        "16096365987863385534": ["convolution_gpu_bfyx_f16", 7],
+        "2442377985676760171": ["convolution_gpu_bfyx_f16", 2],
+        "4507962999752033472": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10533592933355247091": ["convolution_gpu_bfyx_f16", 5],
+        "5992215505897305181": ["convolution_gpu_bfyx_f16", 5],
+        "6910157047797377117": ["convolution_gpu_bfyx_f16", 8],
+        "16790329077281163886": ["convolution_gpu_bfyx_f16", 2],
+        "2016479248821075497": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12942021391785315478": ["convolution_gpu_bfyx_f16", 8],
+        "17426620864606343302": ["convolution_gpu_bfyx_f16", 8],
+        "4176783476555608976": ["convolution_gpu_bfyx_f16", 1],
+        "8200477336317721546": ["convolution_gpu_bfyx_f16", 5],
+        "1065030525774008098": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9977791179268459106": ["convolution_gpu_bfyx_f16", 4],
+        "5935088419802071711": ["convolution_gpu_bfyx_f16", 1],
+        "8280065306835140958": ["convolution_gpu_bfyx_f16", 4],
+        "12412081347941555301": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17879588820429714748": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3530006644830848265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18020799130556340422": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2128170628969875259": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14071897591194037605": ["convolution_gpu_bfyx_gemm_like", 0],
+        "6491408088021883107": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12101353883559344926": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14802907902395027829": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15620484656306921514": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3678710123517436862": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12886608316658106469": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4207095343079392779": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2424987010910105610": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16495170382855519879": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11725304903893058424": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11017733436150895075": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3003359546689323984": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16763572975847971995": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5536115906491867469": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6185456170871099179": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17357987826892677414": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3834578954372570745": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2722616814588455039": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4090898357536550608": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "14720015110356796696": ["convolution_gpu_bfyx_os_iyx_osv16", 267],
+        "10094292694452054942": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "1548720118123199395": ["convolution_gpu_bfyx_os_iyx_osv16", 1069],
+        "14253770195327543775": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9268223757514527059": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6050363141497736349": ["convolution_gpu_bfyx_os_iyx_osv16", 313],
+        "10031286694049433963": ["convolution_gpu_bfyx_os_iyx_osv16", 653],
+        "17225332175169286027": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2233344759271563825": ["convolution_gpu_bfyx_os_iyx_osv16", 294],
+        "14717735741752590462": ["convolution_gpu_bfyx_os_iyx_osv16", 650],
+        "16920316100463320687": ["convolution_gpu_bfyx_os_iyx_osv16", 251],
+        "17044347247573802405": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16931221552471635881": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6087583083491802739": ["convolution_gpu_bfyx_os_iyx_osv16", 535],
+        "10117837465786746844": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "8397584983137442239": ["convolution_gpu_bfyx_os_iyx_osv16", 885],
+        "4004935774665009766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1348850071000990753": ["convolution_gpu_bfyx_os_iyx_osv16", 101],
+        "14125588392587850421": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "1210023349170475921": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13800084646609419232": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "10714895045139754587": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11844075548236537312": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "14384392977058758646": ["convolution_gpu_bfyx_os_iyx_osv16", 423],
+        "9733409855963689982": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16", 799],
+        "15873043203603510240": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6945833083311627386": ["convolution_gpu_bfyx_os_iyx_osv16", 758],
+        "11210966638921575141": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6688805219047923406": ["convolution_gpu_bfyx_os_iyx_osv16", 767],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16", 8],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16", 8],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16", 6],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16", 6],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16", 6],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 7],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16", 7],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 6],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16", 7],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 7],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16", 6],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 6],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16", 7],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 7],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 7],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 7],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 7],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 7],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 6],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 6],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 6],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16", 6],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 4],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16", 7],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 4],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16", 6],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 3],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16", 6],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 3],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16", 7],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 3],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16", 7],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 4],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16", 7],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 3],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16", 7],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 3],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16", 7],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 4],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16", 6],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 4],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16", 7],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 4],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16", 7],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 3],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16", 7],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 3],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16", 6],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 4],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16", 6],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 3],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16", 6],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 3],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16", 7],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 4],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16", 7],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 3],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16", 6],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 4],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16", 6],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 4],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16", 6],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 3],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16", 6],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 4],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16", 6],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 4],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16", 7],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 3],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 5],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 2],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 4],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 2],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 3],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 2],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 4],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 2],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 4],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 0],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 8],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 2],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 4],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 2],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 4],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 1],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 7],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 2],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 3],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 1],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 3],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 2],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 8],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 0],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 3],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 2],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 4],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 1],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 7],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 1],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 3],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 2],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 4],
+        "2017817372328795772": ["convolution_gpu_bfyx_os_iyx_osv16", 488],
+        "4652102901251847499": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "4834446692898125871": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "8552605555461651066": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4461989328775275994": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "4821707856043228388": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10837496380266058422": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "867673900353092030": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16839741351990811959": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "9400507072890048966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9193880745263317167": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "13391871893495885313": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10447947790216991304": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "10371076921125171059": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10315090439844489700": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "671453551040072499": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "7957019749780783255": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "14034525799882831106": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "3916912615549949771": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5115007207028125638": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "3702373232430988630": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7913076120244203725": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17778091287904736965": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "16866405531619284081": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10645625090439446714": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "3118240332710616352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7450417963648518926": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "18271341717679165017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1520529227443340435": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "6547588888976666790": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2920840796593281126": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "3243287355593359731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15289152041466330689": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "11745487821055710420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10330180429524641331": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "2413743706626149595": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17228810554159747400": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "2891977832675907820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5140042030231193807": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "12362834244136780846": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "12975331316527510995": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "11296280342006832013": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "2729382724566640622": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "1973144337799131575": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "11237620198863831646": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "13522230668952002294": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "3426085674061936062": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16", 700],
+        "1878253869657286717": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "15537416934472628620": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "1614676161640914325": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "7887122837178625925": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16", 700],
+        "13932612600851474669": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "108336648992892440": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "10684345634354913297": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "10916615896929712681": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "6401617291202138329": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "14418145752469985573": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "10000629948062903268": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "17913158947435785150": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "3683538222536942924": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "3497309410275654168": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "5849203144808104114": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "6079947803671938062": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "10023464714622430341": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "10125169683435871224": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "16183189414217717282": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "4778769961736466493": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "14240807033488944743": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "9611215430798915107": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "13082046205786468713": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "12207197008210652563": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "10581403540319621428": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "8583043839495629208": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "14447820502121172060": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "16001364310945493562": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "8482359546526573989": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "1012101590389722479": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "14078917033502693044": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "3484370445244910200": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16", 503],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "8870164706606458004": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "9269498023794081940": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 91],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "5728070995112243570": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "5381496395266530071": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9712640406795417230": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 856],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 875],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 955],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 468],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 91],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 504],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "13821372148587948765": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "4727004015814244856": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "3470176432841342662": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "8950283515337670839": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "3995072673238444396": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "18312913026696855515": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "1323873987880062206": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 713],
+        "3828289925836476678": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "10112041311060264798": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "7966725359592006848": ["convolution_gpu_bfyx_os_iyx_osv16", 338],
+        "2213697863012348994": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "5200128826708487987": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "4910238486908592807": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7110283028091835342": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16035239784731081694": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "8190708817382075098": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "14088072670684726938": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "4594156436010043898": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "11599404585487705575": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "12238796233133147488": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "16062641979970268785": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "17970835612618431265": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "2793976170555467399": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "5268998395189523109": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "10247076603819003292": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10411646581372174184": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "3783590807023839590": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "13040613656895011417": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "18191480673111859449": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "3168817659922190247": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "18315877695535348266": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "12547634427503359071": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "16329007163840646462": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10029877845127663589": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "2314415797696124986": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "16980380685273501504": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "3178865432099367094": ["convolution_gpu_bfyx_os_iyx_osv16", 702],
+        "14025615946937229331": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "9213611800089847066": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "16929122365386190391": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "2135878993442720196": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "9676824536524126662": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "920276615573431782": ["convolution_gpu_bfyx_os_iyx_osv16", 954],
+        "14160730014298968824": ["convolution_gpu_bfyx_os_iyx_osv16", 228],
+        "17736530310730065811": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "2980714886349866400": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "16634588113528268855": ["convolution_gpu_bfyx_os_iyx_osv16", 603],
+        "11974061312537998708": ["convolution_gpu_bfyx_os_iyx_osv16", 588],
+        "16035580169248458433": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "9866780121729912726": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "9774829335571618473": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "12220806137793480020": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "18351615003377381150": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "5523604552813225273": ["convolution_gpu_bfyx_os_iyx_osv16", 606],
+        "7679309022130741323": ["fully_connected_gpu_fb_io_b8_f8_vload", 1],
+        "11768867992241402166": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14533309283952277877": ["convolution_gpu_bfyx_f16", 8],
+        "768036983879466491": ["convolution_gpu_bfyx_f16", 8],
+        "8640397399642282076": ["convolution_gpu_bfyx_f16", 8],
+        "4447241148638809019": ["convolution_gpu_bfyx_f16", 8],
+        "783937514167064185": ["convolution_gpu_bfyx_f16", 8],
+        "715515480709336477": ["convolution_gpu_bfyx_f16", 8],
+        "2178759129405158202": ["convolution_gpu_bfyx_f16", 8],
+        "10393876005917023039": ["convolution_gpu_bfyx_f16", 8],
+        "16721217843319344291": ["convolution_gpu_bfyx_f16", 7],
+        "3716831680611072075": ["convolution_gpu_bfyx_f16", 8],
+        "13531838169301698254": ["convolution_gpu_bfyx_f16", 8],
+        "8547517329636103839": ["convolution_gpu_bfyx_f16", 6],
+        "15703952951943117556": ["convolution_gpu_bfyx_f16", 6],
+        "16356886650795442216": ["convolution_gpu_bfyx_f16", 8],
+        "14825827905386756779": ["convolution_gpu_bfyx_f16", 8],
+        "9544485319724935966": ["convolution_gpu_bfyx_f16", 8],
+        "2346149331338767927": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11738950320568275422": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2766653880470962928": ["convolution_gpu_bfyx_f16", 8],
+        "5705293848380093325": ["convolution_gpu_bfyx_f16", 8],
+        "9377263655618691430": ["convolution_gpu_bfyx_f16", 6],
+        "14381639119653544814": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17861352776224211088": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "872663818200631537": ["convolution_gpu_bfyx_f16", 8],
+        "11873212817348865689": ["convolution_gpu_bfyx_f16", 8],
+        "14262888390138824866": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "13283182191889805989": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1504169490198907647": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "654353155629688683": ["convolution_gpu_bfyx_f16", 7],
+        "11008679778496876030": ["convolution_gpu_bfyx_f16", 8],
+        "1522034340423464923": ["convolution_gpu_bfyx_f16", 7],
+        "5146410220677925293": ["convolution_gpu_bfyx_f16", 6],
+        "9348781731377398142": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12743110088150270172": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8312449551479081574": ["convolution_gpu_bfyx_f16", 3],
+        "2169977778407200357": ["convolution_gpu_bfyx_f16", 6],
+        "15298209891625212621": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "8194917907003818144": ["convolution_gpu_bfyx_f16", 6],
+        "4678382896561552759": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "4875326050566085344": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8495917588316038129": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "3307746023430475723": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "2339136478286699386": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6985227720644674767": ["convolution_gpu_bfyx_os_iyx_osv16", 1054],
+        "10116252461182932181": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "8019070081772241253": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "17183204059658365454": ["convolution_gpu_bfyx_os_iyx_osv16", 1046],
+        "11146584802778488627": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "1412240409385229821": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "6441882850957156873": ["convolution_gpu_bfyx_os_iyx_osv16", 909],
+        "5574429781462425938": ["convolution_gpu_bfyx_os_iyx_osv16", 905],
+        "13275639102420638937": ["convolution_gpu_bfyx_os_iyx_osv16", 1052],
+        "11239480014318849466": ["convolution_gpu_bfyx_os_iyx_osv16", 300],
+        "11364573384763627804": ["convolution_gpu_bfyx_os_iyx_osv16", 143],
+        "17581918823452177145": ["convolution_gpu_bfyx_os_iyx_osv16", 522],
+        "16582255329717812318": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "1761707820353047792": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 678],
+        "1462052684491569244": ["convolution_gpu_bfyx_os_iyx_osv16", 1050],
+        "1043209871161635620": ["convolution_gpu_bfyx_os_iyx_osv16", 179],
+        "6580870850026637085": ["convolution_gpu_bfyx_os_iyx_osv16", 575],
+        "14590914974400169174": ["convolution_gpu_bfyx_os_iyx_osv16", 934],
+        "10336523847607095564": ["convolution_gpu_bfyx_os_iyx_osv16", 1010],
+        "9401313698306553642": ["convolution_gpu_bfyx_os_iyx_osv16", 259],
+        "876760078268365513": ["convolution_gpu_bfyx_os_iyx_osv16", 549],
+        "12433808969257923503": ["convolution_gpu_bfyx_os_iyx_osv16", 1014],
+        "7708805027140393275": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1015],
+        "14155482762134954253": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14210639698665432825": ["convolution_gpu_bfyx_os_iyx_osv16", 634],
+        "13812347350440044025": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5671680386726398590": ["convolution_gpu_bfyx_os_iyx_osv16", 928],
+        "17176795418817714855": ["convolution_gpu_bfyx_os_iyx_osv16", 599],
+        "6082190021542211729": ["convolution_gpu_bfyx_os_iyx_osv16", 584],
+        "16801661478447828958": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4283736930611947868": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11956830639200772990": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "17165799969769224595": ["convolution_gpu_bfyx_os_iyx_osv16", 975],
+        "15163942469957112086": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 230],
+        "2528583305942269942": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 228],
+        "3000687556020828936": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8798690812637045587": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "2562131945197556573": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10704041599214066504": ["convolution_gpu_bfyx_f16", 6],
+        "10613229998051250501": ["convolution_gpu_bfyx_f16", 8],
+        "11371787826925681911": ["convolution_gpu_bfyx_f16", 6],
+        "1813150318517555729": ["convolution_gpu_bfyx_f16", 6],
+        "2771555413518577061": ["convolution_gpu_bfyx_f16", 6],
+        "5185490410687016716": ["convolution_gpu_bfyx_f16", 8],
+        "7950736292930841432": ["convolution_gpu_bfyx_f16", 8],
+        "8261743217235812905": ["convolution_gpu_bfyx_f16", 7],
+        "8477837540026813338": ["convolution_gpu_bfyx_f16", 3],
+        "7870792155742596714": ["convolution_gpu_bfyx_f16", 7],
+        "877301692476873394": ["convolution_gpu_bfyx_f16", 6],
+        "13056385937425838233": ["convolution_gpu_bfyx_f16", 8],
+        "8845096601815863972": ["convolution_gpu_bfyx_f16", 6],
+        "14676936757685089287": ["convolution_gpu_bfyx_f16", 8],
+        "5137720027289968571": ["convolution_gpu_bfyx_f16", 8],
+        "16357238101987779826": ["convolution_gpu_bfyx_f16", 6],
+        "9042736284060217631": ["convolution_gpu_bfyx_f16", 6],
+        "5097668076583685041": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7344844015217894164": ["convolution_gpu_bfyx_os_iyx_osv16", 976],
+        "9936474869022587983": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "329553512467461593": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "12503215232120557325": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "10108549779145923995": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "3572139589278586696": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "3447560436810347348": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "17920936460438593923": ["convolution_gpu_bfyx_os_iyx_osv16", 365],
+        "6507588273482535558": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "16538659046159384556": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "8700824938092632308": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "962921421119622695": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "16377863140408011234": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "14577872961462943907": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "12984065900044625423": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "13425472724337224370": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "3646785474374491807": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "451031796891282479": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14776080430354474182": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10650851203140083677": ["convolution_gpu_bfyx_f16", 6],
+        "13826353934358977360": ["convolution_gpu_bfyx_f16", 6],
+        "12571951090832825431": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2064844993553532417": ["convolution_gpu_bfyx_f16", 8],
+        "5607751185548741691": ["convolution_gpu_bfyx_f16", 6],
+        "10130071159958943234": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15441641613614130057": ["convolution_gpu_bfyx_f16", 0],
+        "13351202293737336350": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12978085517162975656": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13826133123491407221": ["convolution_gpu_bfyx_f16", 6],
+        "6477134764532572421": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5950083318916516698": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "8077639030020668931": ["convolution_gpu_bfyx_f16", 8],
+        "18170681684824531566": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9550268348477212203": ["convolution_gpu_bfyx_f16", 6],
+        "1170208296475819204": ["convolution_gpu_bfyx_f16", 8],
+        "12632403871163182801": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6716171661019684887": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "13663559184653002083": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5294206775176168740": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17193621366814504699": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2172317081157982849": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7498346834966503502": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2178282174219673438": ["convolution_gpu_bfyx_f16", 8],
+        "12934129950310005014": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "6715904165942173567": ["convolution_gpu_bfyx_f16", 6],
+        "17561727515287904553": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14657808045011126231": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1079529772402386147": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "3159328589365824705": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12924965724590289961": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4598814405199376509": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11037290609162408542": ["convolution_gpu_bfyx_f16", 8],
+        "6844159929008926609": ["convolution_gpu_bfyx_f16", 2],
+        "17741687009005052531": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16599775094194414107": ["convolution_gpu_bfyx_f16", 6],
+        "10694993084063617045": ["convolution_gpu_bfyx_f16", 5],
+        "4718956221677027574": ["convolution_gpu_bfyx_f16", 7],
+        "15331321603919694243": ["convolution_gpu_bfyx_f16", 2],
+        "13587638750417688948": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11469485282469324059": ["convolution_gpu_bfyx_f16", 5],
+        "5339800634659856657": ["convolution_gpu_bfyx_f16", 4],
+        "4727811148140659977": ["convolution_gpu_bfyx_f16", 5],
+        "15395364662302266074": ["convolution_gpu_bfyx_f16", 1],
+        "4999952629984965133": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16910776375872786530": ["convolution_gpu_bfyx_f16", 5],
+        "6095294189188950942": ["convolution_gpu_bfyx_f16", 8],
+        "143401636424563432": ["convolution_gpu_bfyx_f16", 4],
+        "3388087248914710690": ["convolution_gpu_bfyx_f16", 5],
+        "5649502493014285446": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3943920952371309182": ["convolution_gpu_bfyx_f16", 6],
+        "6114751625725194851": ["convolution_gpu_bfyx_f16", 7],
+        "16010382257223767034": ["convolution_gpu_bfyx_f16", 8],
+        "5698552081847879925": ["convolution_gpu_bfyx_os_iyx_osv16", 135],
+        "17614136111305300320": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12113136482276698669": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6352305026184372614": ["convolution_gpu_bfyx_os_iyx_osv16", 926],
+        "484263022216907895": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15906176948061029085": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5968073932687849835": ["convolution_gpu_bfyx_os_iyx_osv16", 894],
+        "8916676653918372522": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7866248622207674314": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "13510743233538478822": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12769371751389866134": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "7895578642859691201": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "8213223779341828643": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10304648303720289553": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1117],
+        "2144096243788492819": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1707577858439999628": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13324143843657865667": ["convolution_gpu_bfyx_gemm_like", 1],
+        "435183760895426372": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13267820128796866947": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6320352452697656858": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 634],
+        "8567313734816639699": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "10283525193630938826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13245093829748960553": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8058948760644705332": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1010],
+        "5829806039123687296": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5086239515295780908": ["convolution_gpu_bfyx_os_iyx_osv16", 627],
+        "10826317792245512978": ["convolution_gpu_bfyx_os_iyx_osv16", 627],
+        "12380585322003033675": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "1234262998883967027": ["convolution_gpu_bfyx_os_iyx_osv16", 627],
+        "6088589595999348051": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16169090145815180398": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 292],
+        "8782987166849446623": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "15787004835769841975": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "7485645273882225925": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "5909064634412608410": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "15287389544400987667": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "10824769165318760081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1670508622389791801": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9334136333163455343": ["convolution_gpu_bfyx_os_iyx_osv16", 535],
+        "17442453768126002136": ["convolution_gpu_bfyx_os_iyx_osv16", 152],
+        "14697908987856243075": ["convolution_gpu_bfyx_os_iyx_osv16", 510],
+        "2172524104711598902": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9414914071574936805": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "16972925761537826501": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "11086785048659266661": ["convolution_gpu_bfyx_os_iyx_osv16", 513],
+        "14196846932594549700": ["convolution_gpu_bfyx_os_iyx_osv16", 503],
+        "134710296037856555": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1813506780036246168": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "2314901580407937822": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "13067158389321096682": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "12014287997453025378": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "14433322240937916500": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11355713865624830646": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "17678953603244889049": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "13762162740325518554": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "4231780976206765157": ["convolution_gpu_bfyx_f16", 8],
+        "8980221943093241716": ["convolution_gpu_bfyx_f16", 8],
+        "18299989890919839201": ["convolution_gpu_bfyx_f16", 8],
+        "156729467441671198": ["convolution_gpu_bfyx_f16", 8],
+        "8310501987356139481": ["convolution_gpu_bfyx_f16", 8],
+        "4272791707442002253": ["convolution_gpu_bfyx_f16", 8],
+        "7255733410403722204": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7070634135719152613": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1381422445726512646": ["convolution_gpu_bfyx_f16", 7],
+        "4564293152392730233": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "16311094287093896863": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3024110896512636406": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7855041023714669849": ["convolution_gpu_bfyx_f16", 7],
+        "8272666113866483677": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "17977137783789902281": ["convolution_gpu_bfyx_f16", 7],
+        "6481569584577240342": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7160644076405909358": ["convolution_gpu_bfyx_f16", 6],
+        "14249046182322576397": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6834385343004370326": ["convolution_gpu_bfyx_f16", 7],
+        "1008024514530186020": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16005429531001185688": ["convolution_gpu_bfyx_f16", 7],
+        "6495626185125462816": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "14409201680260089952": ["convolution_gpu_bfyx_f16", 6],
+        "12890358975554522309": ["convolution_gpu_bfyx_f16", 6],
+        "12550016614047236999": ["convolution_gpu_bfyx_f16", 6],
+        "576077328413845094": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "398256554051845097": ["convolution_gpu_bfyx_f16", 7],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "14105257348290411617": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "101401523793806394": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "9387863296634578229": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 2],
+        "8060636683643831780": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "14780918154047817345": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "13203321403993189063": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "5740392527807908759": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 2],
+        "7254442201128191810": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "11639967071503382387": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10257479741650070352": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 2],
+        "14208727117054072097": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16554979520080541400": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 1],
+        "2079123661145831388": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11983813489663072447": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "4863031235756403736": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 2],
+        "8829808521587554627": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "63694347927756574": ["convolution_gpu_bfyx_os_iyx_osv16", 704],
+        "4924910826040954693": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "5898560978289661425": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 2],
+        "5843926003737973832": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "4545839363852420458": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 1],
+        "3407897074495857436": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "6484909498459460088": ["convolution_gpu_bfyx_os_iyx_osv16", 970],
+        "15079214124472328635": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 1],
+        "16096022440460662756": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "14116583891373690425": ["convolution_gpu_bfyx_os_iyx_osv16", 961],
+        "18221649785651247678": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "7488737401156237830": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 2],
+        "1032722938997142573": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "17972250874305007417": ["fused_conv_eltwise_gpu_bfyx_1x1_opt", 1],
+        "6450532136308941035": ["fully_connected_gpu_fb_io_b8_f8_vload", 1],
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5981205170754513046": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3025829117046314851": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "14600403613863348033": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9466249274834206569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17167052658616496904": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "758159154291645307": ["fully_connected_gpu_bf_io_gemm", 2],
+        "14555191501995137081": ["fully_connected_gpu_bf_io_gemm", 1],
+        "3057483147285040704": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4460662214292495759": ["convolution_gpu_bfyx_f16", 8],
+        "17632851940131114495": ["convolution_gpu_bfyx_f16", 8],
+        "7945867532035693686": ["convolution_gpu_bfyx_f16", 8],
+        "10798155343477437060": ["convolution_gpu_bfyx_f16", 8],
+        "14191150640021059705": ["convolution_gpu_bfyx_f16", 8],
+        "14593228968660512118": ["convolution_gpu_bfyx_f16", 8],
+        "5573515532668433114": ["convolution_gpu_bfyx_f16", 8],
+        "11642934660277782628": ["convolution_gpu_bfyx_f16", 8],
+        "4825553592910970555": ["convolution_gpu_bfyx_f16", 8],
+        "17245530055973419690": ["convolution_gpu_bfyx_f16", 8],
+        "14644519840111409049": ["convolution_gpu_bfyx_f16", 8],
+        "15093112872571669071": ["convolution_gpu_bfyx_f16", 8],
+        "6172925429706792586": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16888914688498810916": ["convolution_gpu_bfyx_f16", 8],
+        "7094210524110336636": ["convolution_gpu_bfyx_f16", 8],
+        "1102719880087191972": ["convolution_gpu_bfyx_f16", 8],
+        "17635368969132641763": ["convolution_gpu_bfyx_f16", 8],
+        "6444855710931944326": ["convolution_gpu_bfyx_f16", 8],
+        "3685203889040861337": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8983142397488339162": ["convolution_gpu_bfyx_f16", 8],
+        "2942771097961823034": ["convolution_gpu_bfyx_f16", 8],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 1040],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16", 6],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 668],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 7],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 7],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 6],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 8],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 8],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16", 8],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 3],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 8],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 8],
+        "338313831905889757": ["convolution_gpu_bfyx_f16", 4],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 8],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 8],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16", 4],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 8],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 8],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 4],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 4],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 75],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 39],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 391],
+        "570493430126610249": ["fully_connected_gpu_bf_io_gemm", 2],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 1],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5689213766720451736": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11153755804932874939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13074790088623248655": ["convolution_gpu_bfyx_f16", 6],
+        "14552950763379636885": ["convolution_gpu_bfyx_f16", 7],
+        "1094600023872583173": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16827633753490728058": ["convolution_gpu_bfyx_f16", 8],
+        "6130516122331504865": ["convolution_gpu_bfyx_f16", 8],
+        "7670629548971090825": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5029322578170351026": ["convolution_gpu_bfyx_f16", 8],
+        "11682717086936489649": ["convolution_gpu_bfyx_f16", 8],
+        "9372644596618467274": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14183733053550126939": ["convolution_gpu_bfyx_f16", 7],
+        "5642981720905097704": ["convolution_gpu_bfyx_f16", 8],
+        "3924580903671169312": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17700105511171786728": ["convolution_gpu_bfyx_f16", 6],
+        "14998223809620050073": ["convolution_gpu_bfyx_f16", 8],
+        "419201770890811765": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7063350782589593425": ["convolution_gpu_bfyx_f16", 6],
+        "10687898799916833174": ["convolution_gpu_bfyx_f16", 6],
+        "5341504900604548311": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8082311845702095517": ["convolution_gpu_bfyx_f16", 7],
+        "5769891345892528049": ["convolution_gpu_bfyx_f16", 6],
+        "5034821474694053994": ["convolution_gpu_bfyx_f16", 8],
+        "2717532297792072749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "368628635269408785": ["convolution_gpu_bfyx_f16", 7],
+        "10159612784755046280": ["convolution_gpu_bfyx_f16", 8],
+        "15051374440521170869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17031332595095892437": ["convolution_gpu_bfyx_f16", 4],
+        "6938198718430530942": ["convolution_gpu_bfyx_f16", 8],
+        "2358029178760210430": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16131007134197705525": ["convolution_gpu_bfyx_f16", 4],
+        "6612035874395100788": ["convolution_gpu_bfyx_f16", 6],
+        "15022677981959490269": ["convolution_gpu_bfyx_f16", 7],
+        "11900509609879947992": ["convolution_gpu_bfyx_f16", 5],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3968445701280656378": ["convolution_gpu_bfyx_f16", 8],
+        "7463742252314920613": ["convolution_gpu_bfyx_f16", 8],
+        "17747065651432157057": ["convolution_gpu_bfyx_f16", 8],
+        "2951437417233062866": ["convolution_gpu_bfyx_f16", 8],
+        "4695031178096669813": ["convolution_gpu_bfyx_f16", 8],
+        "13200791011072363046": ["convolution_gpu_bfyx_f16", 8],
+        "7702483443698911725": ["convolution_gpu_bfyx_f16", 8],
+        "3225276687886679210": ["convolution_gpu_bfyx_f16", 8],
+        "8406061878298060171": ["convolution_gpu_bfyx_f16", 8],
+        "11861948300376902542": ["convolution_gpu_bfyx_f16", 8],
+        "18047654118875021903": ["convolution_gpu_bfyx_f16", 8],
+        "3876838946012690078": ["convolution_gpu_bfyx_f16", 7],
+        "11532811324432477051": ["convolution_gpu_bfyx_f16", 7],
+        "16482627014547828135": ["convolution_gpu_bfyx_f16", 6],
+        "4565106422618308814": ["convolution_gpu_bfyx_f16", 7],
+        "16991433003318725315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "16286683168753184722": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7074368169815304627": ["convolution_gpu_bfyx_os_iyx_osv16", 118],
+        "10702490327714920783": ["convolution_gpu_bfyx_gemm_like", 2],
+        "964168479107166949": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6252510766878541979": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1012052068628903875": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15499166167392043521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14327383763442344255": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18371627210590255356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13185859115957551268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15052792752810689842": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17918808521142517830": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1644157325342654261": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12198018126650448419": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9714393675511550323": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4928366179227934688": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15361605271135812199": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10267714663732575502": ["convolution_gpu_bfyx_1x1", 1],
+        "9990965405769569785": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10493403039286551634": ["convolution_gpu_bfyx_1x1", 1],
+        "18324310183763016728": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "6002923098500991259": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3429780644945779272": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6005067060818453503": ["convolution_gpu_bfyx_f16", 8],
+        "3676547304316346974": ["convolution_gpu_bfyx_f16", 8],
+        "8412675332215210248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14477382651380138146": ["convolution_gpu_bfyx_f16", 8],
+        "15899888589766240554": ["convolution_gpu_bfyx_f16", 8],
+        "4529376177404929890": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "7210896246223636810": ["convolution_gpu_bfyx_f16", 8],
+        "2775471071662652034": ["convolution_gpu_bfyx_f16", 8],
+        "17132456912135683375": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15563691660506818555": ["convolution_gpu_bfyx_f16", 6],
+        "9997402509928965207": ["convolution_gpu_bfyx_f16", 8],
+        "7793754164423097155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4639865771698877244": ["convolution_gpu_bfyx_f16", 6],
+        "1766192115208251594": ["convolution_gpu_bfyx_f16", 8],
+        "2015853414727933068": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10306264176864957825": ["convolution_gpu_bfyx_f16", 7],
+        "4871044181497936479": ["convolution_gpu_bfyx_f16", 6],
+        "8396548857016837452": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12714653556587252941": ["convolution_gpu_bfyx_f16", 6],
+        "1967886437456544865": ["convolution_gpu_bfyx_f16", 6],
+        "11350907923254547441": ["convolution_gpu_bfyx_f16", 7],
+        "12282274184666824734": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16866941685634953173": ["convolution_gpu_bfyx_f16", 6],
+        "6312283149621718315": ["convolution_gpu_bfyx_f16", 7],
+        "9795822066940245604": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7256380059517365529": ["convolution_gpu_bfyx_f16", 3],
+        "11966909558503849515": ["convolution_gpu_bfyx_f16", 8],
+        "11277466712159791917": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4992371260504969141": ["convolution_gpu_bfyx_f16", 6],
+        "15043181455492553716": ["convolution_gpu_bfyx_f16", 7],
+        "8399107263382557054": ["convolution_gpu_bfyx_f16", 8],
+        "6350452055467384023": ["convolution_gpu_bfyx_f16", 6],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16", 856],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like", 2],
+        "641417817126876622": ["convolution_gpu_bfyx_os_iyx_osv16", 104],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16", 717],
+        "4917807560042671575": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "44341776758472069": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3584869801682702110": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "13032463401326344281": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "12074020528214820344": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "10792368710075698135": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 906],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "15248304664655540462": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "8737603244374483727": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "6375630142791083064": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 616],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18191060893922845906": ["convolution_gpu_bfyx_os_iyx_osv16", 248],
+        "4914314319075651246": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2995522243104361971": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 330],
+        "3260693384502698965": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8021852643758937492": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 329],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1085],
+        "4309855944835724499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14741878965259218163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180612484034524170": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13300287078635373813": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13378751364754764186": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6449257695177020930": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "17627392788011440461": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "13831493475156855535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 734],
+        "3860080842190932938": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12349486511618981663": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17142061595610833587": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "413520381980740601": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 661],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "6526747512277607691": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16117940336643166742": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5991582579063082343": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "3294597200237228703": ["convolution_gpu_bfyx_os_iyx_osv16", 663],
+        "16191151963860109032": ["convolution_gpu_bfyx_gemm_like", 0],
+        "4092109744625924274": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3411824370004173602": ["convolution_gpu_bfyx_os_iyx_osv16", 246],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 8],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16", 8],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 5],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "499215221217528434": ["convolution_gpu_bfyx_f16", 8],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 8],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 7],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16", 8],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 8],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 7],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16", 8],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 6],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8155797389244290087": ["convolution_gpu_bfyx_f16", 5],
+        "16706121580364790904": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "5495776091407365966": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5673972310424776040": ["convolution_gpu_bfyx_os_iyx_osv16", 238],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16", 483],
+        "4305463771822108179": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "2079318718874681198": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "17439941375453858836": ["convolution_gpu_bfyx_os_iyx_osv16", 1042],
+        "12467583825605788345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9058857190661793339": ["fused_conv_eltwise_gpu_ref", 2],
+        "11620974866622716017": ["fused_conv_eltwise_gpu_ref", 0],
+        "8857009061371774666": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5756084360647965669": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3975438095352877013": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3345987020362642539": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16755500582498207386": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1636861132129961823": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9793091808041097161": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2080318501154291605": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "13813582937323882369": ["fully_connected_gpu_bf_io_ref", 1],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16", 680],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "2877876834438717783": ["convolution_gpu_bfyx_os_iyx_osv16", 949],
+        "9156649014297448284": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13660470643303663441": ["convolution_gpu_bfyx_os_iyx_osv16", 909],
+        "8081997809574506331": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "8199400320947837516": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5643924526605879168": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_os_iyx_osv16", 1071],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 8],
+        "13332579082252874358": ["convolution_gpu_bfyx_os_iyx_osv16", 727],
+        "10148956417804060854": ["convolution_gpu_bfyx_f16", 8],
+        "16052199780545784176": ["convolution_gpu_bfyx_f16", 8],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 351],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 8],
+        "3080843366919845836": ["convolution_gpu_bfyx_os_iyx_osv16", 1068],
+        "16898206352994894714": ["convolution_gpu_bfyx_f16", 8],
+        "17502393571772755646": ["convolution_gpu_bfyx_os_iyx_osv16", 438],
+        "12982233543299343225": ["convolution_gpu_bfyx_os_iyx_osv16", 807],
+        "5609871805820255743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "7971259885907841252": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15956442448148612253": ["convolution_gpu_bfyx_os_iyx_osv16", 1062],
+        "7600980811977404651": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "6051578359778554994": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "14591236937522474591": ["convolution_gpu_bfyx_os_iyx_osv16", 751],
+        "380671738106280681": ["convolution_gpu_bfyx_os_iyx_osv16", 1122],
+        "16581313033870107357": ["convolution_gpu_bfyx_os_iyx_osv16", 314],
+        "15132868076468531540": ["convolution_gpu_bfyx_os_iyx_osv16", 347],
+        "4964421818619633295": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "5206589624074157418": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "15804259593852912096": ["convolution_gpu_bfyx_os_iyx_osv16", 573],
+        "9667577643691138471": ["convolution_gpu_bfyx_f16", 8],
+        "9587296295017154035": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "624896425985287215": ["convolution_gpu_bfyx_f16", 8],
+        "13698491289625410930": ["convolution_gpu_bfyx_os_iyx_osv16", 263],
+        "5501294609610168354": ["convolution_gpu_bfyx_f16", 8],
+        "14684726385174603824": ["convolution_gpu_bfyx_f16", 8],
+        "3538746967389669479": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "5442728869442056950": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17446903112723559991": ["convolution_gpu_bfyx_f16", 8],
+        "17314761693722740561": ["convolution_gpu_bfyx_f16", 8],
+        "12956681231908531328": ["convolution_gpu_bfyx_os_iyx_osv16", 1013],
+        "17836528995874415642": ["convolution_gpu_bfyx_f16", 8],
+        "8896717627818724430": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "14716947061630316041": ["convolution_gpu_bfyx_os_iyx_osv16", 806],
+        "9735141117399046903": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18366465884925728820": ["convolution_gpu_bfyx_os_iyx_osv16", 632],
+        "17388129439366166721": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "13724220569112734431": ["convolution_gpu_bfyx_os_iyx_osv16", 596],
+        "5529587475911632254": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "11660089067798953391": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "5181665423821543629": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "8048807352445331657": ["convolution_gpu_bfyx_os_iyx_osv16", 312],
+        "3470485673426524224": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "3135008557801015427": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "14966985685297154154": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2035874178080637954": ["convolution_gpu_bfyx_os_iyx_osv16", 687],
+        "5013120291092844103": ["convolution_gpu_bfyx_os_iyx_osv16", 745],
+        "4460181251394130653": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "5117453858905614531": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "8461950668910238851": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "1805006234516270784": ["convolution_gpu_bfyx_os_iyx_osv16", 351],
+        "2718931301666622839": ["convolution_gpu_bfyx_os_iyx_osv16", 351],
+        "7124614724653589875": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "7805147511722673361": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "18231162877100499337": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1081152612562015774": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "14118692364036816874": ["convolution_gpu_bfyx_os_iyx_osv16", 1018],
+        "2771511633327598307": ["convolution_gpu_bfyx_os_iyx_osv16", 219],
+        "2043520288487456245": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10128395594093504455": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "1986294224967713231": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "8596104233487286880": ["convolution_gpu_bfyx_os_iyx_osv16", 1056],
+        "14619067706344498943": ["convolution_gpu_bfyx_os_iyx_osv16", 1070],
+        "16247177074403714471": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "17302460560764241489": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "11296929673638920561": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "2856387545805299627": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "9602711901243573665": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1389991763672509207": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7273251266921979515": ["convolution_gpu_bfyx_os_iyx_osv16", 115],
+        "7271698086258726731": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11671327445697582898": ["convolution_gpu_bfyx_gemm_like", 0],
+        "17570554483516474486": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3950135144885165819": ["convolution_gpu_bfyx_os_iyx_osv16", 664],
+        "16452498081261682201": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15356297740028337585": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8878636242180690359": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "12899244091844068967": ["convolution_gpu_bfyx_gemm_like", 0],
+        "18197774991654792135": ["convolution_gpu_bfyx_os_iyx_osv16", 929],
+        "11881486982136101383": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5087105232357685910": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6362183633269684086": ["convolution_gpu_bfyx_gemm_like", 2],
+        "665553611665131720": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15800366255097765592": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3441148927037088426": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10113814865022625794": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9290161943539060420": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6575054771337880905": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6930297774765427265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14687805371465731129": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17348903837738033737": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5058042344671975771": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4296792831323727718": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16569718635085620248": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6479800863775629782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8225524724938376205": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4610533059559454932": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10523639436634369983": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17500026797620139898": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9352837842671844352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15431710492660944867": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4094966339608175937": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10293540888522108040": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2178813930852805198": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4172720860698260594": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11035900209971591093": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16347989689011736788": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18426670112574344316": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9075740348545764459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16177541412848844107": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7413356361797538770": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11878518514118760052": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1483436564981355857": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2583631235760101021": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10826337022193127499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "883141931001824331": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18286338108393131357": ["convolution_gpu_bfyx_gemm_like", 2],
+        "513328329651240169": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2866563084547740589": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6618382574307554008": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1564076582163500801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2980118259786021998": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14006008710769892285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11047701363022632258": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17824545902528351132": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13411717706564225997": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "2553988022244380074": ["convolution_gpu_bfyx_os_iyx_osv16", 844],
+        "2007784578504956396": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "2128232248278266618": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180218859472587238": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17950127156676619192": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11734174131078900953": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17769805455612014213": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "354985206063783019": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12008052381997574625": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14579060801049956629": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "2475732477128179942": ["convolution_gpu_bfyx_os_iyx_osv16", 92],
+        "7425269551190332752": ["convolution_gpu_bfyx_os_iyx_osv16", 22],
+        "2523330181210520033": ["convolution_gpu_bfyx_os_iyx_osv16", 60],
+        "9252516395349163399": ["fused_conv_eltwise_gpu_ref", 0],
+        "3025740595729338904": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9221796417553554577": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "142182287837081331": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16817115615539634498": ["fully_connected_gpu_bf_io_gemm", 2],
+        "846549121454126986": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "1865437550609663375": ["fully_connected_gpu_bf_io_gemm", 2],
+        "14491983419826529399": ["convolution_gpu_bfyx_os_iyx_osv16", 451],
+        "11866343372130060111": ["convolution_gpu_bfyx_os_iyx_osv16", 126],
+        "3750595711145201146": ["convolution_gpu_bfyx_os_iyx_osv16", 799],
+        "555112033233919049": ["fully_connected_gpu_bf_io_gemm", 1],
+        "9449916193007510499": ["fully_connected_gpu_bf_io_gemm", 1],
+        "821153009898835283": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10053897550646291639": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "166522152877705111": ["convolution_gpu_bfyx_os_iyx_osv16", 872],
+        "8194080531314571831": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "8462596687449136841": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "16641148739441654579": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "3012332306785177280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1667559253581127345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17950962563816983793": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "15920581282829793263": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "4931844549089354374": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11227326613484178737": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "8926339988827333993": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "14947161471102583853": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7959005479751426244": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "13876295120508241721": ["convolution_gpu_bfyx_os_iyx_osv16", 43],
+        "5450799298000231966": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "745049678230480319": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17799305583546345514": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "15448134419455024563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10247046915015701375": ["convolution_gpu_bfyx_os_iyx_osv16", 84],
+        "818326236814735107": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11621993279519931789": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10879300979808656559": ["fully_connected_gpu_bf_io_gemm", 1],
+        "6931984251726006059": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "6196230740484518313": ["convolution_gpu_bfyx_os_iyx_osv16", 212],
+        "9158058375618670219": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "11236152897678664523": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "3406694758050234432": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1110],
+        "10974039527048973006": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "15899184198611288897": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "5208730096669264907": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "12427052259883778985": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "11537945670773619430": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1107],
+        "3449889481023274859": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "14190614451726695163": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "12928525615597254487": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "9516426687291882678": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17048242738976449237": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 730],
+        "9268483331991252048": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "6122901745362984256": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "5485405121200417034": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11959778533528884090": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "10066541947363706408": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12190338269093090393": ["convolution_gpu_bfyx_os_iyx_osv16", 745],
+        "18114410819861988056": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9399757365169066601": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18002225531765237416": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "493140137361754334": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1260161648603954768": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5667262301262234990": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "9589013771119948673": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12893936099497050507": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "5453417400746204459": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "16783136590567851390": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11157538002790552612": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "232009389683898587": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11216071562773188709": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "12325371158799601152": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11203921208856246202": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "7748329451001058910": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4699825578606954745": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9148351131305560328": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5864010731331844548": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8354231196544866003": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3293708605626705859": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3238880006626116922": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6138876053139091484": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12144683985655531326": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "251191902439549345": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3094287673106030943": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15664461533342111743": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12991662142109741177": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "1919860812260988485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12975178408849254081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "560685047966004607": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 660],
+        "1411165869695864657": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6971410560932215974": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17372785589054562125": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1164314361485656318": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9516217840174015532": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12868299597160484729": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16492694273514080106": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "14567423858977789672": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "1069440014730910857": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 661],
+        "4511752002127622518": ["convolution_gpu_bfyx_gemm_like", 2],
+        "670011076817691046": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "10000917296337062736": ["convolution_gpu_bfyx_os_iyx_osv16", 359],
+        "12236539205690542952": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 735],
+        "12066036542483319741": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "16551989359219084137": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14375560443851968119": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "4925269744341463388": ["convolution_gpu_bfyx_os_iyx_osv16", 476],
+        "7404267750384901384": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "1136134476921992394": ["convolution_gpu_bfyx_os_iyx_osv16", 542],
+        "12274460319290366021": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "1936271684402780579": ["convolution_gpu_bfyx_os_iyx_osv16", 805],
+        "7797907475238799442": ["fused_conv_eltwise_gpu_ref", 2],
+        "107092103514596960": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8153466715673110154": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13707460333812965439": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13482095577300687063": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12240700165957110598": ["convolution_gpu_bfyx_os_iyx_osv16", 659],
+        "13922184309028933319": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "4503204697730374875": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "15444198622559010805": ["fully_connected_gpu_bf_io_gemm", 2],
+        "3399502934446395571": ["fully_connected_gpu_bf_io_gemm", 2],
+        "13954223602112460287": ["fully_connected_gpu_bf_io_gemm", 0],
+        "10986360375271263743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "1716892750352083242": ["fully_connected_gpu_bf_io_gemm", 2],
+        "9725379584761388986": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "8812448421277455303": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10082079116080072102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "399551887429980535": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4993763244005264691": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16584618141013506079": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2812521679999989071": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13008426794683170889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12575702775019311249": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7546191118828069537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16065515254801458590": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "3425550832073889758": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12158565214239239362": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "4674402155077047884": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17292794084187069459": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "2786512217326082861": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14350551992529551543": ["convolution_gpu_bfyx_os_iyx_osv16", 992],
+        "2393708926889890184": ["convolution_gpu_bfyx_os_iyx_osv16", 994],
+        "14469325606711534393": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16745817877996028596": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "5009730037803270259": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12990454601941366626": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "16417111816169006680": ["convolution_gpu_bfyx_os_iyx_osv16", 842],
+        "133571575038273240": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14248134542225645633": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9431127887153901797": ["convolution_gpu_bfyx_f16", 8],
+        "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "11176513032851549145": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13523379689227815262": ["convolution_gpu_bfyx_f16", 3],
+        "6721716607254493168": ["convolution_gpu_bfyx_f16", 5],
+        "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11234282887624973651": ["convolution_gpu_bfyx_f16", 7],
+        "13831173402079080202": ["convolution_gpu_bfyx_f16", 3],
+        "8326492500469570449": ["convolution_gpu_bfyx_f16", 6],
+        "3264529476730576869": ["convolution_gpu_bfyx_f16", 4],
+        "7297268657172014757": ["convolution_gpu_bfyx_f16", 2],
+        "1625066159015188551": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6691529969484361871": ["convolution_gpu_bfyx_f16", 1],
+        "15713770358690264680": ["convolution_gpu_bfyx_f16", 5],
+        "16321675691643798095": ["convolution_gpu_bfyx_f16", 2],
+        "11669126976746433467": ["convolution_gpu_bfyx_f16", 3],
+        "343301842058050721": ["convolution_gpu_bfyx_f16", 1],
+        "2752323179285263511": ["convolution_gpu_bfyx_f16", 0],
+        "2102366789632970362": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "1306385926849681711": ["convolution_gpu_bfyx_f16", 1],
+        "126985649265174875": ["convolution_gpu_bfyx_f16", 1],
+        "1398008210451653662": ["convolution_gpu_bfyx_f16", 1],
+        "18349997465728341610": ["convolution_gpu_bfyx_f16", 2],
+        "6014604866075552044": ["convolution_gpu_bfyx_f16", 1],
+        "10704627126748844083": ["convolution_gpu_bfyx_f16", 2],
+        "2290627489333161117": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "9281553420666514549": ["convolution_gpu_bfyx_f16", 1],
+        "5307698759365425674": ["convolution_gpu_bfyx_f16", 8],
+        "16618476158797450107": ["convolution_gpu_bfyx_f16", 2],
+        "6448987340419115272": ["convolution_gpu_bfyx_f16", 1],
+        "7649625315489330023": ["convolution_gpu_bfyx_f16", 8],
+        "1407861661939721927": ["convolution_gpu_bfyx_f16", 8],
+        "7240814723112731361": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6941932686830863618": ["convolution_gpu_bfyx_f16", 5],
+        "2035160132949629453": ["convolution_gpu_bfyx_f16", 5],
+        "17827286460954881640": ["convolution_gpu_bfyx_f16", 6],
+        "6051363798671277490": ["convolution_gpu_bfyx_f16", 8],
+        "7990676476696328795": ["convolution_gpu_bfyx_f16", 6],
+        "9594336645019216285": ["convolution_gpu_bfyx_f16", 8],
+        "2826412019603377751": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "10171778444869246611": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "13742305118572588823": ["convolution_gpu_bfyx_f16", 7],
+        "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14821402568274932830": ["binary_convolution_gpu_1x1", 0],
+        "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10657672650587258853": ["convolution_gpu_bfyx_f16", 6],
+        "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13626797216057420236": ["convolution_gpu_bfyx_f16", 6],
+        "2506095387855338923": ["convolution_gpu_bfyx_f16", 2],
+        "562790620732503479": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1176788949160939554": ["convolution_gpu_bfyx_f16", 3],
+        "11395171679618279746": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9681660158274412796": ["convolution_gpu_bfyx_f16", 5],
+        "17157976605793655955": ["convolution_gpu_bfyx_f16", 1],
+        "13706914146179028206": ["convolution_gpu_bfyx_f16", 2],
+        "1586631406027561282": ["convolution_gpu_bfyx_f16", 2],
+        "9177089521763332472": ["convolution_gpu_bfyx_f16", 2],
+        "7623827168813087262": ["convolution_gpu_bfyx_f16", 2],
+        "2251294131085073114": ["convolution_gpu_bfyx_f16", 8],
+        "11257985397820322504": ["convolution_gpu_bfyx_f16", 1],
+        "3873298083628570562": ["convolution_gpu_bfyx_f16", 4],
+        "15662803497226104305": ["convolution_gpu_bfyx_f16", 5],
+        "8980088396308495358": ["convolution_gpu_bfyx_f16", 4],
+        "8049787711095084959": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "8361191677655973935": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "6455326407035817823": ["convolution_gpu_bfyx_os_iyx_osv16", 95],
+        "4549875381866576113": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "14780479128645572595": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "9221666339438514459": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "17091218700152862273": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9951123692498529061": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "15226633731441516361": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "4453349487216529991": ["convolution_gpu_bfyx_os_iyx_osv16", 833],
+        "17929115705990268026": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6621532750524834097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "16562571407098459049": ["convolution_gpu_bfyx_os_iyx_osv16", 539],
+        "2873284221161386597": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3769897639705493224": ["convolution_gpu_bfyx_os_iyx_osv16", 540],
+        "5447803100312758964": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 583],
+        "9163238347824560017": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "1688979903294911182": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9338092674592431198": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "15522545626077485199": ["convolution_gpu_bfyx_os_iyx_osv16", 163],
+        "1797489112792772811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5478531388148194783": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "3289369122755371980": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 81],
+        "14572382016053496602": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "16841168676076935693": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18407347961782182453": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 836],
+        "8695092335925023399": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "14168685794682021826": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12423218459706339590": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8734189831526420226": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "14362182205968229036": ["convolution_gpu_bfyx_os_iyx_osv16", 510],
+        "13157476677873103938": ["convolution_gpu_bfyx_os_iyx_osv16", 135],
+        "11940005480315119153": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "1302512649939808216": ["convolution_gpu_bfyx_os_iyx_osv16", 497],
+        "16919811480058643640": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "5208084625746441471": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "8262549900448065079": ["convolution_gpu_bfyx_os_iyx_osv16", 380],
+        "5227665249672396809": ["convolution_gpu_bfyx_os_iyx_osv16", 381],
+        "2276167946100759891": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "18129268521578260814": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5584283929974879275": ["convolution_gpu_bfyx_f16", 8],
+        "11429584360303226064": ["convolution_gpu_bfyx_f16", 8],
+        "8686735181567651375": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "212877757325472435": ["convolution_gpu_bfyx_f16", 8],
+        "17559312741017462443": ["convolution_gpu_bfyx_f16", 6],
+        "12388383111921084595": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6044859325822961324": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15028548616895245917": ["convolution_gpu_bfyx_f16", 6],
+        "2416358280826517238": ["convolution_gpu_bfyx_f16", 8],
+        "5078717573348951772": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16307093827408988813": ["convolution_gpu_bfyx_f16", 8],
+        "7365885889295117317": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5519237001078836815": ["convolution_gpu_bfyx_f16", 8],
+        "10536941332534385779": ["convolution_gpu_bfyx_f16", 3],
+        "4735765505172334525": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11998001963634536052": ["convolution_gpu_bfyx_f16", 3],
+        "4967372874318410724": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6469943853460256537": ["convolution_gpu_bfyx_f16", 8],
+        "4356868209069762908": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10874805992997105013": ["convolution_gpu_bfyx_f16", 5],
+        "10425195772947453108": ["convolution_gpu_bfyx_os_iyx_osv16", 811],
+        "17029022832617859459": ["convolution_gpu_bfyx_os_iyx_osv16", 434],
+        "2476051167651059767": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7844096932162345117": ["convolution_gpu_bfyx_f16", 5],
+        "2705126395780936342": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6904686223481502731": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3795064777145790033": ["convolution_gpu_bfyx_f16", 3],
+        "3981089957521430742": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12864700296881955607": ["convolution_gpu_bfyx_f16", 4],
+        "183214037684746423": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "14009922923845987763": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "3202655487034498666": ["convolution_gpu_bfyx_f16", 1],
+        "756854486757180730": ["convolution_gpu_bfyx_f16", 2],
+        "1750742987566783306": ["convolution_gpu_bfyx_os_iyx_osv16", 7],
+        "10704219670342115822": ["convolution_gpu_bfyx_os_iyx_osv16", 383],
+        "559540403792175610": ["convolution_gpu_bfyx_f16", 1],
+        "11042961657717641258": ["convolution_gpu_bfyx_f16", 7],
+        "7454909001123355674": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "14140446373297940618": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "12992827495874215098": ["convolution_gpu_bfyx_f16", 4],
+        "17560813776447689945": ["convolution_gpu_bfyx_f16", 6],
+        "12650154599281162622": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "9985311646893058565": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "17149651085328252645": ["convolution_gpu_bfyx_f16", 0],
+        "11779581889508749846": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "16817641185194791244": ["convolution_gpu_bfyx_os_iyx_osv16", 83],
+        "6143862109537773906": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6935108295659465736": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7424861737057604968": ["convolution_gpu_bfyx_os_iyx_osv16", 116],
+        "9640933201231819369": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17277279278034795112": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8111879884622212613": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3806358488669113143": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1711328697805315421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5288962955659199699": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17987842029397168642": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4203253185427070377": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9945419220893973658": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14309870202508661817": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18192195499329490812": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18075812052832099472": ["convolution_gpu_bfyx_gemm_like", 1],
+        "8192820779590386413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8925814981090917840": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5081785566500341341": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3605237561875385705": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1716302732338667414": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15442445290156572536": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14397043442550652899": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2228098191590860938": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "1573476283825580755": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "5388852746720776479": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "16916797286744339336": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3644136601694826205": ["convolution_gpu_bfyx_f16", 8],
+        "14012991338325957511": ["convolution_gpu_bfyx_f16", 5],
+        "2398531071421825711": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14629119844168195239": ["convolution_gpu_bfyx_f16", 7],
+        "9479190421236869705": ["convolution_gpu_bfyx_f16", 3],
+        "4695468509968937176": ["convolution_gpu_bfyx_f16", 3],
+        "16869493346339355004": ["convolution_gpu_bfyx_f16", 3],
+        "15546783882511835538": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14073402551217828243": ["convolution_gpu_bfyx_f16", 3],
+        "5737532382360638209": ["convolution_gpu_bfyx_f16", 6],
+        "14178075057440850235": ["convolution_gpu_bfyx_f16", 6],
+        "1041942313707882183": ["convolution_gpu_bfyx_f16", 5],
+        "14311656444636567643": ["convolution_gpu_bfyx_f16", 7],
+        "5708322911191147507": ["convolution_gpu_bfyx_f16", 3],
+        "7237541003077150774": ["convolution_gpu_bfyx_f16", 5],
+        "17097394033112334006": ["convolution_gpu_bfyx_f16", 2],
+        "10077604090153912107": ["convolution_gpu_bfyx_f16", 3],
+        "3042641872059534006": ["convolution_gpu_bfyx_f16", 2],
+        "13168100741247170644": ["convolution_gpu_bfyx_f16", 8],
+        "3376503797303782111": ["convolution_gpu_bfyx_f16", 3],
+        "8462839229772971651": ["convolution_gpu_bfyx_f16", 6],
+        "9678831759870330874": ["convolution_gpu_bfyx_f16", 5],
+        "10053808465394315011": ["convolution_gpu_bfyx_f16", 3],
+        "325275946658065165": ["convolution_gpu_bfyx_f16", 1],
+        "11261634534154975791": ["convolution_gpu_bfyx_f16", 7],
+        "11257004820976953882": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "5713491991252168637": ["convolution_gpu_bfyx_os_iyx_osv16", 420],
+        "2083080453795724323": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6452660166904314994": ["convolution_gpu_bfyx_os_iyx_osv16", 894],
+        "10411169140360183327": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "9743813978144755927": ["convolution_gpu_bfyx_os_iyx_osv16", 399],
+        "15769267071952355833": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "9816235120364293291": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5617875919579896151": ["fully_connected_gpu_bf_io_gemm", 1],
+        "18309383982594141239": ["fully_connected_gpu_bf_io_gemm", 2],
+        "6954046921635466236": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12133573113666871990": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18286924901612269315": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16168987643236739114": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17573344121250212662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8792004303945144557": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6055054188657886157": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16692293796070898202": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18377591093081814522": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7171735046681228890": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2461164836823254208": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14430129165479757357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14698972830975282413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3479216436904445131": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5269956004669551826": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "13594976208424418204": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12373590460058087695": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "4405236452109167503": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "14132900527730577142": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1349033639465657142": ["convolution_gpu_bfyx_gemm_like", 2],
+        "812985719328060901": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "12407276986845062239": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9170373506597510005": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1389904024718949479": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18042225157963583297": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6369935194881138691": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11002183397247930282": ["convolution_gpu_bfyx_os_iyx_osv16", 93],
+        "14040168861632997052": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10321120422537436943": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7628224528894213786": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "18333490976250555089": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12240359612725499137": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5214167856473943406": ["convolution_gpu_bfyx_os_iyx_osv16", 843],
+        "14052955765964466465": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7134511117843066284": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "4293870201735626607": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "7398517597116797925": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9788174666000966313": ["convolution_gpu_bfyx_os_iyx_osv16", 847],
+        "18042814645135189475": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "11236623772616442479": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11418806742471661595": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18007798448985514623": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "9699952679060486545": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "579905583383428310": ["convolution_gpu_bfyx_os_iyx_osv16", 95],
+        "11102245529349471251": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "10406248465333026906": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8032924116166179276": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "7025366523000457929": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4140229891062448185": ["convolution_gpu_bfyx_f16", 8],
+        "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12777387776061796777": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5183001506630431534": ["convolution_gpu_bfyx_f16", 6],
+        "13244421635448480964": ["convolution_gpu_bfyx_f16", 4],
+        "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 545],
+        "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12722030162332410659": ["convolution_gpu_bfyx_f16", 7],
+        "11753505949184833814": ["convolution_gpu_bfyx_f16", 5],
+        "1138805437505853389": ["convolution_gpu_bfyx_f16", 6],
+        "236844015511730537": ["convolution_gpu_bfyx_f16", 4],
+        "16372093802852963117": ["convolution_gpu_bfyx_f16", 4],
+        "8017045013578597247": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3767812249447618647": ["convolution_gpu_bfyx_f16", 4],
+        "14300856950146697632": ["convolution_gpu_bfyx_f16", 3],
+        "6658791967844021067": ["convolution_gpu_bfyx_f16", 2],
+        "16114394473926845719": ["convolution_gpu_bfyx_f16", 3],
+        "3329703306452769429": ["convolution_gpu_bfyx_f16", 1],
+        "12485385390638720435": ["convolution_gpu_bfyx_f16", 1],
+        "18325147922902896614": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "2042543291306492227": ["convolution_gpu_bfyx_f16", 1],
+        "14016387396197131183": ["convolution_gpu_bfyx_f16", 2],
+        "388252829841919694": ["convolution_gpu_bfyx_f16", 1],
+        "3224136725591132250": ["convolution_gpu_bfyx_f16", 1],
+        "11555472669677513180": ["convolution_gpu_bfyx_f16", 2],
+        "5878699865486527531": ["convolution_gpu_bfyx_f16", 2],
+        "14726692927619009109": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "10286586505667471565": ["convolution_gpu_bfyx_f16", 2],
+        "8635430703217243594": ["convolution_gpu_bfyx_f16", 8],
+        "3194668567618864343": ["convolution_gpu_bfyx_f16", 1],
+        "2611344153711817460": ["convolution_gpu_bfyx_f16", 1],
+        "11818558634104933451": ["convolution_gpu_bfyx_f16", 2],
+        "5390496664798965323": ["convolution_gpu_bfyx_f16", 5],
+        "13357658087174699785": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14394195619252967214": ["convolution_gpu_bfyx_f16", 7],
+        "4669930370801439013": ["convolution_gpu_bfyx_f16", 4],
+        "1370501593495473908": ["convolution_gpu_bfyx_f16", 7],
+        "4179197899143727062": ["convolution_gpu_bfyx_f16", 8],
+        "4150158815056302279": ["convolution_gpu_bfyx_f16", 3],
+        "7454737385785852953": ["convolution_gpu_bfyx_f16", 4],
+        "17477451880893252674": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "5768225444324661639": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "1885336536803061563": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "5714742374217969073": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10896471338854021271": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "3806914827253341543": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16632515980529050205": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "7466530815481157347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9025266984842296356": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "5906873273896994744": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7802957391728955870": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "9353411647951951678": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3747726337434740481": ["convolution_gpu_bfyx_os_iyx_osv16", 586],
+        "5744097132927875811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16955287132696194727": ["convolution_gpu_bfyx_os_iyx_osv16", 585],
+        "9395452164938581548": ["convolution_gpu_bfyx_os_iyx_osv16", 585],
+        "11451950139903792107": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10917339102734674830": ["convolution_gpu_bfyx_1x1", 2],
+        "6989917785852753861": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15880522773125518978": ["convolution_gpu_bfyx_1x1", 2],
+        "747373540539235872": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1416762068965596282": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "13394998921545119351": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "15308583448258001619": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "3947469783167254911": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16513233390968673543": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "7953644920144486409": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12990194208171226009": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "212918306790163121": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12507063931340717634": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "944308351100913426": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "10557004218031162358": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "15668589103337174848": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15795875095918663327": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "719423812642773465": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17406040551647193807": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "14418191404250235680": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "4355752766807245481": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8001877558898476628": ["convolution_gpu_bfyx_os_iyx_osv16", 85],
+        "5663755974835173519": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15515579892465814722": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "6228391894735143720": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "17619521756514112890": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "7618115892322102589": ["convolution_gpu_bfyx_os_iyx_osv16", 452],
+        "10859348180122457267": ["convolution_gpu_bfyx_os_iyx_osv16", 76],
+        "8920870418107208273": ["convolution_gpu_bfyx_os_iyx_osv16", 828],
+        "15455728969592248176": ["convolution_gpu_bfyx_os_iyx_osv16", 78],
+        "17332144919524270474": ["fully_connected_gpu_bf_io_gemm", 2],
+        "9513119231978452377": ["fully_connected_gpu_bf_io_gemm", 2],
+        "6332576636757295449": ["fully_connected_gpu_bf_io_gemm", 1],
+        "2493240824080071735": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4780210213847704316": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10506991028553025432": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5428553079642989652": ["convolution_gpu_bfyx_f16", 8],
+        "16121853713631575869": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1510497042951931323": ["convolution_gpu_bfyx_f16", 8],
+        "2745430731204028395": ["convolution_gpu_bfyx_f16", 8],
+        "12876976900388547418": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16890063210386281886": ["convolution_gpu_bfyx_f16", 8],
+        "3927810740679311711": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16684473381571424732": ["convolution_gpu_bfyx_f16", 8],
+        "13284968934065954912": ["convolution_gpu_bfyx_f16", 7],
+        "2631762936534205094": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1739904418563128064": ["convolution_gpu_bfyx_f16", 8],
+        "10980290216903708719": ["convolution_gpu_bfyx_f16", 8],
+        "735103333401104515": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6983554020850996053": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5921617358811124053": ["convolution_gpu_bfyx_f16", 8],
+        "14301841882009814238": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15717838690804403986": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12405925645446300036": ["convolution_gpu_bfyx_f16", 7],
+        "2258154151361692964": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "209570180062724480": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "28298051505236331": ["convolution_gpu_bfyx_f16", 6],
+        "5213864300694772508": ["convolution_gpu_bfyx_f16", 8],
+        "5762331369519287189": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2276871110978868522": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14409415690583079892": ["convolution_gpu_bfyx_f16", 8],
+        "1937137823574087575": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "852636453039879630": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5479982500377449068": ["convolution_gpu_bfyx_f16", 8],
+        "4498519811904474615": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12692499579789677851": ["convolution_gpu_bfyx_f16", 3],
+        "7387278268805782919": ["convolution_gpu_bfyx_f16", 4],
+        "2438123442946203226": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13258719154936933305": ["convolution_gpu_bfyx_f16", 8],
+        "18100501541133790185": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17588841654811790691": ["convolution_gpu_bfyx_f16", 8],
+        "6689548390020199537": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "17024985107770974703": ["convolution_gpu_bfyx_f16", 8],
+        "9968353980515607037": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17524096092737615017": ["convolution_gpu_bfyx_f16", 8],
+        "18114533502018520363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5255663532662079743": ["convolution_gpu_bfyx_f16", 8],
+        "7603883354122442155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "846394177044425685": ["convolution_gpu_bfyx_f16", 8],
+        "13144168456084150868": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11614500724316937770": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14902332370005427398": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13885328145647066921": ["convolution_gpu_bfyx_f16", 8],
+        "12145548657602787381": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13518278887400015765": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5205936493218798381": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15427181019330262398": ["convolution_gpu_bfyx_f16", 8],
+        "10959940286555804884": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8974050386876725444": ["convolution_gpu_bfyx_f16", 8],
+        "18162518237985989872": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10232477134289465267": ["convolution_gpu_bfyx_f16", 7],
+        "6212673407294495601": ["convolution_gpu_bfyx_f16", 6],
+        "17860712443740757354": ["convolution_gpu_bfyx_f16", 4],
+        "4146950753180366119": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8930966597498483291": ["convolution_gpu_bfyx_f16", 1],
+        "17992495365931215688": ["convolution_gpu_bfyx_f16", 5],
+        "7146066549311428539": ["convolution_gpu_bfyx_os_iyx_osv16", 810],
+        "3323834459803099675": ["convolution_gpu_bfyx_f16", 3],
+        "13970979686543548079": ["convolution_gpu_bfyx_f16", 1],
+        "17224261730179174234": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8599384037400091771": ["convolution_gpu_bfyx_f16", 2],
+        "13691442749949008699": ["convolution_gpu_bfyx_f16", 2],
+        "18249370808689016771": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "9285872109762575050": ["convolution_gpu_bfyx_f16", 1],
+        "3778806780227752318": ["convolution_gpu_bfyx_f16", 2],
+        "14533995413631988093": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "18213608828525161872": ["convolution_gpu_bfyx_f16", 2],
+        "18201367360115946218": ["convolution_gpu_bfyx_f16", 6],
+        "2298930512642991761": ["convolution_gpu_bfyx_os_iyx_osv16", 7],
+        "7524439404315811688": ["convolution_gpu_bfyx_f16", 1],
+        "12807813148826993243": ["convolution_gpu_bfyx_f16", 1],
+        "10386449367791123777": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13710585118886614159": ["convolution_gpu_bfyx_f16", 3],
+        "5083137484520510189": ["convolution_gpu_bfyx_f16", 7],
+        "7310721662758245090": ["convolution_gpu_bfyx_os_iyx_osv16", 794],
+        "3441130252760474056": ["convolution_gpu_bfyx_f16", 5],
+        "17896513277322150873": ["convolution_gpu_bfyx_f16", 8],
+        "15804284488423654345": ["convolution_gpu_bfyx_os_iyx_osv16", 65],
+        "7664482438087213797": ["convolution_gpu_bfyx_f16", 8],
+        "11503348949826253367": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8658460661236455347": ["convolution_gpu_bfyx_f16", 8],
+        "3330382517900635622": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17858565811787708662": ["convolution_gpu_bfyx_f16", 8],
+        "10812355035102461624": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1763713485502711028": ["convolution_gpu_bfyx_f16", 8],
+        "537708856180494652": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14039400995173243881": ["convolution_gpu_bfyx_f16", 7],
+        "16144549808790730407": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3607178347319840731": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1616172004620830694": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16914324612936017713": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5643847517719208248": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10544570776782304031": ["convolution_gpu_bfyx_f16", 8],
+        "10319250281140119656": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15493574903211227269": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8519850106535467914": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10879590057665696981": ["convolution_gpu_bfyx_f16", 8],
+        "14575912896890066295": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3580607955559330220": ["convolution_gpu_bfyx_f16", 7],
+        "11846250273112462539": ["convolution_gpu_bfyx_os_iyx_osv16", 456],
+        "5211338259118953367": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2534964735134768930": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13711299339844520924": ["convolution_gpu_bfyx_f16", 7],
+        "6904712232722172471": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "14274320868149065806": ["convolution_gpu_bfyx_f16", 2],
+        "10286169767282972888": ["convolution_gpu_bfyx_f16", 1],
+        "5778557207840627218": ["convolution_gpu_bfyx_os_iyx_osv16", 418],
+        "512957346409732922": ["convolution_gpu_bfyx_f16", 8],
+        "16606774042635630114": ["convolution_gpu_bfyx_f16", 3],
+        "2762113297856010960": ["convolution_gpu_bfyx_os_iyx_osv16", 5],
+        "16799551121569588878": ["convolution_gpu_bfyx_os_iyx_osv16", 4],
+        "3047328071912702715": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "4614195525780772879": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "229690452505740843": ["convolution_gpu_bfyx_f16", 6],
+        "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13526783681740823304": ["binary_convolution_gpu_1x1", 0],
+        "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 203],
+        "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16642117060176841433": ["convolution_gpu_bfyx_f16", 6],
+        "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0],
+        "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4859984633862705344": ["convolution_gpu_bfyx_f16", 6],
+        "6643541161570220487": ["convolution_gpu_bfyx_f16", 5],
+        "4771606875232577147": ["convolution_gpu_bfyx_f16", 7],
+        "10197214218719989238": ["convolution_gpu_bfyx_f16", 4],
+        "9021222698443352890": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13650156111024145576": ["convolution_gpu_bfyx_f16", 3],
+        "1823355198957173511": ["convolution_gpu_bfyx_f16", 2],
+        "18224832115562649218": ["convolution_gpu_bfyx_f16", 3],
+        "17989075751697734530": ["convolution_gpu_bfyx_f16", 2],
+        "11065625785330976824": ["convolution_gpu_bfyx_f16", 1],
+        "16508598944144723038": ["convolution_gpu_bfyx_f16", 1],
+        "18207737700426780826": ["convolution_gpu_bfyx_f16", 6],
+        "5739556172667922404": ["convolution_gpu_bfyx_f16", 2],
+        "5648963558592113654": ["convolution_gpu_bfyx_f16", 2],
+        "12700332665217178557": ["convolution_gpu_bfyx_f16", 5],
+        "2049835121645334394": ["convolution_gpu_bfyx_f16", 4],
+        "3127350206986146597": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "413340907950386667": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12169959708985325397": ["convolution_gpu_bfyx_f16", 8],
+        "15591038406687270109": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10924767171232495386": ["convolution_gpu_bfyx_f16", 8],
+        "13999571841387221249": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "5672447484681958632": ["convolution_gpu_bfyx_f16", 5],
+        "9409310337267359456": ["convolution_gpu_bfyx_f16", 8],
+        "12167511024377353453": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15893208324896471495": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "12208132924820452372": ["convolution_gpu_bfyx_f16", 8],
+        "3293314982357251400": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10021818023939833096": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "2473005239224541536": ["convolution_gpu_bfyx_f16", 8],
+        "6856119608423875377": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18255457417918645346": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "16665418645769386939": ["convolution_gpu_bfyx_f16", 8],
+        "10430922762683242901": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15110089333676343949": ["convolution_gpu_bfyx_os_iyx_osv16", 67],
+        "9475257013522373650": ["convolution_gpu_bfyx_f16", 8],
+        "10610536157845815072": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5557066335410910062": ["convolution_gpu_bfyx_f16", 6],
+        "18146293782255442927": ["convolution_gpu_bfyx_f16", 2],
+        "11852944538668620269": ["convolution_gpu_bfyx_f16", 7],
+        "14937682075916905713": ["convolution_gpu_bfyx_f16", 7],
+        "12792249796816770204": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17312037326967676576": ["convolution_gpu_bfyx_f16", 2],
+        "11909045540447457308": ["convolution_gpu_bfyx_f16", 6],
+        "10510946825189206241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6124992432121468125": ["convolution_gpu_bfyx_f16", 8],
+        "14190999291985701693": ["convolution_gpu_bfyx_f16", 8],
+        "13598178470968135338": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9448354674053762309": ["convolution_gpu_bfyx_f16", 2],
+        "12777599919231312068": ["convolution_gpu_bfyx_f16", 7],
+        "9337614078096106084": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16578631161511759035": ["convolution_gpu_bfyx_f16", 1],
+        "6322333494387087177": ["convolution_gpu_bfyx_f16", 7],
+        "9051299669421439712": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6900406474100422151": ["convolution_gpu_bfyx_f16", 5],
+        "1770678726875883309": ["convolution_gpu_bfyx_f16", 7],
+        "9884897216756697592": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5833785867675066644": ["convolution_gpu_bfyx_f16", 2],
+        "16500610465961551242": ["convolution_gpu_bfyx_f16", 8],
+        "1390142483294581487": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10267854415205002238": ["convolution_gpu_bfyx_f16", 4],
+        "15623187792779892835": ["convolution_gpu_bfyx_f16", 1],
+        "13148059837896884273": ["convolution_gpu_bfyx_f16", 3],
+        "171559638613408493": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10278640368905105405": ["convolution_gpu_bfyx_f16", 4],
+        "10760404678801561747": ["convolution_gpu_bfyx_f16", 2],
+        "8548473413394744544": ["convolution_gpu_bfyx_f16", 3],
+        "16729171964149306867": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3716706098703025358": ["convolution_gpu_bfyx_f16", 3],
+        "129949756464977129": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "3542071367019145965": ["fused_conv_eltwise_gpu_ref", 2],
+        "6421891780685569059": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "12397973858549014447": ["convolution_gpu_bfyx_f16", 8],
+        "147576342753084622": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9006679435677596041": ["convolution_gpu_bfyx_f16", 7],
+        "375607190849326617": ["convolution_gpu_bfyx_f16", 8],
+        "12465373696424446749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3330746708867776870": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "175810741723366131": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1393228887151888661": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2791644023635315729": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14953047924930959040": ["convolution_gpu_bfyx_f16", 8],
+        "13576707834156737134": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17441797654332334591": ["convolution_gpu_bfyx_f16", 8],
+        "4368216880157087051": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4527131704372375891": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12532849387017200369": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6143746716136988129": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "229425834968700183": ["convolution_gpu_bfyx_f16", 6],
+        "14611155839967647053": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12220017703888172171": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3017664565974342570": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3919092484794350954": ["convolution_gpu_bfyx_f16", 7],
+        "777704696687372198": ["convolution_gpu_bfyx_f16", 3],
+        "9530089245179389803": ["convolution_gpu_bfyx_os_iyx_osv16", 834],
+        "15407099455173114443": ["convolution_gpu_bfyx_f16", 6],
+        "13830673382612975715": ["convolution_gpu_bfyx_f16", 4],
+        "15418883453881678146": ["convolution_gpu_bfyx_f16", 5],
+        "18181217963990641003": ["convolution_gpu_bfyx_f16", 2],
+        "15712589409011660453": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "1196403115198061647": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "643101540653656807": ["convolution_gpu_bfyx_f16", 1],
+        "13215809871210781323": ["convolution_gpu_bfyx_f16", 1],
+        "18368779848570116967": ["convolution_gpu_bfyx_f16", 1],
+        "3544698174676763847": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "12428007544423412129": ["convolution_gpu_bfyx_f16", 6],
+        "4939325123575119544": ["convolution_gpu_bfyx_f16", 2],
+        "17947736981603570615": ["convolution_gpu_bfyx_f16", 8],
+        "6620861214152396614": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "10018756206737727294": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5830779024517851317": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "7913817244562964901": ["convolution_gpu_bfyx_f16", 8],
+        "11779589567746893119": ["convolution_gpu_bfyx_f16", 8],
+        "5287441936829096354": ["convolution_gpu_bfyx_os_iyx_osv16", 289],
+        "16879635677321458783": ["convolution_gpu_bfyx_f16", 8],
+        "5936894667802097344": ["convolution_gpu_bfyx_f16", 8],
+        "12029555773381953470": ["convolution_gpu_bfyx_f16", 8],
+        "1395714970525756800": ["convolution_gpu_bfyx_f16", 8],
+        "18366381433142273315": ["convolution_gpu_bfyx_f16", 8],
+        "17839315025229585473": ["convolution_gpu_bfyx_f16", 8],
+        "7428339090190576585": ["convolution_gpu_bfyx_f16", 8],
+        "16427721132197847241": ["convolution_gpu_bfyx_f16", 8],
+        "929038963682864275": ["convolution_gpu_bfyx_f16", 8],
+        "6348679735483401866": ["convolution_gpu_bfyx_f16", 7],
+        "17409943223289937333": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10896472785943286419": ["convolution_gpu_bfyx_f16", 8],
+        "8675423965229942895": ["convolution_gpu_bfyx_f16", 8],
+        "15359653790909326580": ["convolution_gpu_bfyx_f16", 4],
+        "937772044105590355": ["convolution_gpu_bfyx_f16", 8],
+        "11630003841984891663": ["convolution_gpu_bfyx_f16", 8],
+        "15721323944762357421": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18032560040713612222": ["convolution_gpu_bfyx_f16", 8],
+        "16185581163541386950": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7296460872108123423": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18375557444371775299": ["convolution_gpu_bfyx_f16", 8],
+        "10922059457537054563": ["convolution_gpu_bfyx_f16", 8],
+        "122295605901184339": ["convolution_gpu_bfyx_f16", 4],
+        "12164250230746861951": ["convolution_gpu_bfyx_f16", 3],
+        "8176114476658865003": ["convolution_gpu_bfyx_os_iyx_osv16", 1049],
+        "7408205445085068145": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11368781584821592726": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13893351700564465666": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3518310626820299509": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14411220648355431920": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16360948136590378689": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12603778068505548164": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7393554260847466099": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11640173157120764930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10391275203444358233": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "12900060990097311151": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13949457796213177880": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12229727046452778843": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6356853913935067660": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2057724637751433123": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16951394780935673368": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4362905853733519089": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11437739738725998008": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1166763569766001639": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "13509884479614626207": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13561264673311456568": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13949179271064170300": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9085227279626009353": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "15999251370466034620": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8278218983765546430": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1283216388519834306": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10392839783862963669": ["convolution_gpu_bfyx_gemm_like", 1],
+        "446095524058497778": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "16418977525726114825": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "5073696559530173773": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "8986786677408239490": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "18154134293896237020": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9604863051097029874": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "12931069967038668164": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "6806199908367808607": ["convolution_gpu_bfyx_os_iyx_osv16", 483],
+        "11683146685348965370": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8154297486284619437": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14336744408490491240": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "4571901717343198720": ["convolution_gpu_bfyx_os_iyx_osv16", 562],
+        "6532394816830144120": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2666796249274140911": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "11653606109120321972": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "6204893434840435239": ["convolution_gpu_bfyx_os_iyx_osv16", 876],
+        "13218364348439640168": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10201555771333451359": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "6894773592689372049": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7168438768023636584": ["convolution_gpu_bfyx_f16", 8],
+        "10451355428354516953": ["convolution_gpu_bfyx_f16", 8],
+        "14472734042788843355": ["convolution_gpu_bfyx_f16", 8],
+        "10854104081943494369": ["convolution_gpu_bfyx_f16", 8],
+        "93020906459675429": ["convolution_gpu_bfyx_f16", 6],
+        "18398350909015256408": ["convolution_gpu_bfyx_f16", 8],
+        "4154340122141626612": ["convolution_gpu_bfyx_f16", 8],
+        "18200289027422735061": ["convolution_gpu_bfyx_f16", 7],
+        "5565357052205136958": ["convolution_gpu_bfyx_f16", 4],
+        "15946908544184249774": ["convolution_gpu_bfyx_f16", 8],
+        "14037627422329357174": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14408378031985995049": ["convolution_gpu_bfyx_f16", 4],
+        "13211513495214123892": ["convolution_gpu_bfyx_f16", 6],
+        "1496494589494248203": ["convolution_gpu_bfyx_f16", 8],
+        "17087805036943027743": ["convolution_gpu_bfyx_f16", 8],
+        "13247615789377163390": ["convolution_gpu_bfyx_f16", 3],
+        "5098352369763200627": ["convolution_gpu_bfyx_f16", 8],
+        "7557421223834089733": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10657042057899091892": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2064129679519084519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13149626711154707837": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2450247775784772609": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "9349162934459662079": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13032204489661886072": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9915338154088450212": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2204239160621715211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449351266437601922": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "1155876454105658452": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10367977997774504988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9837317326715221119": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "10725269803461677890": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "2877965337998085379": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5765037690630152391": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2731214798095843918": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "4432212871967601555": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "13674833960992369491": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2407729796226002219": ["convolution_gpu_bfyx_f16", 8],
+        "8712233195607754052": ["convolution_gpu_bfyx_f16", 8],
+        "18376338036643391330": ["convolution_gpu_bfyx_f16", 7],
+        "15000057703375682508": ["convolution_gpu_bfyx_f16", 5],
+        "11538380796610598086": ["convolution_gpu_bfyx_f16", 8],
+        "8807959414103299339": ["convolution_gpu_bfyx_f16", 8],
+        "8923709952861619751": ["convolution_gpu_bfyx_f16", 8],
+        "2281723979610106495": ["convolution_gpu_bfyx_f16", 6],
+        "7003402145984308994": ["convolution_gpu_bfyx_f16", 8],
+        "8802871881972169446": ["convolution_gpu_bfyx_f16", 8],
+        "17672255854769914684": ["convolution_gpu_bfyx_f16", 7],
+        "15989515952156087492": ["convolution_gpu_bfyx_f16", 8],
+        "12613916101209377956": ["convolution_gpu_bfyx_f16", 8],
+        "8926372099361990033": ["convolution_gpu_bfyx_f16", 7],
+        "13134908817293730842": ["convolution_gpu_bfyx_f16", 8],
+        "14010406343040661271": ["convolution_gpu_bfyx_f16", 8],
+        "2325094934617563483": ["convolution_gpu_bfyx_f16", 8],
+        "11756769107875909669": ["convolution_gpu_bfyx_f16", 3],
+        "8711172943068374489": ["convolution_gpu_bfyx_f16", 8],
+        "1909118584082415877": ["convolution_gpu_bfyx_f16", 5],
+        "12040023093627702264": ["convolution_gpu_bfyx_f16", 8],
+        "6474623094910171017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6747189810752747337": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "12216744913496272224": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4342399258032747578": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5084349834068342816": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "12786257902562938666": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "8284243114775216351": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17588749900110806571": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15418915313718368321": ["convolution_gpu_bfyx_os_iyx_osv16", 721],
+        "763194125654617818": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5660634357872541998": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16012873046323424192": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "3722402584962183950": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9761723873626289438": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4696863372127622823": ["convolution_gpu_bfyx_os_iyx_osv16", 341],
+        "3364509432107392704": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "17187804634689894363": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10681521954706351183": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4147438820393951383": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "11120743380724204067": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9774801800070756895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1998618394547230268": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "4933328578946081154": ["convolution_gpu_bfyx_os_iyx_osv16", 860],
+        "13882747247011638614": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "814582084353022226": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "4844820846457555156": ["convolution_gpu_bfyx_os_iyx_osv16", 509],
+        "6607603202773469786": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "15439502814859116813": ["convolution_gpu_bfyx_os_iyx_osv16", 167],
+        "15777107988701235428": ["convolution_gpu_bfyx_os_iyx_osv16", 499],
+        "12832042711454018844": ["convolution_gpu_bfyx_os_iyx_osv16", 878],
+        "6099745418702030715": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "4230880085403638923": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "62516450676185117": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "5638081054417809107": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16704551377771794086": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11414353004383751891": ["convolution_gpu_bfyx_f16", 8],
+        "13826353934358977360": ["convolution_gpu_bfyx_f16", 5],
+        "12571951090832825431": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "12750018695410865011": ["convolution_gpu_bfyx_f16", 8],
+        "6036780184043053863": ["convolution_gpu_bfyx_f16", 8],
+        "6704445240879304751": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6059617597062194696": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17195686088514144017": ["convolution_gpu_bfyx_f16", 6],
+        "6228695761133876306": ["convolution_gpu_bfyx_f16", 8],
+        "1875177778795651060": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1020688071038165625": ["convolution_gpu_bfyx_f16", 6],
+        "11609278929695762477": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "11618496013484392127": ["convolution_gpu_bfyx_f16", 7],
+        "9467068612251977759": ["convolution_gpu_bfyx_f16", 5],
+        "11965876788458629557": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1968426148563107280": ["convolution_gpu_bfyx_f16", 4],
+        "5809259008840872032": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16891389262193208125": ["convolution_gpu_bfyx_f16", 8],
+        "17789658392895927080": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13756435969613742897": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3851698237626497000": ["convolution_gpu_bfyx_os_iyx_osv16", 498],
+        "13501391260376277367": ["convolution_gpu_bfyx_os_iyx_osv16", 123],
+        "1765482196017051011": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12502159939277602017": ["convolution_gpu_bfyx_f16", 3],
+        "16051024745177409774": ["convolution_gpu_bfyx_f16", 4],
+        "13606942804997151903": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4351281046292319725": ["convolution_gpu_bfyx_f16", 4],
+        "6443607999496148234": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13547488471348547459": ["convolution_gpu_bfyx_f16", 4],
+        "18238745366827633559": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "11158062361663031443": ["convolution_gpu_bfyx_os_iyx_osv16", 416],
+        "9206861055140649226": ["convolution_gpu_bfyx_f16", 8],
+        "9426001650092504798": ["convolution_gpu_bfyx_f16", 2],
+        "13181672943699248834": ["convolution_gpu_bfyx_os_iyx_osv16", 383],
+        "6339523663850142246": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "1471109004832880586": ["convolution_gpu_bfyx_f16", 3],
+        "17107284393334082714": ["convolution_gpu_bfyx_f16", 6],
+        "13719359892110227962": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "9464351599302771690": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "2096653216949318450": ["convolution_gpu_bfyx_f16", 3],
+        "13025913519962707885": ["convolution_gpu_bfyx_f16", 7],
+        "17728310140731150226": ["convolution_gpu_bfyx_os_iyx_osv16", 792],
+        "5737189353417573057": ["convolution_gpu_bfyx_os_iyx_osv16", 382],
+        "3213984700185874261": ["convolution_gpu_bfyx_f16", 3],
+        "10740106091021667886": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "13362856801855126628": ["convolution_gpu_bfyx_os_iyx_osv16", 442],
+        "5477965717233241895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13669762279828807941": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11383807956757990177": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660099130061496863": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17151683028720387864": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1859914910272455189": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7396998153023492339": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2008700175670389343": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16827869183124732303": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13120889385491477637": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18305507733019922935": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4387964680811897490": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9490382148010824252": ["convolution_gpu_bfyx_os_iyx_osv16", 689],
+        "7607585452987307694": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6647358668213164168": ["convolution_gpu_bfyx_os_iyx_osv16", 693],
+        "3269426835760928022": ["convolution_gpu_bfyx_os_iyx_osv16", 690],
+        "8407302923973070317": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7392260165026897157": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17129583679506972654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15394113208725741887": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2232515974555590822": ["convolution_gpu_bfyx_os_iyx_osv16", 8],
+        "17180103562901495937": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16817205245313896299": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2495268194877370173": ["convolution_gpu_bfyx_f16", 7],
+        "12476976926994223419": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "837759583632984386": ["convolution_gpu_bfyx_f16", 8],
+        "15704905077262309915": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15294932718062276977": ["convolution_gpu_bfyx_f16", 8],
+        "4080044423867161503": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12782915336639648289": ["convolution_gpu_bfyx_f16", 5],
+        "6939516498492475263": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689321018957344059": ["convolution_gpu_bfyx_f16", 3],
+        "2757721937742809580": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "10786200002789430346": ["convolution_gpu_bfyx_f16", 6],
+        "1941288041804222048": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14851676883700287486": ["convolution_gpu_bfyx_f16", 6],
+        "17430311645965116316": ["convolution_gpu_bfyx_f16", 6],
+        "3115685904789548595": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12312218395355058343": ["convolution_gpu_bfyx_f16", 6],
+        "17435783978159028678": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18104511008021666751": ["convolution_gpu_bfyx_f16", 7],
+        "2889130721514872852": ["convolution_gpu_bfyx_f16", 2],
+        "10924517066879469764": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "16962109663829219905": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12727830299177939535": ["convolution_gpu_bfyx_f16", 8],
+        "14199062222704041939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10076860909609577057": ["convolution_gpu_bfyx_f16", 8],
+        "1776138842548256617": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12080107273581243331": ["convolution_gpu_bfyx_f16", 7],
+        "17797545214985482309": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1422959599890390628": ["convolution_gpu_bfyx_f16", 8],
+        "9838313987238017367": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5055964951388373312": ["convolution_gpu_bfyx_f16", 8],
+        "9954422981575375090": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6900498541045596449": ["convolution_gpu_bfyx_f16", 8],
+        "10947987508463792407": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8011212857567850331": ["convolution_gpu_bfyx_f16", 8],
+        "12066306068956923073": ["convolution_gpu_bfyx_f16", 8],
+        "10402257611113721897": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16298426629186155976": ["convolution_gpu_bfyx_f16", 6],
+        "5849544612077982343": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7486977943442929227": ["convolution_gpu_bfyx_f16", 6],
+        "4669548232510373224": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11178064889018543448": ["convolution_gpu_bfyx_os_iyx_osv16", 1042],
+        "14338047015194840420": ["convolution_gpu_bfyx_os_iyx_osv16", 290],
+        "11113611129372516159": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7571325526315806090": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "16051125771881231197": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "5500003724328450643": ["convolution_gpu_bfyx_os_iyx_osv16", 501],
+        "6990517414810688521": ["fully_connected_gpu_bf_io_gemm", 2],
+        "3365158575268504690": ["convolution_gpu_bfyx_os_iyx_osv16", 98],
+        "15532688375958629736": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "7172667569652614272": ["convolution_gpu_bfyx_os_iyx_osv16", 605],
+        "9852378413482765633": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "17891867756237002865": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "8518413618774363848": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "3035058890807107503": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10522964111588366077": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5091533143160590449": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1111],
+        "18403842741213451915": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "16363667733973120518": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17885073348446455401": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1110],
+        "11022756012642936369": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11139267075730841649": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11229901418407413996": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5222025157174261438": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 730],
+        "17449182536559459768": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6297704420477135889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11108691276983929466": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 735],
+        "17214808446370850848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10308273010954959421": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15889539072687412294": ["convolution_gpu_bfyx_gemm_like", 2],
+        "543890610580810398": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 662],
+        "10792988210112094339": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "7408203620228473987": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16676023485427668788": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 286],
+        "947940965229080670": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1038],
+        "13790640092608885830": ["convolution_gpu_bfyx_os_iyx_osv16", 861],
+        "6545311138362761303": ["convolution_gpu_bfyx_os_iyx_osv16", 885],
+        "5305325292949121227": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "12206315739377842316": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15794321689897308881": ["convolution_gpu_bfyx_f16", 5],
+        "5484125953239615763": ["convolution_gpu_bfyx_f16", 6],
+        "2757551509240446139": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18162571474251370775": ["convolution_gpu_bfyx_f16", 7],
+        "18166598730010472057": ["convolution_gpu_bfyx_f16", 7],
+        "9693459623757611016": ["convolution_gpu_bfyx_f16", 5],
+        "5728119660273315956": ["convolution_gpu_bfyx_f16", 4],
+        "11239914102833617438": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "12459704794510442759": ["convolution_gpu_bfyx_f16", 5],
+        "10085932287585840621": ["convolution_gpu_bfyx_f16", 6],
+        "4683959402324362591": ["convolution_gpu_bfyx_f16", 6],
+        "601777369358795451": ["convolution_gpu_bfyx_f16", 1],
+        "18164526837814844607": ["convolution_gpu_bfyx_f16", 7],
+        "10891482236028483911": ["convolution_gpu_bfyx_f16", 8],
+        "2288431871961311886": ["convolution_gpu_bfyx_f16", 8],
+        "8042721734241214802": ["convolution_gpu_bfyx_f16", 2],
+        "2099403897129551255": ["convolution_gpu_bfyx_f16", 6],
+        "13249070386604821754": ["convolution_gpu_bfyx_f16", 2],
+        "13094552025197588032": ["convolution_gpu_bfyx_f16", 7],
+        "3033228150494649847": ["convolution_gpu_bfyx_f16", 3],
+        "1886675028572526491": ["convolution_gpu_bfyx_f16", 8],
+        "13297546803430310514": ["convolution_gpu_bfyx_f16", 6],
+        "7841875474696309399": ["convolution_gpu_bfyx_f16", 8],
+        "18152244993328643321": ["convolution_gpu_bfyx_f16", 2],
+        "6360926220193053423": ["convolution_gpu_bfyx_f16", 8],
+        "17119700657499960250": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "10899267078041093597": ["convolution_gpu_bfyx_os_iyx_osv16", 44],
+        "11509503516680870396": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3553844546517243430": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "11739050017164389431": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "14683616789766294266": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1178443422000627700": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 741],
+        "3959894501921049830": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "6268257722565030993": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8104007721367839894": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11004242349744689661": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "18331651243656907622": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "165832937834890614": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13820132527548818114": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "11494973886338256684": ["convolution_gpu_bfyx_os_iyx_osv16", 598],
+        "9562717353252171645": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15182874743616431755": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "11923231799522030843": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "7212742683076043022": ["convolution_gpu_bfyx_os_iyx_osv16", 731],
+        "1535659774314187616": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9077124630226762093": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "10707439442194349922": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "13670707208998927662": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11898738546265963886": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "7218310781442328740": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 213],
+        "17307988793370069255": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "3159313229944494871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2202381460552007272": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 740],
+        "4539543204582046751": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "2922645767583925625": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "11165701472241951833": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "3582634693373659847": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "3334026180071867610": ["convolution_gpu_bfyx_os_iyx_osv16", 92],
+        "5443310231181579928": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "17203265678149575116": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "18001153514387944483": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "14678448066677992909": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1472673738079022921": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "9210929274479838540": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 358],
+        "14052560267577031250": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "11761558075765102945": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2994573423350313291": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "6446696801960621776": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "4993668527725303377": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16681164889734441913": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "5870803719794486347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "5252877195442523975": ["convolution_gpu_bfyx_os_iyx_osv16", 81],
+        "17366351824112539739": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4000739627265205773": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "3021897915458395756": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "3587400134318800957": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "899884405480315978": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "16109177282570031068": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 94],
+        "4102196194477012012": ["convolution_gpu_bfyx_os_iyx_osv16", 84],
+        "6443971566937312874": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5730812864956211386": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 471],
+        "10903630002990314118": ["convolution_gpu_bfyx_f16", 7],
+        "14792133935314535772": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "134764196422339946": ["convolution_gpu_bfyx_f16", 8],
+        "7649098040464263012": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "8261353883745708993": ["convolution_gpu_bfyx_f16", 8],
+        "14729283399254215184": ["convolution_gpu_bfyx_f16", 8],
+        "15972034366129164791": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "6089665236185789777": ["convolution_gpu_bfyx_f16", 7],
+        "13871746223287309461": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10063803553810811685": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1872584393135018560": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5428450090197909187": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16371170442503065678": ["convolution_gpu_bfyx_f16", 8],
+        "4237307788889339587": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6443689845617564164": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1311581305426450842": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12519308309976060263": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9615782627992922213": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8051063619232397665": ["convolution_gpu_bfyx_f16", 8],
+        "10987585104127812498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7131640551183167105": ["convolution_gpu_bfyx_f16", 3],
+        "17528260968382789267": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3692483328113186067": ["convolution_gpu_bfyx_f16", 8],
+        "7694255321069379488": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3545269441923145336": ["convolution_gpu_bfyx_f16", 6],
+        "10643380013749923489": ["convolution_gpu_bfyx_f16", 4],
+        "13821946704646192935": ["convolution_gpu_bfyx_f16", 6],
+        "7150971004919685584": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7555796481960570354": ["convolution_gpu_bfyx_f16", 1],
+        "1802080211194796745": ["convolution_gpu_bfyx_f16", 5],
+        "11278616463993391107": ["convolution_gpu_bfyx_f16", 1],
+        "4522486456498017325": ["convolution_gpu_bfyx_f16", 1],
+        "10839722921299529226": ["convolution_gpu_bfyx_f16", 5],
+        "12049793935704273778": ["convolution_gpu_bfyx_f16", 2],
+        "11167394660860618324": ["convolution_gpu_bfyx_f16", 2],
+        "10589914405539478974": ["convolution_gpu_bfyx_f16", 0],
+        "1873986292070678779": ["convolution_gpu_bfyx_f16", 6],
+        "2954053167638478731": ["convolution_gpu_bfyx_f16", 3],
+        "4151068961170605556": ["convolution_gpu_bfyx_f16", 2],
+        "430695072683807402": ["convolution_gpu_bfyx_os_iyx_osv16", 756],
+        "2475680330312153399": ["convolution_gpu_bfyx_os_iyx_osv16", 380],
+        "4814090476154320391": ["convolution_gpu_bfyx_gemm_like", 1],
+        "401208792095350972": ["convolution_gpu_bfyx_os_iyx_osv16", 140],
+        "12980211839763569977": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "6011669866574390388": ["fully_connected_gpu_fb_oi_ref", 0],
+        "4568334008414745667": ["fully_connected_gpu_fb_oi_ref", 1],
+        "11395215181578068623": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14616145871710456304": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "5168719682914827724": ["convolution_gpu_bfyx_os_iyx_osv16", 495],
+        "9473263513191498949": ["convolution_gpu_bfyx_gemm_like", 0],
+        "10720631808458688474": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5434387853485184980": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "2668670046934680180": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6784038318046980185": ["convolution_gpu_bfyx_gemm_like", 0],
+        "6248879028648699716": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "1436424324238684653": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1852843918994539642": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1199836165181399413": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "609944608610496003": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14740129361300854586": ["convolution_gpu_bfyx_gemm_like", 1],
+        "5500102903434438965": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "7297288884568452370": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5136459381906620211": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17411381157694639837": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "2491010747718166234": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8553537608760917592": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "18310729590270667665": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "18061582718156557458": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3957386760515436702": ["convolution_gpu_bfyx_os_iyx_osv16", 1080],
+        "18218313235608627889": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "16069469614549557651": ["convolution_gpu_bfyx_gemm_like", 2],
+        "706526643700857104": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14937087468947592213": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10242452169628899571": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13635064319608016375": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "16629319403227634487": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5089311900051393846": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12955977963529216714": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9000599407449073799": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "4538238288532448191": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "5772569803234537608": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "12841353805697309892": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14614506535270942373": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3934913926529554178": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "5041391468298673889": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7553664247542433501": ["convolution_gpu_bfyx_f16", 3],
+        "11605857135211514409": ["convolution_gpu_bfyx_f16", 5],
+        "444971365656194402": ["convolution_gpu_bfyx_f16", 5],
+        "5717483566010225296": ["convolution_gpu_bfyx_f16", 5],
+        "4288000530029758806": ["convolution_gpu_bfyx_f16", 6],
+        "10127890223685122341": ["convolution_gpu_bfyx_f16", 1],
+        "9968769246349627476": ["convolution_gpu_bfyx_f16", 3],
+        "6199309548387501256": ["convolution_gpu_bfyx_f16", 5],
+        "12620136462430164778": ["convolution_gpu_bfyx_f16", 2],
+        "4030102215743087748": ["convolution_gpu_bfyx_f16", 4],
+        "5207630762737842899": ["convolution_gpu_bfyx_f16", 5],
+        "13353279607627735162": ["convolution_gpu_bfyx_f16", 2],
+        "12545074411559266651": ["convolution_gpu_bfyx_os_iyx_osv16", 14],
+        "11750405629109652478": ["convolution_gpu_bfyx_os_iyx_osv16", 451],
+        "4119964432511449865": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "13746113667444417879": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "10795302670177759469": ["convolution_gpu_bfyx_os_iyx_osv16", 77],
+        "13013373169763193744": ["convolution_gpu_bfyx_os_iyx_osv16", 77],
+        "4192778340765412918": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "1376358627098743825": ["fully_connected_gpu_bf_io_gemm", 2],
+        "1657296775356261285": ["fully_connected_gpu_bf_io_gemm", 0],
+        "5000052273967689626": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3550687136164360833": ["convolution_gpu_bfyx_os_iyx_osv16", 506],
+        "11872436735917473629": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15213766544697966206": ["convolution_gpu_bfyx_os_iyx_osv16", 138],
+        "2360564651185435605": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "8722771796488042004": ["convolution_gpu_bfyx_os_iyx_osv16", 94],
+        "8089184198607217332": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "4031469107004893821": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "16899210497921809352": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "2002591318101502434": ["fully_connected_gpu_bf_io_gemm", 1],
+        "8508119169246513026": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13461678175466315866": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1580848418974169308": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13189391944650202330": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5850612837647497531": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12734736056404146766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3072344987020666532": ["convolution_gpu_bfyx_os_iyx_osv16", 241],
+        "5932710369376133446": ["convolution_gpu_bfyx_os_iyx_osv16", 241],
+        "15493383292734604744": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8721087995946196075": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "16124622994105864663": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "1289727743091243002": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "13254721852483301327": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "14370151670822727933": ["convolution_gpu_bfyx_f16", 8],
+        "15924942580474924301": ["convolution_gpu_bfyx_f16", 8],
+        "8623363904987921339": ["convolution_gpu_bfyx_f16", 8],
+        "14279463126817989625": ["convolution_gpu_bfyx_f16", 8],
+        "5771041403997282348": ["convolution_gpu_bfyx_f16", 7],
+        "13460126563546214581": ["convolution_gpu_bfyx_f16", 8],
+        "12781821122129489865": ["convolution_gpu_bfyx_f16", 8],
+        "5488105527272322800": ["convolution_gpu_bfyx_f16", 6],
+        "17828931984028343371": ["convolution_gpu_bfyx_f16", 8],
+        "12277484678078733815": ["convolution_gpu_bfyx_f16", 8],
+        "4379377123702223052": ["convolution_gpu_bfyx_f16", 6],
+        "13844007082423168759": ["convolution_gpu_bfyx_f16", 8],
+        "3449674399921465807": ["convolution_gpu_bfyx_f16", 8],
+        "8965134305331582692": ["convolution_gpu_bfyx_f16", 6],
+        "15610672058730770735": ["convolution_gpu_bfyx_f16", 8],
+        "5896690968165005425": ["convolution_gpu_bfyx_f16", 8],
+        "3524820477574731101": ["convolution_gpu_bfyx_f16", 8],
+        "18408976645775193874": ["convolution_gpu_bfyx_f16", 3],
+        "153117019091512087": ["convolution_gpu_bfyx_f16", 5],
+        "8416875419376211043": ["convolution_gpu_bfyx_f16", 8],
+        "8583589102830838750": ["convolution_gpu_bfyx_f16", 4],
+        "3120895120217288923": ["convolution_gpu_bfyx_f16", 8],
+        "15703787067071726819": ["convolution_gpu_bfyx_f16", 8],
+        "15071840228430286000": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10604301655933773079": ["convolution_gpu_bfyx_f16", 8],
+        "5325610794742202015": ["convolution_gpu_bfyx_f16", 8],
+        "17964375317877714914": ["convolution_gpu_bfyx_f16", 3],
+        "12053395569113050851": ["convolution_gpu_bfyx_f16", 5],
+        "5324438133148786955": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11947915845132871271": ["convolution_gpu_bfyx_f16", 6],
+        "7174430747851522735": ["convolution_gpu_bfyx_f16", 8],
+        "10817252888446193325": ["convolution_gpu_bfyx_f16", 7],
+        "17227836941611747083": ["convolution_gpu_bfyx_f16", 7],
+        "4263250141898433551": ["convolution_gpu_bfyx_f16", 8],
+        "15197397105147240256": ["convolution_gpu_bfyx_f16", 8],
+        "16826153050492358220": ["convolution_gpu_bfyx_f16", 8],
+        "6280191601341007993": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14088402405545946752": ["convolution_gpu_bfyx_f16", 8],
+        "11924657120112219404": ["convolution_gpu_bfyx_f16", 8],
+        "2913643747915656322": ["convolution_gpu_bfyx_f16", 7],
+        "5972050422651513452": ["convolution_gpu_bfyx_f16", 8],
+        "594987933637320500": ["convolution_gpu_bfyx_f16", 8],
+        "9117780867293066941": ["convolution_gpu_bfyx_f16", 7],
+        "10537265908618264344": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "516889867886123061": ["convolution_gpu_bfyx_f16", 8],
+        "10843546687065559705": ["convolution_gpu_bfyx_f16", 8],
+        "3218921047283180399": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4228809976880203196": ["convolution_gpu_bfyx_f16", 8],
+        "1254993062868768184": ["convolution_gpu_bfyx_f16", 8],
+        "9020025051393195609": ["convolution_gpu_bfyx_f16", 6],
+        "3651025785330045688": ["convolution_gpu_bfyx_f16", 8],
+        "7338126575707982952": ["convolution_gpu_bfyx_f16", 8],
+        "8920269652980704805": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11886205472834821311": ["convolution_gpu_bfyx_f16", 8],
+        "16386706804437469983": ["convolution_gpu_bfyx_f16", 8],
+        "10891126431353150021": ["convolution_gpu_bfyx_f16", 4],
+        "1543362854403350459": ["convolution_gpu_bfyx_f16", 4],
+        "355779126755706267": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5058842932401709044": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3474787752627590416": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "11397785525222803208": ["convolution_gpu_bfyx_f16", 8],
+        "17189570066626092769": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "5416759191132692795": ["convolution_gpu_bfyx_f16", 8],
+        "15779223980784666571": ["convolution_gpu_bfyx_f16", 8],
+        "9876867732461890358": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4395456194321195850": ["convolution_gpu_bfyx_f16", 8],
+        "17305875411294121419": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14177925973191748560": ["convolution_gpu_bfyx_f16", 8],
+        "11226417030784059608": ["convolution_gpu_bfyx_f16", 8],
+        "13701064960014710666": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7578986390590629232": ["convolution_gpu_bfyx_f16", 6],
+        "18249884212717189127": ["convolution_gpu_bfyx_f16", 8],
+        "15067786896746843371": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9401123449386398137": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16048844891250863961": ["convolution_gpu_bfyx_f16", 8],
+        "6728497031007384694": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2516044816386401962": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16648925195162114604": ["convolution_gpu_bfyx_f16", 7],
+        "5525235637767568664": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1390363802044780888": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4660585027739905927": ["convolution_gpu_bfyx_f16", 6],
+        "13129095945962624168": ["convolution_gpu_bfyx_f16", 8],
+        "12788331791832647513": ["convolution_gpu_bfyx_f16", 8],
+        "14679708279498263758": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11429404684477518332": ["convolution_gpu_bfyx_f16", 8],
+        "14865242305011765347": ["convolution_gpu_bfyx_f16", 8],
+        "3336471572478290650": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11500634857451727324": ["convolution_gpu_bfyx_f16", 6],
+        "11595683408837949967": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3687032763857590951": ["convolution_gpu_bfyx_f16", 4],
+        "6217128641000145091": ["convolution_gpu_bfyx_f16", 4],
+        "16821549364716957301": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15302171095409526152": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11864034994554724984": ["convolution_gpu_bfyx_gemm_like", 0],
+        "14870500658715304450": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17166137878933261586": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4747051200454897627": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "6103297927282359131": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12354577265502528987": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5629707522150533470": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "4669341675413375412": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5824689503286977960": ["convolution_gpu_bfyx_os_iyx_osv16", 1089],
+        "1546913508270024508": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12481630413997884765": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12369473519508202883": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "11638732022457475943": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12849186218218397773": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "6223786629523546158": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "16409268476361727461": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5980922898957821133": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "7460501275697392871": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "18201105716376505767": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12988798514363758096": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7520277356070624225": ["convolution_gpu_bfyx_os_iyx_osv16", 343],
+        "16428007036508355221": ["convolution_gpu_bfyx_os_iyx_osv16", 673],
+        "13507669105800739346": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6145360746375638990": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "1961091377024077375": ["convolution_gpu_bfyx_os_iyx_osv16", 724],
+        "10172928736166564721": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "17782881999244653954": ["convolution_gpu_bfyx_os_iyx_osv16", 725],
+        "6247677935035843769": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1313412961572873590": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8815917103337680437": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "2588641648834517674": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "13375988416811654681": ["convolution_gpu_bfyx_f16", 8],
+        "639248689874473989": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "18230323277806031379": ["convolution_gpu_bfyx_f16", 8],
+        "9840337783900152749": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "9675828062182624375": ["convolution_gpu_bfyx_f16", 8],
+        "9377492070079649297": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7112970727457201985": ["convolution_gpu_bfyx_f16", 7],
+        "11374310483937359607": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10352635863591448343": ["convolution_gpu_bfyx_f16", 7],
+        "6800759166076075555": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16002017704446988989": ["convolution_gpu_bfyx_f16", 7],
+        "18425882095713222772": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17369116353245315394": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "7965652103961413110": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "541252306015518029": ["convolution_gpu_bfyx_f16", 6],
+        "1060404712024230017": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3993499882812656917": ["convolution_gpu_bfyx_f16", 7],
+        "17846701225707639413": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13408643537227938026": ["convolution_gpu_bfyx_f16", 7],
+        "9522850710190578404": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12574188065500921524": ["convolution_gpu_bfyx_f16", 8],
+        "3789554453839587972": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5299625387052100099": ["convolution_gpu_bfyx_f16", 6],
+        "336892846225020589": ["convolution_gpu_bfyx_f16", 6],
+        "5185829229139389046": ["convolution_gpu_bfyx_f16", 5],
+        "2831646144717760351": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18424570657159231491": ["convolution_gpu_bfyx_f16", 5],
+        "9205364888756552960": ["convolution_gpu_bfyx_f16", 3],
+        "3887640479401316139": ["convolution_gpu_bfyx_os_iyx_osv16", 48],
+        "6656241698352770423": ["convolution_gpu_bfyx_f16", 3],
+        "7811986603236019243": ["convolution_gpu_bfyx_f16", 2],
+        "9876053612488794566": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16267089649659533695": ["convolution_gpu_bfyx_f16", 2],
+        "16995874808271476239": ["convolution_gpu_bfyx_f16", 2],
+        "9563406721506757343": ["convolution_gpu_bfyx_os_iyx_osv16", 13],
+        "5267299357355370554": ["convolution_gpu_bfyx_f16", 1],
+        "6658762342071792814": ["convolution_gpu_bfyx_f16", 2],
+        "3847156373714139957": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "2382572392955625224": ["convolution_gpu_bfyx_f16", 1],
+        "15376846894059882538": ["convolution_gpu_bfyx_f16", 6],
+        "4522481920910455569": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "13437481345646491476": ["convolution_gpu_bfyx_f16", 1],
+        "903637112880415287": ["convolution_gpu_bfyx_f16", 2],
+        "11379365004693699817": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "4598588079263356267": ["convolution_gpu_bfyx_f16", 7],
+        "3642481197610192005": ["convolution_gpu_bfyx_f16", 3],
+        "12423446124851974206": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "11020339094339633876": ["convolution_gpu_bfyx_f16", 8],
+        "1366222232036504221": ["convolution_gpu_bfyx_f16", 5],
+        "7837288500475798381": ["convolution_gpu_bfyx_os_iyx_osv16", 64],
+        "2779831597589397721": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14888498856025675875": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13008816286946828339": ["convolution_gpu_bfyx_os_iyx_osv16", 131],
+        "14472562307183930494": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "12260051528344627305": ["convolution_gpu_bfyx_os_iyx_osv16", 877],
+        "12237139830764526217": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "12839904859734107448": ["convolution_gpu_bfyx_os_iyx_osv16", 839],
+        "2557331839687658350": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "14711934417369240383": ["convolution_gpu_bfyx_os_iyx_osv16", 443],
+        "7324956106181658437": ["convolution_gpu_bfyx_f16", 8],
+        "11516100376069186015": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "75988560390265531": ["convolution_gpu_bfyx_f16", 8],
+        "7993548757830399994": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13514240768023629554": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13043388032264307920": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3870546364113237300": ["convolution_gpu_bfyx_f16", 7],
+        "260191733589958832": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15013159908977981805": ["convolution_gpu_bfyx_f16", 6],
+        "9337772532306485903": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2586645227127931947": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10949794786261718674": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4075981715729743261": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5260474776491928924": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1199570205321806135": ["convolution_gpu_bfyx_f16", 6],
+        "11375048135247711028": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2566520237302171109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3394745723753563598": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9187084522252003753": ["convolution_gpu_bfyx_f16", 8],
+        "5354859258229104455": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6459996129125419168": ["convolution_gpu_bfyx_f16", 7],
+        "1480958967678326823": ["convolution_gpu_bfyx_os_iyx_osv16", 804],
+        "15068007241112743131": ["convolution_gpu_bfyx_f16", 7],
+        "618975323495168026": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9529518231093074440": ["convolution_gpu_bfyx_f16", 3],
+        "15305384015295940803": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "5539082047551617378": ["convolution_gpu_bfyx_f16", 4],
+        "12707656392447062040": ["convolution_gpu_bfyx_f16", 1],
+        "11606382700107557730": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "12969132519312136058": ["convolution_gpu_bfyx_f16", 7],
+        "842687355344268246": ["convolution_gpu_bfyx_f16", 3],
+        "2656665532089288876": ["convolution_gpu_bfyx_os_iyx_osv16", 376],
+        "2150959290870604234": ["convolution_gpu_bfyx_os_iyx_osv16", 5],
+        "1544327503165399517": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2039872499448437447": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3991348982014278143": ["convolution_gpu_bfyx_f16", 8],
+        "7827718823084060727": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3681376157677527214": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16091816004313541827": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "8758769511439480688": ["convolution_gpu_bfyx_f16", 8],
+        "5753897782370339077": ["convolution_gpu_bfyx_f16", 8],
+        "776679759950033458": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15392321417589373062": ["convolution_gpu_bfyx_os_iyx_osv16", 93],
+        "16658067151111824217": ["convolution_gpu_bfyx_f16", 8],
+        "4205857284855499968": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16586084939410219119": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "6469574179117123167": ["convolution_gpu_bfyx_f16", 8],
+        "16376979276035825608": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8269248435817479295": ["convolution_gpu_bfyx_os_iyx_osv16", 55],
+        "1395225722502733977": ["convolution_gpu_bfyx_f16", 8],
+        "10013178580860124936": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10371314197191515": ["convolution_gpu_bfyx_os_iyx_osv16", 805],
+        "16689758532561396949": ["convolution_gpu_bfyx_f16", 8],
+        "6406311807883630817": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13420164150295884514": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17427856786317070333": ["convolution_gpu_bfyx_f16", 6],
+        "13045564637680284253": ["convolution_gpu_bfyx_f16", 1],
+        "10240575152538862347": ["convolution_gpu_bfyx_f16", 8],
+        "350106201615562244": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2408882959031808890": ["convolution_gpu_bfyx_f16", 8],
+        "13535264758398237992": ["convolution_gpu_bfyx_f16", 8],
+        "8019110633749314726": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17143135464144572440": ["convolution_gpu_bfyx_f16", 6],
+        "7164571433420538604": ["convolution_gpu_bfyx_f16", 8],
+        "759843595724886461": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8349697007955129262": ["convolution_gpu_bfyx_f16", 7],
+        "4612125500221158849": ["convolution_gpu_bfyx_f16", 7],
+        "1613453471546288707": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18082453876418923256": ["convolution_gpu_bfyx_f16", 7],
+        "2573494879369122465": ["convolution_gpu_bfyx_f16", 8],
+        "13178243377999862677": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14420748195672461101": ["convolution_gpu_bfyx_f16", 8],
+        "3780553431140772654": ["convolution_gpu_bfyx_f16", 8],
+        "7022912357125866065": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "21159126741870541": ["convolution_gpu_bfyx_f16", 8],
+        "6041100151446003929": ["convolution_gpu_bfyx_f16", 8],
+        "13214853558757669358": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12243903767779308254": ["convolution_gpu_bfyx_f16", 4],
+        "7684654778898882658": ["convolution_gpu_bfyx_f16", 8],
+        "10861634955820547836": ["convolution_gpu_bfyx_f16", 7],
+        "10937719522646877794": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3996245434452465017": ["convolution_gpu_bfyx_f16", 3],
+        "3949209947065694155": ["convolution_gpu_bfyx_f16", 6],
+        "7904844949382399644": ["convolution_gpu_bfyx_f16", 7],
+        "15791093795050497196": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1904667993331130850": ["convolution_gpu_bfyx_f16", 7],
+        "2007864734836080416": ["convolution_gpu_bfyx_f16", 7],
+        "1504595473072178549": ["fused_conv_eltwise_gpu_ref", 1],
+        "5321304003293302339": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6391803599307981783": ["convolution_gpu_bfyx_f16", 7],
+        "12753622180051769374": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1418471439817460893": ["convolution_gpu_bfyx_f16", 8],
+        "14007410751094259041": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6000600601103213217": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15944256561905190998": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9477177759691465931": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6061863826293001749": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5656695882306435761": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2754361133104409608": ["convolution_gpu_bfyx_f16", 6],
+        "16573836003993562922": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14361485468636686919": ["convolution_gpu_bfyx_f16", 7],
+        "13290181949279279819": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11573991700888599299": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3211944296604564565": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15416497136172272973": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13727677413762125787": ["convolution_gpu_bfyx_f16", 6],
+        "223582465360062033": ["convolution_gpu_bfyx_os_iyx_osv16", 203],
+        "4556622765359080875": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8367602301888142902": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5551657483299070298": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16580040324898643414": ["convolution_gpu_bfyx_f16", 3],
+        "3400482857331511907": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "12745087061597212091": ["convolution_gpu_bfyx_f16", 6],
+        "2427753259358876751": ["convolution_gpu_bfyx_f16", 8],
+        "14928919742046692806": ["convolution_gpu_bfyx_f16", 3],
+        "1086083190921173575": ["convolution_gpu_bfyx_f16", 1],
+        "7864788493798910665": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "419863291071220231": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18341088182899934803": ["convolution_gpu_bfyx_f16", 1],
+        "13936757228914648511": ["convolution_gpu_bfyx_f16", 1],
+        "3620847974583296219": ["convolution_gpu_bfyx_f16", 1],
+        "17043718605372433771": ["convolution_gpu_bfyx_os_iyx_osv16", 40],
+        "9491918828755162849": ["convolution_gpu_bfyx_f16", 8],
+        "5589151165477337040": ["convolution_gpu_bfyx_f16", 2],
+        "10228757350181746895": ["convolution_gpu_bfyx_f16", 1],
+        "1162872816262341718": ["convolution_gpu_bfyx_os_iyx_osv16", 6],
+        "16649287498467160559": ["convolution_gpu_bfyx_os_iyx_osv16", 494],
+        "7586799959717044009": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "9084083435358099350": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "9376423029944831246": ["convolution_gpu_bfyx_os_iyx_osv16", 119],
+        "705554998369361805": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "6245817051936572651": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1108],
+        "1323301183115767024": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "2507934590678243268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14525834002536696135": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "5474206322525908485": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "11515796160198158378": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "3132722606109144321": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14710881631609824591": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "11775792831795740823": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "5638724202264796275": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6886956365972144464": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "7665666161071576188": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 740],
+        "5417472746527799111": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "2594584321222202684": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "922877204324354246": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "7350126920223776235": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16303836867197711105": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7867449406763358779": ["convolution_gpu_bfyx_os_iyx_osv16", 237],
+        "15073897412776684512": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7483651542476843520": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15903567343998302316": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "15583443924198345750": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "17843088220627815484": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "4632062586003457136": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "10545129526795036329": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "8799068442641712278": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14112582871403839539": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 354],
+        "1537120727711441803": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "6319422805741168410": ["convolution_gpu_bfyx_os_iyx_osv16", 469],
+        "4932803782521646509": ["convolution_gpu_bfyx_os_iyx_osv16", 1108],
+        "693915964507763961": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "12231852642666528690": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "6327608958004075948": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10481749345430191494": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "3465618418555443152": ["convolution_gpu_bfyx_os_iyx_osv16", 119],
+        "6220132353152696371": ["convolution_gpu_bfyx_os_iyx_osv16", 487],
+        "767822057476164981": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9047957325396112699": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "4356441299961129632": ["convolution_gpu_bfyx_os_iyx_osv16", 191],
+        "10144632434338007132": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "15158722447225497040": ["convolution_gpu_bfyx_os_iyx_osv16", 994],
+        "14636891429613595743": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "10686925946858146532": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "8212789547545225423": ["convolution_gpu_bfyx_os_iyx_osv16", 838],
+        "11769756626318373236": ["fully_connected_gpu_bf_io_gemm", 2],
+        "5110309993577022127": ["convolution_gpu_bfyx_os_iyx_osv16", 83],
+        "2562131945197556573": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10704041599214066504": ["convolution_gpu_bfyx_f16", 8],
+        "10613229998051250501": ["convolution_gpu_bfyx_f16", 8],
+        "11371787826925681911": ["convolution_gpu_bfyx_f16", 8],
+        "1813150318517555729": ["convolution_gpu_bfyx_f16", 8],
+        "2771555413518577061": ["convolution_gpu_bfyx_f16", 7],
+        "5185490410687016716": ["convolution_gpu_bfyx_f16", 8],
+        "7950736292930841432": ["convolution_gpu_bfyx_f16", 8],
+        "8261743217235812905": ["convolution_gpu_bfyx_f16", 6],
+        "8477837540026813338": ["convolution_gpu_bfyx_f16", 7],
+        "7870792155742596714": ["convolution_gpu_bfyx_f16", 7],
+        "877301692476873394": ["convolution_gpu_bfyx_f16", 7],
+        "13056385937425838233": ["convolution_gpu_bfyx_f16", 8],
+        "8845096601815863972": ["convolution_gpu_bfyx_f16", 6],
+        "14676936757685089287": ["convolution_gpu_bfyx_f16", 8],
+        "5137720027289968571": ["convolution_gpu_bfyx_f16", 8],
+        "16357238101987779826": ["convolution_gpu_bfyx_f16", 7],
+        "9042736284060217631": ["convolution_gpu_bfyx_f16", 8],
+        "1962817966750882229": ["convolution_gpu_bfyx_gemm_like", 2],
+        "757414390636970088": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "4392731931266884279": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14949235924854278221": ["convolution_gpu_bfyx_os_iyx_osv16", 229],
+        "8109779592266289481": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "12865143364214858603": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "15067445793956191132": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5644043280715935432": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "15401918726121762363": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "9239825223929080442": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "16388921203426413956": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "6777503252254723020": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "11128087433951850083": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "5141881043179760550": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "12802295324029349931": ["convolution_gpu_bfyx_os_iyx_osv16", 1077],
+        "15431628897951881935": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "3589251091292907354": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "6054584798362533079": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "2027062613896109334": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "2494989528221736054": ["convolution_gpu_bfyx_f16", 3],
+        "10481457184081052557": ["convolution_gpu_bfyx_f16", 3],
+        "17843566914419305583": ["convolution_gpu_bfyx_f16", 8],
+        "10440359951914302042": ["convolution_gpu_bfyx_f16", 5],
+        "12355534646291322950": ["convolution_gpu_bfyx_f16", 3],
+        "1312046147551402733": ["convolution_gpu_bfyx_f16", 4],
+        "17747064821498992452": ["convolution_gpu_bfyx_f16", 1],
+        "15727623554601964014": ["convolution_gpu_bfyx_f16", 4],
+        "1123438482147655288": ["convolution_gpu_bfyx_f16", 1],
+        "7126696940487701707": ["convolution_gpu_bfyx_f16", 3],
+        "3872390202906772826": ["convolution_gpu_bfyx_f16", 6],
+        "2880589787553789663": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "4631844879520026809": ["convolution_gpu_bfyx_os_iyx_osv16", 155],
+        "13844000686044797469": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "9270121824344599740": ["convolution_gpu_bfyx_os_iyx_osv16", 159],
+        "16709277754916245782": ["convolution_gpu_bfyx_os_iyx_osv16", 529],
+        "17921065014385217728": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "13953277739655839946": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "365791512696267923": ["convolution_gpu_bfyx_os_iyx_osv16", 157],
+        "1141261355712926031": ["convolution_gpu_bfyx_os_iyx_osv16", 529],
+        "8813719449277469033": ["convolution_gpu_bfyx_os_iyx_osv16", 328],
+        "6092477671894277230": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8722892772000291602": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11520633390649939176": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "12980812349323846110": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7188357493962808046": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "11762345626350030823": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "5428672297616140288": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "10600353264973098791": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "15588761039208349307": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "16123871259057596631": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "14991936861614608527": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "5843416316523596635": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8982180163543777584": ["convolution_gpu_bfyx_os_iyx_osv16", 1104],
+        "9165342275276264623": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "8236012626172975207": ["convolution_gpu_bfyx_f16", 8],
+        "15754415694628496024": ["convolution_gpu_bfyx_f16", 8],
+        "17406936341866296662": ["convolution_gpu_bfyx_f16", 8],
+        "6529037726400888172": ["convolution_gpu_bfyx_f16", 7],
+        "9846172794997922278": ["convolution_gpu_bfyx_f16", 8],
+        "9493935898386112535": ["convolution_gpu_bfyx_f16", 8],
+        "7306292618362586227": ["convolution_gpu_bfyx_f16", 8],
+        "14967562562496852635": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9383921996276611774": ["convolution_gpu_bfyx_f16", 8],
+        "11168147969470549122": ["convolution_gpu_bfyx_f16", 8],
+        "14053664169544819760": ["convolution_gpu_bfyx_f16", 6],
+        "9184733790339562280": ["convolution_gpu_bfyx_f16", 8],
+        "10136778987522491016": ["convolution_gpu_bfyx_f16", 8],
+        "17355459669917546629": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14199620063459632318": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9059694442320091375": ["convolution_gpu_bfyx_f16", 8],
+        "3777808646279316435": ["convolution_gpu_bfyx_f16", 8],
+        "5077764668758006577": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15528598957598089701": ["convolution_gpu_bfyx_f16", 8],
+        "4910602340026479089": ["convolution_gpu_bfyx_f16", 8],
+        "3460615852926593636": ["convolution_gpu_bfyx_f16", 8],
+        "7051704960834828963": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "5849502570947855625": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17320230733736402509": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "14376448497282593859": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11059091112167439040": ["convolution_gpu_bfyx_os_iyx_osv16", 220],
+        "11073613812342958769": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4154541958145867375": ["convolution_gpu_bfyx_os_iyx_osv16", 595],
+        "11497596156215746295": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7989457597882264703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11224449857742374449": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8019330764912846895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8298488609133255406": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14393217564854520848": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "3141906957984957990": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8411633870815503324": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "17289238208820562994": ["convolution_gpu_bfyx_os_iyx_osv16", 993],
+        "17376882838565917025": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "376447867595880925": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9223591734176279618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2123481240130017671": ["convolution_gpu_bfyx_gemm_like", 0],
+        "60262519627721258": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5919114362027813213": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1357304910509750335": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2624254602965505549": ["convolution_gpu_bfyx_f16", 8],
+        "5577742374711315791": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "578315994260636114": ["convolution_gpu_bfyx_f16", 8],
+        "1262880924315152695": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6168533266847660009": ["convolution_gpu_bfyx_f16", 8],
+        "14627313247209797163": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1270860549971294137": ["convolution_gpu_bfyx_f16", 8],
+        "4422458267180761143": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16820926361172105951": ["convolution_gpu_bfyx_f16", 8],
+        "7270466581298144020": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17886363415956316754": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "1392628448770002052": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6733088214815340670": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9311722977080169500": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17741687009005052531": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "16599775094194414107": ["convolution_gpu_bfyx_f16", 7],
+        "17406888356387369802": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14665993929606055479": ["convolution_gpu_bfyx_f16", 6],
+        "1257358912309769908": ["convolution_gpu_bfyx_f16", 1],
+        "10136222189601190652": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3326350735262959593": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2567573000230960427": ["convolution_gpu_bfyx_f16", 6],
+        "1291883454136679475": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4612488935509382461": ["convolution_gpu_bfyx_f16", 8],
+        "44873969645629501": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2578924800298320995": ["convolution_gpu_bfyx_f16", 8],
+        "13431627645625703425": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3411924982644761856": ["convolution_gpu_bfyx_f16", 8],
+        "8768537636114686671": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7896309749934295024": ["convolution_gpu_bfyx_f16", 8],
+        "8325953452731944450": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "429170609380225181": ["convolution_gpu_bfyx_f16", 8],
+        "2114765225420794471": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18249809691677461763": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3183599956647450025": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9082014942562277789": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10836200599982993668": ["convolution_gpu_bfyx_f16", 7],
+        "12864139447025655415": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12523250500313140847": ["convolution_gpu_bfyx_f16", 6],
+        "12000695135118665982": ["convolution_gpu_bfyx_os_iyx_osv16", 724],
+        "742817882542885943": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4002541142367187136": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "18334702187170194234": ["convolution_gpu_bfyx_f16", 7],
+        "3948406171098526504": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6872860675057100662": ["convolution_gpu_bfyx_f16", 8],
+        "17681970169299053286": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9324781373847684086": ["convolution_gpu_bfyx_f16", 8],
+        "4773059056534245515": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6177289206979472775": ["convolution_gpu_bfyx_f16", 8],
+        "13523041584984452151": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15908438282639350074": ["convolution_gpu_bfyx_f16", 8],
+        "6449849483344573800": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "874369470058074151": ["convolution_gpu_bfyx_f16", 8],
+        "9693986962988023660": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7166692858921939993": ["convolution_gpu_bfyx_f16", 8],
+        "16550351471125114158": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1398899063819018467": ["convolution_gpu_bfyx_f16", 8],
+        "18211571181565238164": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "6787622700852474159": ["convolution_gpu_bfyx_f16", 8],
+        "9330332380446446861": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3352821985265666302": ["convolution_gpu_bfyx_f16", 7],
+        "13446420473387679707": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10671502596789907716": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "10272839156464101832": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "2437761452427288852": ["convolution_gpu_bfyx_os_iyx_osv16", 172],
+        "10328358317722308811": ["convolution_gpu_bfyx_os_iyx_osv16", 242],
+        "8161893965853155550": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "10517584909517952169": ["convolution_gpu_bfyx_os_iyx_osv16", 997],
+        "16089431087164898643": ["convolution_gpu_bfyx_os_iyx_osv16", 841],
+        "18446632461258501693": ["fully_connected_gpu_bf_io_gemm", 2],
+        "16241580627391428048": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "12772269695217889469": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "11842608083285317080": ["convolution_gpu_bfyx_f16", 8],
+        "47254024987636917": ["convolution_gpu_bfyx_f16", 7],
+        "12203188887109457648": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10190268798161660806": ["convolution_gpu_bfyx_f16", 6],
+        "4893322057900198637": ["convolution_gpu_bfyx_f16", 8],
+        "14205587855693463012": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17828689021236060785": ["convolution_gpu_bfyx_f16", 8],
+        "5304616674637388104": ["convolution_gpu_bfyx_f16", 8],
+        "13774415976138800103": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16917839753648589507": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14219521195638380227": ["convolution_gpu_bfyx_f16", 8],
+        "1124640128461683757": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3648821283586419588": ["convolution_gpu_bfyx_f16", 8],
+        "8994225239524823748": ["convolution_gpu_bfyx_f16", 8],
+        "5364969049701663909": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10800673441196722081": ["convolution_gpu_bfyx_f16", 8],
+        "8758440761491268201": ["convolution_gpu_bfyx_f16", 8],
+        "2681801788585835365": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10429066432771360180": ["convolution_gpu_bfyx_f16", 8],
+        "13396899929422166121": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14329490371610928743": ["convolution_gpu_bfyx_f16", 8],
+        "10157392835347093325": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16379903423760197202": ["convolution_gpu_bfyx_f16", 8],
+        "6051540118588283350": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12727529778630405959": ["convolution_gpu_bfyx_f16", 8],
+        "18203336007027481684": ["convolution_gpu_bfyx_f16", 7],
+        "13735637322201614021": ["convolution_gpu_bfyx_f16", 8],
+        "16203649874514419110": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4970939826398189012": ["convolution_gpu_bfyx_f16", 7],
+        "8872133219292246457": ["convolution_gpu_bfyx_f16", 6],
+        "674741146618474055": ["convolution_gpu_bfyx_f16", 8],
+        "2192109161538544571": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9459049194486919395": ["convolution_gpu_bfyx_f16", 8],
+        "10883566778144231615": ["convolution_gpu_bfyx_f16", 3],
+        "13142382655510339647": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15651803299019465587": ["convolution_gpu_bfyx_f16", 7],
+        "4098800631750270081": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1700646036054230529": ["convolution_gpu_bfyx_f16", 3],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16", 489],
+        "17663469192304546280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "16672038432561840773": ["convolution_gpu_bfyx_gemm_like", 2],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8328046766891245727": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "2862029728492027826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10094608033766589665": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "4938427667130309532": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "37017760060253822": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16044646335477470657": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "571521463360043149": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3579916582911190192": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3771003491521695667": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "15514370342945522276": ["convolution_gpu_bfyx_os_iyx_osv16", 979],
+        "14438262965335231630": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "18041311106624909689": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5471430682416582179": ["convolution_gpu_bfyx_os_iyx_osv16", 235],
+        "18264290105582283647": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17555564884839598291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "15463465056816958579": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "614603377985036814": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "17657484186971431467": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "16966946384436994988": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8451212914744825089": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "5131348852069018593": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "13619081494170885939": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "14814906622813306907": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "10390270859807723238": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9729987752669765456": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "11111492998730881451": ["convolution_gpu_bfyx_os_iyx_osv16", 1122],
+        "8751030381556349657": ["convolution_gpu_bfyx_os_iyx_osv16", 213],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "3416294810798281053": ["convolution_gpu_bfyx_os_iyx_osv16", 1035],
+        "12066560812164094695": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "9332596500956923556": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "15067550526427941795": ["convolution_gpu_bfyx_os_iyx_osv16", 370],
+        "15428062440621131394": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "12832357598114345067": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1372767468794397354": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16", 349],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16", 742],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "14109435279166116002": ["convolution_gpu_bfyx_f16", 7],
+        "13459688909495870984": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13140258746301602394": ["convolution_gpu_bfyx_f16", 8],
+        "17781214375438792660": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "10287714400844285017": ["convolution_gpu_bfyx_f16", 8],
+        "1565634623724172264": ["convolution_gpu_bfyx_f16", 8],
+        "3176458788783865475": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16205415802333521877": ["convolution_gpu_bfyx_f16", 8],
+        "9501682982876002973": ["convolution_gpu_bfyx_f16", 6],
+        "6632679386692958385": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "3606200006594557304": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9426719661295147907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9327423854596846454": ["convolution_gpu_bfyx_f16", 6],
+        "12461826750063163499": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10919680708143692288": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13973910769569755022": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "16651330026533439491": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14221540347396094429": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16715505791872304993": ["convolution_gpu_bfyx_f16", 6],
+        "18216528544556604342": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16146697325761324781": ["convolution_gpu_bfyx_f16", 6],
+        "9431016105508711343": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "2035553893876765347": ["convolution_gpu_bfyx_f16", 8],
+        "15357486621038352160": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5090963959865039880": ["convolution_gpu_bfyx_f16", 6],
+        "10683642935643589149": ["convolution_gpu_bfyx_f16", 4],
+        "17035059025727718755": ["convolution_gpu_bfyx_f16", 6],
+        "17686802379348903240": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "1168634671898399586": ["convolution_gpu_bfyx_f16", 5],
+        "10871963505418141901": ["convolution_gpu_bfyx_f16", 4],
+        "12780308533167351871": ["convolution_gpu_bfyx_f16", 1],
+        "17814572351621240649": ["convolution_gpu_bfyx_f16", 1],
+        "8199659032184139406": ["convolution_gpu_bfyx_f16", 1],
+        "17292147847795515942": ["convolution_gpu_bfyx_f16", 2],
+        "13050546314117448472": ["convolution_gpu_bfyx_f16", 1],
+        "15661919785407152450": ["convolution_gpu_bfyx_f16", 0],
+        "13361476452589900091": ["convolution_gpu_bfyx_f16", 8],
+        "6082362633323240591": ["convolution_gpu_bfyx_f16", 3],
+        "8046441445847114800": ["convolution_gpu_bfyx_f16", 8],
+        "2833200599027391230": ["convolution_gpu_bfyx_os_iyx_osv16", 381],
+        "12087098149473149843": ["convolution_gpu_bfyx_os_iyx_osv16", 5],
+        "10797803634452978403": ["convolution_gpu_bfyx_os_iyx_osv16", 470],
+        "1460198641822920308": ["convolution_gpu_bfyx_os_iyx_osv16", 139],
+        "7541313557160970669": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "11956882275779755540": ["fully_connected_gpu_fb_io_ref", 2],
+        "16260999652061745783": ["fully_connected_gpu_bfyx_ref", 2],
+        "8746885602394700190": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "9829405698318443184": ["convolution_gpu_bfyx_f16", 8],
+        "11076033670880910796": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16821706771740161698": ["convolution_gpu_bfyx_f16", 8],
+        "9820952616161226599": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8413361786588130499": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1954817953089924262": ["convolution_gpu_bfyx_f16", 8],
+        "1911974730574689700": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12327794035459404641": ["convolution_gpu_bfyx_f16", 8],
+        "13508363642895529597": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "18212917731579404065": ["convolution_gpu_bfyx_f16", 3],
+        "149782878608295661": ["convolution_gpu_bfyx_f16", 8],
+        "18278189975456459234": ["convolution_gpu_bfyx_f16", 4],
+        "5103958102100938512": ["convolution_gpu_bfyx_f16", 4],
+        "8327836810944774590": ["convolution_gpu_bfyx_f16", 6],
+        "17957554514796196053": ["convolution_gpu_bfyx_f16", 3],
+        "11235079901248304624": ["convolution_gpu_bfyx_f16", 4],
+        "12525888646035586976": ["convolution_gpu_bfyx_f16", 8],
+        "4250146685122778746": ["convolution_gpu_bfyx_f16", 4],
+        "345518063251891244": ["convolution_gpu_bfyx_f16", 3],
+        "4150442044954827851": ["convolution_gpu_bfyx_f16", 2],
+        "6010542147949689482": ["convolution_gpu_bfyx_f16", 3],
+        "3511884127716721063": ["convolution_gpu_bfyx_os_iyx_osv16", 53],
+        "17902687769380768374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5123488908996247917": ["convolution_gpu_bfyx_os_iyx_osv16", 128],
+        "3854084472651875897": ["convolution_gpu_bfyx_os_iyx_osv16", 547],
+        "10803004054574179414": ["convolution_gpu_bfyx_os_iyx_osv16", 884],
+        "8761961047097571733": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "3227291961704566512": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "15502623218270238644": ["convolution_gpu_bfyx_os_iyx_osv16", 541],
+        "13533336063700080325": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8595156989254845134": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14493123117003003092": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15727110405754725012": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10890620280807224744": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "16079792265815446547": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "15384055407657760803": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2464531851392092325": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "5613964218561759893": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11460648773146310189": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6593870431636005244": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "11529036254499853035": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 1110],
+        "17010201596936918243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "17881013712456488163": ["convolution_gpu_bfyx_os_iyx_osv16", 68],
+        "9336215801757107337": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "4972222030950072866": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "18113157997465675692": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 1],
+        "1472667774257971884": ["convolution_gpu_bfyx_os_iyx_osv16", 828],
+        "7480855342650290772": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "17244746622354078542": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "251775001146378096": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "1264200731459756446": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "6968087469917482002": ["convolution_gpu_bfyx_os_iyx_osv16", 77],
+        "1607381610581485984": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "8919164618663601566": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "16853448010512574338": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "3010644722195354051": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "17062011653598617580": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 619],
+        "1377210419756613502": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1514213112647467874": ["convolution_gpu_bfyx_os_iyx_osv16", 437],
+        "17268633106022870055": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 997],
+        "13787398748724798340": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "9739119866883611322": ["convolution_gpu_bfyx_os_iyx_osv16", 815],
+        "7151167803631697120": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "8870164706606458004": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "9269498023794081940": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 477],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 871],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "5728070995112243570": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "5381496395266530071": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "9712640406795417230": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 58],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 808],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 697],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 56],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 836],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "13821372148587948765": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "4727004015814244856": ["convolution_gpu_bfyx_1x1_opt", 0],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 460],
+        "3470176432841342662": ["convolution_gpu_bfyx_os_iyx_osv16", 205],
+        "8950283515337670839": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "3995072673238444396": ["convolution_gpu_bfyx_1x1_opt", 1],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 461],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "5665180797552893949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7180904384828396567": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17041465029020839746": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8648502659728489503": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "2007359338465363037": ["convolution_gpu_bfyx_os_iyx_osv16", 110],
+        "16300204511212928772": ["convolution_gpu_bfyx_os_iyx_osv16", 589],
+        "10636266218009746496": ["convolution_gpu_bfyx_os_iyx_osv16", 591],
+        "17502734572225953539": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "9266211532252099402": ["fully_connected_gpu_fb_oi_ref", 0],
+        "6763848192987176713": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6123737429963241103": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "10102406370623883494": ["convolution_gpu_bfyx_os_iyx_osv16", 861],
+        "16125206369312086947": ["convolution_gpu_bfyx_os_iyx_osv16", 485],
+        "16927483709629289661": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "3196823812655863240": ["convolution_gpu_bfyx_os_iyx_osv16", 861],
+        "7968691295772769464": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "6100031133333761315": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "4055514200737135942": ["fully_connected_gpu_bfyx_ref", 1],
+        "18141581865855554514": ["convolution_gpu_bfyx_os_iyx_osv16", 485],
+        "16956102699411887521": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "11526253915485637934": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "15696872908795836832": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "15332512198621601617": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "5702206454207934253": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 730],
+        "15414564531144316178": ["convolution_gpu_bfyx_os_iyx_osv16", 247],
+        "386448290084824203": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15390537225231495870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10038180349007230302": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 330],
+        "6817180081986948843": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1527649565538821618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "7004336584711849988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2157468701794819044": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "15920115680945815097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1034],
+        "17778554668592635168": ["convolution_gpu_bfyx_os_iyx_osv16", 466],
+        "6999571050665340986": ["convolution_gpu_bfyx_os_iyx_osv16", 840],
+        "9879436330613366129": ["convolution_gpu_bfyx_gemm_like", 1],
+        "726019095679197164": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1865317677339946921": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12018933315566840474": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "7267651931396380072": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12324657364444167791": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 366],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 373],
+        "17351243519367619322": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17026338651868178077": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "4114184149613179671": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "9038567144062573854": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 1114],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "6418222853479731432": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7539191242110313918": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 976],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "5717588912072437191": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 965],
+        "10766144770072425534": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "6442062011017461761": ["convolution_gpu_bfyx_os_iyx_osv16", 208],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "3892512749863226006": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4970240836537468609": ["convolution_gpu_bfyx_os_iyx_osv16", 962],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 0],
+        "2017817372328795772": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18312913026696855515": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "1323873987880062206": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3828289925836476678": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "10112041311060264798": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "7966725359592006848": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "2213697863012348994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5200128826708487987": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910238486908592807": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7110283028091835342": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "16035239784731081694": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8190708817382075098": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "14088072670684726938": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "4594156436010043898": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "11599404585487705575": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "12238796233133147488": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "16062641979970268785": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "17970835612618431265": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "2793976170555467399": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "5268998395189523109": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "10247076603819003292": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "10411646581372174184": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "3783590807023839590": ["convolution_gpu_bfyx_os_iyx_osv16", 712],
+        "13040613656895011417": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "3426085674061936062": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "18191480673111859449": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "3168817659922190247": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "18315877695535348266": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "12547634427503359071": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "16329007163840646462": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "10029877845127663589": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "2314415797696124986": ["convolution_gpu_bfyx_os_iyx_osv16", 1088],
+        "16980380685273501504": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "3178865432099367094": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "14025615946937229331": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "9213611800089847066": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "16929122365386190391": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "2135878993442720196": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "9676824536524126662": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "920276615573431782": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "14160730014298968824": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17736530310730065811": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "2980714886349866400": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16634588113528268855": ["convolution_gpu_bfyx_os_iyx_osv16", 981],
+        "11974061312537998708": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "16035580169248458433": ["convolution_gpu_bfyx_os_iyx_osv16", 617],
+        "9866780121729912726": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "9774829335571618473": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12220806137793480020": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18351615003377381150": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "5523604552813225273": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "7679309022130741323": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "5318931986270088360": ["convolution_gpu_bfyx_gemm_like", 1],
+        "515117191459385744": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8719869282082754142": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "7982863980065943223": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11226945962148431484": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4241838582334505669": ["convolution_gpu_bfyx_gemm_like", 2],
+        "377042666741080260": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18145274589954906463": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6999860230736815298": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "16857606646270000245": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12338108420996610172": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10159450328554854004": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9170293267334520501": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "566685987437510322": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3194003345823695583": ["convolution_gpu_bfyx_os_iyx_osv16", 746],
+        "12107562407862382766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7161737091607459281": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9553813691004246971": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "10335630215626781232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660045223846569448": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14844074799300904420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5366152766029340057": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "8299878919282539563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18146920703695658789": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "9019625678983697946": ["convolution_gpu_bfyx_os_iyx_osv16", 689],
+        "10578264750808095350": ["convolution_gpu_bfyx_os_iyx_osv16", 1070],
+        "17553228602707603911": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "11544029240137241864": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "3625681568469091400": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "8849298369373186729": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "10796031718453810929": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9053983956770697828": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "6446557539680352152": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1642704598828904520": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "8319779172385327650": ["convolution_gpu_bfyx_os_iyx_osv16", 625],
+        "11579387987720364831": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12754351323109225715": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1114],
+        "7903220569487431556": ["convolution_gpu_bfyx_os_iyx_osv16", 950],
+        "3905190080706902824": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8296759260312471619": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17301520533084822859": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "14740238736074743734": ["convolution_gpu_bfyx_os_iyx_osv16", 244],
+        "11837023395630571569": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18200031323963616161": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 364],
+        "4125453719396313232": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3653945386031463537": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "290357754290893078": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3852245179144851596": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13731852935536160843": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "8777588932609025138": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1316118918790851994": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 367],
+        "11178580933542373407": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 360],
+        "17878041282431477247": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18049861144026923516": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2141454343831534876": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9144400494257163130": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13190119938630028553": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "4903536862079845135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "15066104804156933222": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 743],
+        "13457620264718125011": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1119],
+        "16436525035845780373": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 732],
+        "11501452337228727462": ["convolution_gpu_bfyx_os_iyx_osv16", 199],
+        "14843223893923209210": ["convolution_gpu_bfyx_os_iyx_osv16", 199],
+        "3403065541792865347": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "5747468958285466504": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17552192746313035704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4855884888715402777": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "6932556634380539441": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "9400396209180747044": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "10431774409348875623": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "9495099584417616887": ["convolution_gpu_bfyx_os_iyx_osv16", 998],
+        "9115704215611322151": ["convolution_gpu_bfyx_os_iyx_osv16", 739],
+        "11735107098356940998": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15204384674852423405": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16866113149488400688": ["convolution_gpu_bfyx_os_iyx_osv16", 147],
+        "15389774302738715375": ["convolution_gpu_bfyx_os_iyx_osv16", 627],
+        "8101177730804364242": ["convolution_gpu_bfyx_os_iyx_osv16", 148],
+        "10149791427786334512": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "11053198857132396443": ["convolution_gpu_bfyx_os_iyx_osv16", 283],
+        "3963577328998759824": ["fully_connected_gpu_fb_oi_ref", 2],
+        "800184023925596362": ["convolution_gpu_bfyx_os_iyx_osv16", 493],
+        "13839532421033004873": ["convolution_gpu_bfyx_os_iyx_osv16", 566],
+        "8262487256974801864": ["convolution_gpu_bfyx_os_iyx_osv16", 148],
+        "3693217331248996607": ["convolution_gpu_bfyx_os_iyx_osv16", 898],
+        "10388555096612441710": ["convolution_gpu_bfyx_os_iyx_osv16", 526],
+        "8892698757722619628": ["convolution_gpu_bfyx_os_iyx_osv16", 146],
+        "9606108204575763003": ["convolution_gpu_bfyx_os_iyx_osv16", 577],
+        "8449999818915991236": ["fully_connected_gpu_fb_io_ref", 2],
+        "7933040116770016066": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1919536721555752974": ["convolution_gpu_bfyx_os_iyx_osv16", 1088],
+        "10686800639842865597": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8687217977804450176": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3954066703109036822": ["convolution_gpu_bfyx_gemm_like", 1],
+        "723914723460931977": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11198516910049713685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1635320120115967164": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15344790681368521678": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "12844169781725567332": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17741034184665639196": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15923530138304858829": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10444674910548414627": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10302498589531075361": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4924266705550545296": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18358817826057771246": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5814292023792160102": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11190351855453911732": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9686754964115262880": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10699818671891976144": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11629568560686145289": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2754112975365662883": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14572211541644991947": ["convolution_gpu_bfyx_os_iyx_osv16", 49],
+        "15460159349027866277": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "1933120851078072002": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "15544724104656453486": ["convolution_gpu_bfyx_os_iyx_osv16", 359],
+        "9953946296788154289": ["convolution_gpu_bfyx_os_iyx_osv16", 592],
+        "5949275355217152112": ["convolution_gpu_bfyx_os_iyx_osv16", 227],
+        "9953648472305845286": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 366],
+        "4585615709600143734": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "5688607327240251933": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17872945111265083716": ["convolution_gpu_bfyx_os_iyx_osv16", 599],
+        "7002575346587056029": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "4053858347143322566": ["convolution_gpu_bfyx_os_iyx_osv16", 729],
+        "15684381282886192452": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9172655573618628060": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1109],
+        "10794126133490266436": ["convolution_gpu_bfyx_os_iyx_osv16", 1105],
+        "13850228162972171575": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "129286539782466549": ["convolution_gpu_bfyx_os_iyx_osv16", 595],
+        "405864173902226347": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "11446357246069900060": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "11612145813762780082": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15323010740285064115": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "9782042377801038578": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "15340106601175659588": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3000754961057044652": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13882543862049484032": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "459319667430150397": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12757674875116871887": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16119575123089076330": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "17015151842140598799": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "634038212244146017": ["convolution_gpu_bfyx_os_iyx_osv16", 80],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16", 8],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 5],
+        "967593872851912083": ["convolution_gpu_bfyx_f16", 8],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 5],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16", 8],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 5],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16", 8],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 5],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 5],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16", 8],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 5],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 5],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 5],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 5],
+        "357806365552700839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 5],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 5],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 5],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16", 7],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 4],
+        "346998321908284784": ["convolution_gpu_bfyx_f16", 6],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 3],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16", 6],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 4],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16", 7],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 3],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16", 7],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 3],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16", 6],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 4],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16", 7],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 4],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16", 6],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 3],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16", 7],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 4],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16", 7],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 3],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16", 6],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 4],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16", 6],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 5],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16", 7],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 3],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16", 6],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 4],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16", 6],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 3],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16", 7],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 3],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16", 7],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 3],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16", 7],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 4],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16", 6],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 3],
+        "260499864874634958": ["convolution_gpu_bfyx_f16", 7],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 3],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16", 6],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 4],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16", 6],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 4],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16", 7],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 4],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16", 7],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 3],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 3],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 2],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 4],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 1],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 3],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 1],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 4],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 2],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 4],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 1],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 3],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 2],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 4],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 2],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 3],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 2],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 5],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 2],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 4],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 1],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 3],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 2],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 4],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 2],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 4],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 2],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 4],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 2],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 5],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 1],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 3],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 1],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 6],
+        "11479153223948565455": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15137118881649312407": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7380413826069265610": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16535858081334660130": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "3621905235571219180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15946837476334836670": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "245178301664812042": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11536204967390696799": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13202661087717766278": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17082033214052891239": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10972993149458384549": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13266975232886004160": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "5239323177752135143": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13950458285304028472": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1153656272296563651": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15832393447136864275": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13449769853632530": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16481491209623188639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16355932574879498582": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9885117015102902622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17948745397003387421": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6169584310346033045": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11946156629252758613": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8766639290602892682": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4124732995953832580": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14120940518810838558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15477415938111847293": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7899374704077099747": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1738224818674864374": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4675498016268563894": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11678653628752466495": ["convolution_gpu_bfyx_gemm_like", 2],
+        "823094503720427089": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "6268238156027633260": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12067387912557140291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14700484317091478179": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5093753362153705304": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7185731190256343440": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7147929965532955967": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "11272978444176415320": ["convolution_gpu_bfyx_os_iyx_osv16", 707],
+        "3664831747298375482": ["convolution_gpu_bfyx_os_iyx_osv16", 1122],
+        "5055315246446375474": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "11248871352103466387": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "14138271699174946769": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "11248138620600796041": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "8218608499996018829": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "492405382055839338": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "13627463949725014842": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "10442692749607465731": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "5257716983547940732": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "4531738938698034182": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "4103900860372048770": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "1763848406836981250": ["convolution_gpu_bfyx_os_iyx_osv16", 748],
+        "13050289716763141821": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "5246872552943832761": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "8103482664263052993": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "4890599355418453618": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "13440603011986281192": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "7470027005329223304": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "10193635775409684341": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "9727214793392528330": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "10481905734789810461": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "17748868035178556381": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "1557394183568627973": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "1431347831018127681": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "11729412526159852880": ["convolution_gpu_bfyx_os_iyx_osv16", 1125],
+        "4899105740108544338": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "8050406060207298909": ["convolution_gpu_bfyx_os_iyx_osv16", 1127],
+        "7380902367877842940": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "12400142005537988277": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "7084726217254409262": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "13881126705282937733": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "3268450385258447029": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "3315969006703902437": ["convolution_gpu_bfyx_os_iyx_osv16", 373],
+        "7995430380267318045": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "13355664807789465988": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "1814690350132893834": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "10977798741323641518": ["convolution_gpu_bfyx_os_iyx_osv16", 1126],
+        "16290685659520662243": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "14814993085047057124": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "16036211705705298060": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3314627126439576532": ["convolution_gpu_bfyx_os_iyx_osv16", 374],
+        "5397150622881607923": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "4417629288282219686": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "2593493324630665553": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "2115136697391853510": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "3903972756038760641": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "18309964708787622418": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "10898709444676724488": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "3114718546872961667": ["convolution_gpu_bfyx_os_iyx_osv16", 212],
+        "4116817191288103322": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "5759507923877307269": ["convolution_gpu_bfyx_os_iyx_osv16", 591],
+        "13521523772245595449": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "7025699501997365179": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "17325198932789845471": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "1929216390450946038": ["convolution_gpu_bfyx_os_iyx_osv16", 587],
+        "9359713794448163515": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "1064765432017421754": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "17903113127620271097": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "955947984048164651": ["convolution_gpu_bfyx_os_iyx_osv16", 215],
+        "6871124717336911723": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "17054742656500024833": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "8735118147118298928": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "7689370938722443575": ["convolution_gpu_bfyx_os_iyx_osv16", 589],
+        "7389433284327478008": ["convolution_gpu_bfyx_os_iyx_osv16", 602],
+        "6352588504037946062": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "3420065266906936372": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "5158493429539582334": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "8584667522373731666": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "16628885743804758299": ["convolution_gpu_bfyx_os_iyx_osv16", 210],
+        "9979885527081183609": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "11585377068025763798": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "270198976247871883": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "14806119107242947719": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "6237698548794601324": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "16586342221264661586": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "8378911742901238960": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "8878591357527094058": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16", 403],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16", 8],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16", 8],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16", 8],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16", 7],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 6],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 7],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 7],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 6],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 7],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 7],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 6],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 6],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 7],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 7],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 7],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 7],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16", 6],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 4],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16", 6],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 4],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16", 7],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 3],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16", 7],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 4],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16", 7],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 4],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16", 7],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 3],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16", 7],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 3],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16", 6],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 4],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16", 7],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 4],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16", 6],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 4],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16", 6],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 4],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16", 6],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 4],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16", 6],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 4],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16", 7],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 3],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16", 7],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 3],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16", 7],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 4],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16", 6],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 3],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16", 7],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 3],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16", 6],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 3],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16", 6],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 3],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16", 6],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 3],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16", 6],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 3],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16", 7],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 4],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16", 7],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 4],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 3],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 5],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 3],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 4],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 4],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 5],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 4],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 4],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 4],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 4],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 3],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 5],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 3],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 4],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 3],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 5],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 4],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 5],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 3],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 3],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 3],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 4],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 4],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 5],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 3],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 4],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 3],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 5],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 4],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 3],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 4],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 5],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 6],
+        "4652102901251847499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8552605555461651066": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4821707856043228388": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like", 2],
+        "867673900353092030": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9400507072890048966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13391871893495885313": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10371076921125171059": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10315090439844489700": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "671453551040072499": ["convolution_gpu_bfyx_os_iyx_osv16", 119],
+        "7957019749780783255": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "14034525799882831106": ["convolution_gpu_bfyx_os_iyx_osv16", 869],
+        "3916912615549949771": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5115007207028125638": ["convolution_gpu_bfyx_os_iyx_osv16", 870],
+        "3702373232430988630": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7913076120244203725": ["convolution_gpu_bfyx_os_iyx_osv16", 866],
+        "17778091287904736965": ["convolution_gpu_bfyx_os_iyx_osv16", 868],
+        "16866405531619284081": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10645625090439446714": ["convolution_gpu_bfyx_os_iyx_osv16", 114],
+        "3118240332710616352": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7450417963648518926": ["convolution_gpu_bfyx_os_iyx_osv16", 491],
+        "18271341717679165017": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1520529227443340435": ["convolution_gpu_bfyx_os_iyx_osv16", 866],
+        "6547588888976666790": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2920840796593281126": ["convolution_gpu_bfyx_os_iyx_osv16", 867],
+        "3243287355593359731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15289152041466330689": ["convolution_gpu_bfyx_os_iyx_osv16", 117],
+        "11745487821055710420": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10330180429524641331": ["convolution_gpu_bfyx_os_iyx_osv16", 114],
+        "2413743706626149595": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17228810554159747400": ["convolution_gpu_bfyx_os_iyx_osv16", 118],
+        "2891977832675907820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5140042030231193807": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "12362834244136780846": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "12975331316527510995": ["convolution_gpu_bfyx_os_iyx_osv16", 1073],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "11296280342006832013": ["convolution_gpu_bfyx_os_iyx_osv16", 321],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "2729382724566640622": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "1973144337799131575": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "11237620198863831646": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "13522230668952002294": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "1878253869657286717": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "15537416934472628620": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "1614676161640914325": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "7887122837178625925": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "13932612600851474669": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "108336648992892440": ["convolution_gpu_bfyx_os_iyx_osv16", 696],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "10684345634354913297": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "10916615896929712681": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "6401617291202138329": ["convolution_gpu_bfyx_os_iyx_osv16", 1082],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "14418145752469985573": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "10000629948062903268": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "17913158947435785150": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3683538222536942924": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "3497309410275654168": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "5849203144808104114": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "6079947803671938062": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "10023464714622430341": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "10125169683435871224": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "16183189414217717282": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16", 206],
+        "4778769961736466493": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "14240807033488944743": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "9611215430798915107": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "13082046205786468713": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "12207197008210652563": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "10581403540319621428": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "8583043839495629208": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16", 958],
+        "14447820502121172060": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "16001364310945493562": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "8482359546526573989": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16", 957],
+        "1012101590389722479": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16", 580],
+        "14078917033502693044": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "3484370445244910200": ["convolution_gpu_bfyx_1x1_opt", 2],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16", 956],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16", 9],
+        "2438463778071005693": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10241616750018729197": ["convolution_gpu_bfyx_os_iyx_osv16", 257],
+        "16093736249698386830": ["convolution_gpu_bfyx_os_iyx_osv16", 99],
+        "15577855965797137317": ["fully_connected_gpu_fb_oi_ref", 1],
+        "2793239401424346732": ["fully_connected_gpu_fb_oi_ref", 1],
+        "1090168454685651958": ["fully_connected_gpu_fb_oi_ref", 2],
+        "8694043970360551765": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8773350383870039461": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "18102285308171488538": ["convolution_gpu_bfyx_os_iyx_osv16", 471],
+        "4504463103561729721": ["fully_connected_gpu_fb_io_ref", 1],
+        "13026398103046869012": ["fully_connected_gpu_fb_oi_ref", 2],
+        "4938053383542014494": ["fully_connected_gpu_fb_oi_ref", 2],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16", 1100],
+        "10292349730148518173": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "17322754821646330275": ["convolution_gpu_bfyx_os_iyx_osv16", 564],
+        "9463001223908267526": ["convolution_gpu_bfyx_os_iyx_osv16", 673],
+        "2819993544283340217": ["convolution_gpu_bfyx_os_iyx_osv16", 149],
+        "4891941794728322149": ["convolution_gpu_bfyx_os_iyx_osv16", 294],
+        "17966409116732724850": ["convolution_gpu_bfyx_os_iyx_osv16", 679],
+        "16009549743559486766": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14591935906857802585": ["convolution_gpu_bfyx_os_iyx_osv16", 736],
+        "111424963409848995": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2739383731123097925": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17284261626529871462": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "5668693380660004839": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "12579230945548766456": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5167557197439368430": ["convolution_gpu_bfyx_gemm_like", 1],
+        "1540552565663233708": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "16096568902203474447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5028262864972382565": ["convolution_gpu_bfyx_gemm_like", 0],
+        "1974417291828577": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "2599172922002088957": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11660160310320618383": ["convolution_gpu_bfyx_gemm_like", 0],
+        "6321333724966975926": ["convolution_gpu_bfyx_os_iyx_osv16", 342],
+        "13769852278335802471": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "79817180213970569": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7224734161984848733": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2597920881875761524": ["convolution_gpu_bfyx_os_iyx_osv16", 264],
+        "17470658487460623535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3304768856579090475": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13876951907579147655": ["convolution_gpu_bfyx_os_iyx_osv16", 261],
+        "10366703264083184092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7178492718471026756": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2607889212984737257": ["convolution_gpu_bfyx_os_iyx_osv16", 298],
+        "6528945595038330865": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9831986499172731633": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18440050172847926353": ["convolution_gpu_bfyx_os_iyx_osv16", 257],
+        "6512088599266777589": ["convolution_gpu_bfyx_os_iyx_osv16", 640],
+        "10631671892805059138": ["convolution_gpu_bfyx_os_iyx_osv16", 154],
+        "13517627553690454113": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "17864904691465978047": ["convolution_gpu_bfyx_os_iyx_osv16", 158],
+        "7688613129211669281": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "15276587352894128846": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "16583642152876546031": ["convolution_gpu_bfyx_os_iyx_osv16", 327],
+        "1540351396976309640": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "12018060391889249406": ["convolution_gpu_bfyx_os_iyx_osv16", 529],
+        "1703594828023385832": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "16671415101494484639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4660166087476681397": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "824242000358871449": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "9894766303335506733": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17266480567140619519": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "13198159541095771298": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "3117760785038488579": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "1476464784116064433": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4485934013026623941": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "16673650204498772920": ["convolution_gpu_bfyx_os_iyx_osv16", 219],
+        "8328912827514946731": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15425046562310745575": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9559768114277499815": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "8904325051665606784": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2193347488577584488": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6699483770041820657": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "789366296550494453": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14473138580870542149": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "1428800094127546021": ["convolution_gpu_bfyx_os_iyx_osv16", 1015],
+        "14142504888572786665": ["convolution_gpu_bfyx_os_iyx_osv16", 271],
+        "1984025014517619256": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7957167898986800985": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "412995552853553524": ["convolution_gpu_bfyx_f16", 7],
+        "7058232330882130703": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15549425900373079382": ["convolution_gpu_bfyx_f16", 8],
+        "2713038204741622907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1878980012173918209": ["convolution_gpu_bfyx_f16", 8],
+        "12468208151780727122": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6674575974748163031": ["convolution_gpu_bfyx_f16", 8],
+        "5591111867402032949": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3413916493145831316": ["convolution_gpu_bfyx_f16", 8],
+        "12421615174911349736": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689084255978323672": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12474210147973914830": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14174889288973953645": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18224887830367116006": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16049847963625476676": ["convolution_gpu_bfyx_os_iyx_osv16", 499],
+        "3817623781909159313": ["convolution_gpu_bfyx_f16", 7],
+        "3004968067582685285": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6876765637331622545": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6802301901709446085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13245964863324091195": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "953254263392356310": ["convolution_gpu_bfyx_f16", 2],
+        "5388858533648189105": ["convolution_gpu_bfyx_f16", 4],
+        "3226238265868290723": ["convolution_gpu_bfyx_f16", 7],
+        "10098858620420134682": ["convolution_gpu_bfyx_f16", 1],
+        "18308172581381789101": ["convolution_gpu_bfyx_f16", 2],
+        "12846183737006963638": ["convolution_gpu_bfyx_f16", 3],
+        "8746233054079242877": ["convolution_gpu_bfyx_f16", 2],
+        "7516276889336424671": ["convolution_gpu_bfyx_f16", 4],
+        "8240661672477348007": ["convolution_gpu_bfyx_f16", 2],
+        "7421142512620741721": ["convolution_gpu_bfyx_f16", 2],
+        "17095633565672192085": ["convolution_gpu_bfyx_f16", 5],
+        "7381046541836362634": ["convolution_gpu_bfyx_f16", 1],
+        "7006663637645720459": ["convolution_gpu_bfyx_f16", 3],
+        "554667746487334145": ["convolution_gpu_bfyx_f16", 7],
+        "1899794088311416867": ["convolution_gpu_bfyx_f16", 6],
+        "4461871297663195464": ["convolution_gpu_bfyx_f16", 1],
+        "845238018552466931": ["convolution_gpu_bfyx_f16", 1],
+        "1588946175550138318": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15493305609986974083": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18266429764179335648": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4773783671939023015": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "4841057875316789358": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10434845132440395347": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4971104866692187809": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3918510119122483722": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10511458406494047485": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4104477639131772427": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14619253124444303162": ["convolution_gpu_bfyx_os_iyx_osv16", 350],
+        "2303241947828987936": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "15440788136860909526": ["convolution_gpu_bfyx_os_iyx_osv16", 1028],
+        "5886674354741908134": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "8121822626577551399": ["convolution_gpu_bfyx_os_iyx_osv16", 653],
+        "6561450336890348030": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16", 360],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16", 686],
+        "6864098212683093769": ["convolution_gpu_bfyx_os_iyx_osv16", 1067],
+        "12286768317527546407": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15803888689432429483": ["convolution_gpu_bfyx_os_iyx_osv16", 653],
+        "2969163284049372725": ["convolution_gpu_bfyx_os_iyx_osv16", 288],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16", 294],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16", 1026],
+        "13943934495343791315": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "2864116308996401112": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16", 133],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "12840351521230542751": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "10101063893937511289": ["convolution_gpu_bfyx_os_iyx_osv16", 479],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16", 479],
+        "16052212361531923323": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "14021819955559248258": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16", 798],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16", 846],
+        "18183296320499063227": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16", 578],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16", 426],
+        "13889057206654080908": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2813710942447372241": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "13633232435632839044": ["convolution_gpu_bfyx_f16", 7],
+        "2883172178329270363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9432546329737888706": ["convolution_gpu_bfyx_f16", 8],
+        "12985746913235154779": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17940668702908419725": ["convolution_gpu_bfyx_f16", 8],
+        "2064000219100642226": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5833649709217830223": ["convolution_gpu_bfyx_f16", 8],
+        "10849235794440642481": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6321445979984216128": ["convolution_gpu_bfyx_f16", 7],
+        "14697315322325185660": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "36079357617783912": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4063865474431180498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13167503358764278233": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17498603449428007802": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6304136029727027056": ["convolution_gpu_bfyx_os_iyx_osv16", 166],
+        "1754448782405089213": ["convolution_gpu_bfyx_f16", 7],
+        "15489166244290113065": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5756918986564223629": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8035545676843269497": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17042017278300937839": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11260048813076144906": ["convolution_gpu_bfyx_f16", 1],
+        "6873924247641352061": ["convolution_gpu_bfyx_f16", 3],
+        "6474957215284027135": ["convolution_gpu_bfyx_f16", 6],
+        "16573724507496129614": ["convolution_gpu_bfyx_f16", 7],
+        "11210971373278055121": ["convolution_gpu_bfyx_f16", 4],
+        "185717560970701618": ["convolution_gpu_bfyx_f16", 5],
+        "11817410866221484993": ["convolution_gpu_bfyx_f16", 3],
+        "9765519004693711463": ["convolution_gpu_bfyx_f16", 6],
+        "14300671725579588671": ["convolution_gpu_bfyx_f16", 5],
+        "1297549572559338433": ["convolution_gpu_bfyx_f16", 4],
+        "4346210823986581329": ["convolution_gpu_bfyx_f16", 6],
+        "2750608965765787878": ["convolution_gpu_bfyx_f16", 5],
+        "14245442283142381063": ["convolution_gpu_bfyx_f16", 7],
+        "2942593456597250269": ["convolution_gpu_bfyx_f16", 5],
+        "14807774261203767931": ["convolution_gpu_bfyx_f16", 8],
+        "2024891861044519704": ["convolution_gpu_bfyx_f16", 8],
+        "12988352411577718659": ["convolution_gpu_bfyx_f16", 1],
+        "7546167886043158750": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12777758044198094011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "17999895886988202252": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7284204319739516687": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11574916930945966662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12181953262469206135": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11001131415959768285": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11516255774873880270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17905472119711952421": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3708423242842748011": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16735610121492345646": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10393786933242452104": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8593006729492614006": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "8080047256092430454": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5827132729840694911": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2862262622518056270": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7592655788466931007": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "10751447918697845967": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "14327549932088763609": ["convolution_gpu_bfyx_os_iyx_osv16", 249],
+        "9139350052341521235": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2268155498775258271": ["convolution_gpu_bfyx_os_iyx_osv16", 1002],
+        "9252995576301318377": ["convolution_gpu_bfyx_os_iyx_osv16", 1007],
+        "16131094933895726474": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "2390813972238809739": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "6575286116803785519": ["convolution_gpu_bfyx_os_iyx_osv16", 911],
+        "9509860212160444680": ["convolution_gpu_bfyx_os_iyx_osv16", 142],
+        "2025729513014515133": ["convolution_gpu_bfyx_os_iyx_osv16", 581],
+        "7012386443457106080": ["convolution_gpu_bfyx_os_iyx_osv16", 519],
+        "10807317048120773939": ["convolution_gpu_bfyx_os_iyx_osv16", 478],
+        "13800264518247731721": ["convolution_gpu_bfyx_os_iyx_osv16", 162],
+        "10381956671421182115": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "4874673523117573787": ["convolution_gpu_bfyx_os_iyx_osv16", 918],
+        "18140414399325733479": ["convolution_gpu_bfyx_os_iyx_osv16", 426],
+        "5854165399605633326": ["convolution_gpu_bfyx_os_iyx_osv16", 423],
+        "17238058461587589303": ["convolution_gpu_bfyx_os_iyx_osv16", 797],
+        "4101383449947395379": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "2697043651083211983": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "1196153439884178828": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "1408371298472575421": ["convolution_gpu_bfyx_os_iyx_osv16", 467],
+        "9614122272772797675": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7199567766573336359": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13573164884579883011": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15482728985931330311": ["convolution_gpu_bfyx_gemm_like", 1],
+        "4607650298345740971": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16086873164128770879": ["convolution_gpu_bfyx_gemm_like", 2],
+        "105926781977700977": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11591232422517503119": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11582016741808877197": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16914574072145986060": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6022176855777948587": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8941858845051007302": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9357675997524716463": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "3521176117120705338": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12045093589986262223": ["convolution_gpu_bfyx_os_iyx_osv16", 590],
+        "470065517654323782": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16734161909350784601": ["convolution_gpu_bfyx_os_iyx_osv16", 973],
+        "11121230809258677064": ["convolution_gpu_bfyx_gemm_like", 1],
+        "6349024748484491361": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9689224985169331447": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3750053020466161808": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "15788948623626667459": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13291988829313422545": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 2],
+        "17375427967226537519": ["convolution_gpu_bfyx_gemm_like", 2],
+        "356011965155211999": ["convolution_gpu_bfyx_os_iyx_osv16", 285],
+        "10249443290070223207": ["convolution_gpu_bfyx_os_iyx_osv16", 548],
+        "11731131619682311119": ["convolution_gpu_bfyx_os_iyx_osv16", 283],
+        "499465197159774125": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 729],
+        "6713136765330410003": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "10482500982261483441": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12868046747643626115": ["convolution_gpu_bfyx_os_iyx_osv16", 286],
+        "3118940652855466279": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "9133477146144263621": ["convolution_gpu_bfyx_os_iyx_osv16", 1038],
+        "6014658843738581344": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2254000832500315403": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "2201913047888029571": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "6765174963106729735": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6860612036193780126": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4053722516029644812": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1037],
+        "3872902814632377403": ["convolution_gpu_bfyx_os_iyx_osv16", 175],
+        "11807558788154880902": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11052363375504603312": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "5704480811160976661": ["convolution_gpu_bfyx_os_iyx_osv16", 179],
+        "2631038501229053001": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11448877892018743111": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10504809699083269708": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 582],
+        "221686752427251764": ["convolution_gpu_bfyx_os_iyx_osv16", 541],
+        "8099629938775512387": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "5641577920984461497": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 542],
+        "12153763576335891417": ["fully_connected_gpu_fb_io_b8_f8_vload", 2],
+        "10715707282679913174": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15195978022706554558": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4702145645721143238": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10415281487218000500": ["convolution_gpu_bfyx_gemm_like", 2],
+        "680533894953795110": ["convolution_gpu_bfyx_os_iyx_osv16", 1025],
+        "1524996376386486665": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "2180727313291426024": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "13865408769089368168": ["convolution_gpu_bfyx_os_iyx_osv16", 530],
+        "17729561573161674389": ["convolution_gpu_bfyx_os_iyx_osv16", 531],
+        "14102092207521274159": ["convolution_gpu_bfyx_os_iyx_osv16", 874],
+        "14601505600623942303": ["convolution_gpu_bfyx_os_iyx_osv16", 499],
+        "93092162022748986": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15895053123520992434": ["convolution_gpu_bfyx_os_iyx_osv16", 923],
+        "14005851072926998714": ["convolution_gpu_bfyx_os_iyx_osv16", 544],
+        "13582287631171243512": ["convolution_gpu_bfyx_os_iyx_osv16", 597],
+        "10982128848228134282": ["convolution_gpu_bfyx_os_iyx_osv16", 624],
+        "7236965443679023925": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "1267627207431132628": ["convolution_gpu_bfyx_os_iyx_osv16", 137],
+        "2427481818567622188": ["convolution_gpu_bfyx_os_iyx_osv16", 532],
+        "9499169226931836849": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "14841135939793901331": ["convolution_gpu_bfyx_os_iyx_osv16", 127],
+        "13877129322236450083": ["convolution_gpu_bfyx_os_iyx_osv16", 875],
+        "6772340882401465511": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11743064882436041973": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10729082617196359413": ["convolution_gpu_bfyx_gemm_like", 0],
+        "8688603561602716375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "6216329929003742144": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5858568936289863149": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1258577325908211211": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10990147603320054495": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13137659893098575291": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2695989423525253829": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3932955531996129807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14834765532454121330": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "11205075769094656704": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10056755067893619842": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8845972204063781512": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "5286686388506198758": ["convolution_gpu_bfyx_os_iyx_osv16", 670],
+        "17044347247573802405": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16931221552471635881": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3375470456077799802": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14332199338789934423": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16759785658634382018": ["convolution_gpu_bfyx_os_iyx_osv16", 385],
+        "9833509391965801955": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "853439126393091889": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10335429769666812841": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "9424664012357101635": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "5860372371921305416": ["convolution_gpu_bfyx_os_iyx_osv16", 970],
+        "3503193615625158929": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14744249132822614079": ["convolution_gpu_bfyx_os_iyx_osv16", 593],
+        "5243045977966841351": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12657769780794263187": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "7005710331306745857": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "17732714197816812919": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2691481290737970286": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "16758724324099838132": ["convolution_gpu_bfyx_os_iyx_osv16", 629],
+        "13321275573521697498": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17371402188380900420": ["convolution_gpu_bfyx_os_iyx_osv16", 631],
+        "15857411657993741130": ["convolution_gpu_bfyx_os_iyx_osv16", 630],
+        "10824769165318760081": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1670508622389791801": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6601005881101223654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3767953997999748671": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12193543332391207302": ["convolution_gpu_bfyx_os_iyx_osv16", 818],
+        "16644569811401857265": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14644196187730386778": ["convolution_gpu_bfyx_to_fs_byx_fsv32", 242],
+        "15997330269289678741": ["convolution_gpu_fs_byx_fsv32", 20],
+        "9456547817322301854": ["convolution_gpu_fs_byx_fsv32", 14],
+        "14503081204981089589": ["convolution_gpu_fs_byx_fsv32", 8],
+        "17459372555428323405": ["convolution_gpu_fs_byx_fsv32", 1],
+        "11045313080354230499": ["convolution_gpu_fs_byx_fsv32_1x1", 1],
+        "11891736161858320688": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "3837179970761308107": ["convolution_gpu_bfyx_os_iyx_osv16", 819],
+        "1599135987505067413": ["convolution_gpu_bfyx_os_iyx_osv16", 662],
+        "88960405449779079": ["convolution_gpu_bfyx_os_iyx_osv16", 703],
+        "3983071771155729815": ["convolution_gpu_bfyx_os_iyx_osv16", 326],
+        "4686928543634340294": ["convolution_gpu_bfyx_os_iyx_osv16", 125],
+        "9500201961536063781": ["convolution_gpu_bfyx_os_iyx_osv16", 87],
+        "5626617363814193337": ["convolution_gpu_bfyx_os_iyx_osv16", 41],
+        "9493629616033946504": ["convolution_gpu_bfyx_os_iyx_osv16", 45],
+        "9142997105687030758": ["convolution_gpu_bfyx_os_iyx_osv16", 755],
+        "3565303211593767799": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "1638619072790951553": ["convolution_gpu_bfyx_os_iyx_osv16", 285],
+        "16187579575395923193": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14768404566434004921": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "4439755580616372110": ["convolution_gpu_bfyx_os_iyx_osv16", 158],
+        "12082385141539179745": ["convolution_gpu_bfyx_os_iyx_osv16", 472],
+        "80211457682233943": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "2281851137797618536": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "4306482192283599644": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "7438079994024163367": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "11505611789014119307": ["convolution_gpu_bfyx_gemm_like", 2],
+        "437815073846842580": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1950316744853763835": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2101440743856834523": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11177728104020690382": ["convolution_gpu_bfyx_gemm_like", 1],
+        "11070046570645256268": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12619772485618838435": ["convolution_gpu_bfyx_gemm_like", 1],
+        "2751512607890114618": ["convolution_gpu_bfyx_gemm_like", 1],
+        "15183698566691504656": ["convolution_gpu_bfyx_os_iyx_osv16", 484],
+        "12653721467536263212": ["convolution_gpu_bfyx_os_iyx_osv16", 430],
+        "13194232160397919757": ["convolution_gpu_bfyx_os_iyx_osv16", 431],
+        "204538163378003996": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "6149494643008538957": ["convolution_gpu_bfyx_os_iyx_osv16", 377],
+        "11290368603402236066": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1323592601201034234": ["convolution_gpu_bfyx_f16", 7],
+        "14798486770850675841": ["convolution_gpu_bfyx_f16", 7],
+        "11673314628747753691": ["convolution_gpu_bfyx_f16", 4],
+        "7021961511624638678": ["convolution_gpu_bfyx_f16", 4],
+        "5676198353742450430": ["convolution_gpu_bfyx_f16", 3],
+        "4929819810689803833": ["convolution_gpu_bfyx_f16", 6],
+        "240316590146675808": ["convolution_gpu_bfyx_f16", 5],
+        "17625565940895057722": ["convolution_gpu_bfyx_f16", 5],
+        "8688075088415087060": ["convolution_gpu_bfyx_f16", 3],
+        "3109943868702160503": ["convolution_gpu_bfyx_f16", 5],
+        "15650217867869430450": ["convolution_gpu_bfyx_f16", 2],
+        "17908144598228512507": ["convolution_gpu_bfyx_os_iyx_osv16", 2],
+        "12413306519886846795": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3485465952750021220": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16729621401445513163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5488147296483022703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8710473738514939538": ["convolution_gpu_bfyx_gemm_like", 1],
+        "9147606392761848284": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "5087291643342132199": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11774085137209016046": ["convolution_gpu_bfyx_os_iyx_osv16", 482],
+        "8929841836974581600": ["convolution_gpu_bfyx_os_iyx_osv16", 486],
+        "7073670312468097760": ["convolution_gpu_bfyx_os_iyx_osv16", 843],
+        "14911211495772743601": ["convolution_gpu_bfyx_os_iyx_osv16", 802],
+        "3856389350154673872": ["convolution_gpu_bfyx_os_iyx_osv16", 429],
+        "14587774878993352201": ["convolution_gpu_bfyx_os_iyx_osv16", 378],
+        "17294259290189185308": ["convolution_gpu_bfyx_os_iyx_osv16", 794],
+        "3125709089627137774": ["fully_connected_gpu_bf_io_gemm", 2],
+        "10167929165359894539": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "9467011207416095285": ["convolution_gpu_bfyx_f16", 8],
+        "18110939720141818253": ["convolution_gpu_bfyx_f16", 8],
+        "10809361044654815291": ["convolution_gpu_bfyx_f16", 8],
+        "8729970019383795569": ["convolution_gpu_bfyx_f16", 8],
+        "17827231016257521940": ["convolution_gpu_bfyx_f16", 8],
+        "5828304029836199809": ["convolution_gpu_bfyx_f16", 8],
+        "8608098673678326421": ["convolution_gpu_bfyx_f16", 8],
+        "4799951889015766408": ["convolution_gpu_bfyx_f16", 7],
+        "11256132692380923779": ["convolution_gpu_bfyx_f16", 8],
+        "5227747889064386895": ["convolution_gpu_bfyx_f16", 8],
+        "12435125610964522916": ["convolution_gpu_bfyx_f16", 6],
+        "16247711996185304739": ["convolution_gpu_bfyx_f16", 8],
+        "18419015006676659067": ["convolution_gpu_bfyx_f16", 8],
+        "14816607493224983644": ["convolution_gpu_bfyx_f16", 6],
+        "18080751562346851399": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7358709983309335021": ["convolution_gpu_bfyx_f16", 8],
+        "5951975369177447577": ["convolution_gpu_bfyx_f16", 8],
+        "4605643396574334390": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "568067246719517739": ["convolution_gpu_bfyx_f16", 8],
+        "5442649671500173287": ["convolution_gpu_bfyx_f16", 8],
+        "8903826140874560178": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9628142105100134135": ["convolution_gpu_bfyx_f16", 8],
+        "1232826373801366623": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13694514357024954788": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11391493057056286699": ["convolution_gpu_bfyx_f16", 8],
+        "2928262304708248227": ["convolution_gpu_bfyx_f16", 8],
+        "17499908409183932218": ["convolution_gpu_bfyx_f16", 4],
+        "14859658448955499179": ["convolution_gpu_bfyx_f16", 4],
+        "15311505436254873919": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5960462069119450195": ["convolution_gpu_bfyx_f16", 8],
+        "9149564669418564479": ["convolution_gpu_bfyx_f16", 8],
+        "18163755369052842813": ["convolution_gpu_bfyx_f16", 8],
+        "8275135719522690527": ["convolution_gpu_bfyx_f16", 8],
+        "2298529747054199771": ["convolution_gpu_bfyx_f16", 8],
+        "15976944134096292352": ["convolution_gpu_bfyx_f16", 8],
+        "547454211762819596": ["convolution_gpu_bfyx_f16", 8],
+        "17906950174330335301": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13031644522906269724": ["convolution_gpu_bfyx_f16", 8],
+        "18288385173121209976": ["convolution_gpu_bfyx_f16", 8],
+        "17061924482046716630": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11361932739710301784": ["convolution_gpu_bfyx_f16", 8],
+        "16458889125940546368": ["convolution_gpu_bfyx_f16", 8],
+        "12450622764005143821": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2903571673767001640": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13347455981832054533": ["convolution_gpu_bfyx_f16", 8],
+        "12538241775315086953": ["convolution_gpu_bfyx_f16", 8],
+        "8126691205953344871": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16780977023488631804": ["convolution_gpu_bfyx_f16", 8],
+        "17136478867951302904": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13212207942275182881": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14495079358949370120": ["convolution_gpu_bfyx_f16", 8],
+        "4382417733451183736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "288372190616771633": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11617333447192485775": ["convolution_gpu_bfyx_f16", 8],
+        "6974470789123895023": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18231804735191273525": ["convolution_gpu_bfyx_f16", 6],
+        "4874987389971025643": ["convolution_gpu_bfyx_f16", 3],
+        "3058493226413875038": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "10596520518408393633": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "425449143970914526": ["convolution_gpu_bfyx_f16", 6],
+        "10926709262435197853": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15825916789772519361": ["convolution_gpu_bfyx_f16", 8],
+        "11344323188244299942": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10578113800829774986": ["convolution_gpu_bfyx_f16", 6],
+        "2375997562748751746": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2516065028198850400": ["convolution_gpu_bfyx_f16", 8],
+        "6409750149360913230": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10271647104505566706": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8508694183870500179": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5818470431977263086": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5193770703407776933": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17521329019150924840": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14014285669562955732": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6671379525619868817": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "4953425498352128032": ["convolution_gpu_bfyx_f16", 8],
+        "17554861470547691900": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10785511833112040704": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17842238305349560675": ["convolution_gpu_bfyx_f16", 8],
+        "9045432887458921887": ["convolution_gpu_bfyx_f16", 8],
+        "2317068120382359117": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13198591357050450583": ["convolution_gpu_bfyx_f16", 6],
+        "16630820631092662612": ["convolution_gpu_bfyx_f16", 8],
+        "3304799458897138796": ["convolution_gpu_bfyx_f16", 8],
+        "12719370525372818641": ["convolution_gpu_bfyx_f16", 8],
+        "13033843297334845893": ["convolution_gpu_bfyx_f16", 8],
+        "3008335290324080979": ["convolution_gpu_bfyx_f16", 8],
+        "12990586786235599879": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7414503846719526880": ["convolution_gpu_bfyx_f16", 7],
+        "5220110435197346795": ["convolution_gpu_bfyx_f16", 8],
+        "2541237388942804073": ["convolution_gpu_bfyx_os_iyx_osv16", 195],
+        "16216088294429058352": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6548372255564207516": ["convolution_gpu_bfyx_os_iyx_osv16", 357],
+        "7504803815827679458": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7890404193762842042": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1983441240707747971": ["convolution_gpu_bfyx_os_iyx_osv16", 732],
+        "4803645476690765923": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "5462029271954491399": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11800826711545841506": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "15561297213164683600": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16248626680766522088": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "3284821679004727180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16599055682441641193": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "153264675527724419": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "17075015317341671432": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 333],
+        "632977374237919309": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "14450344739436846034": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9209250404182507365": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8415669066784458057": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "185843586737457998": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1085],
+        "766969822241155083": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "16155506862324955163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11978741458254664288": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11586230734901512993": ["convolution_gpu_bfyx_os_iyx_osv16", 1081],
+        "10087695833233777190": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1083],
+        "12128657061255822293": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "17939537883510630662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18423168447470112214": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5547921846568118067": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "5296811001677778360": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 333],
+        "10607242950204268733": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9582149845440579162": ["convolution_gpu_bfyx_gemm_like", 0],
+        "7551236304447290573": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "11927599447031406206": ["convolution_gpu_bfyx_os_iyx_osv16", 705],
+        "8913537827155053321": ["convolution_gpu_bfyx_os_iyx_osv16", 330],
+        "2694403950208867995": ["convolution_gpu_bfyx_f16", 8],
+        "8480814485407284385": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5659725884804864884": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17247202229862821831": ["convolution_gpu_bfyx_f16", 8],
+        "4591189240532371990": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "11298211715109572712": ["convolution_gpu_bfyx_f16", 8],
+        "11060719795922800502": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4024014996080705374": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6324164991095466646": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2028232780476695855": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5660036914950333671": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13834181765190936016": ["convolution_gpu_bfyx_f16", 8],
+        "4422700514049031042": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9588011604564305874": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7215636441499808004": ["convolution_gpu_bfyx_f16", 8],
+        "17474693423719248969": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7554680558347192287": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3170336071769787200": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2846411667168152725": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11181943097674741934": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11639140799979401650": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "16307198282569581614": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17680917278609527441": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "13721389244734206054": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4859340216439264067": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14901974015744200645": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15824748596078822917": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "13975002992350723907": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1856369619298816871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15980250964613837766": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "11188535023427121544": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "6357407299587334303": ["convolution_gpu_bfyx_os_iyx_osv16", 361],
+        "1637111553324908623": ["convolution_gpu_bfyx_os_iyx_osv16", 364],
+        "17491572290683222099": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "276041414724056329": ["convolution_gpu_bfyx_os_iyx_osv16", 226],
+        "14980338802512175932": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1556522723879235440": ["convolution_gpu_bfyx_os_iyx_osv16", 600],
+        "16417146101754878208": ["convolution_gpu_bfyx_os_iyx_osv16", 88],
+        "15277806782771093230": ["convolution_gpu_bfyx_f16", 8],
+        "9169923259147715426": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9970341118085992354": ["convolution_gpu_bfyx_f16", 8],
+        "12104455606397742751": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "402419791784302832": ["convolution_gpu_bfyx_f16", 8],
+        "13972467430568459775": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4129442598734369883": ["convolution_gpu_bfyx_f16", 8],
+        "77779308553690261": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7805353658967212897": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17523151407361362563": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2925909029337163642": ["convolution_gpu_bfyx_f16", 8],
+        "9734910403081363436": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17920847151501327807": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12704616994417458301": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18154520535225399083": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12277279619939618291": ["convolution_gpu_bfyx_f16", 8],
+        "9784651392759875534": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "2028955242295927382": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "796719947114218328": ["convolution_gpu_bfyx_f16", 4],
+        "7199374877403948402": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "7332724365602008957": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13826441040132777454": ["convolution_gpu_bfyx_f16", 8],
+        "845707625293752103": ["convolution_gpu_bfyx_f16", 6],
+        "17141580666144953740": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4767393715835846841": ["convolution_gpu_bfyx_f16", 6],
+        "11299501537291400011": ["convolution_gpu_bfyx_f16", 8],
+        "11125333100440185901": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10739033895708157022": ["convolution_gpu_bfyx_f16", 8],
+        "13410695662993340019": ["convolution_gpu_bfyx_f16", 6],
+        "9449557005968240342": ["convolution_gpu_bfyx_os_iyx_osv16", 1094],
+        "13636060167487404399": ["convolution_gpu_bfyx_f16", 8],
+        "17615292268108897451": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "15275200239982983006": ["convolution_gpu_bfyx_f16", 8],
+        "6933005158879333442": ["convolution_gpu_bfyx_f16", 7],
+        "17404976166399004281": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15546952240223018544": ["convolution_gpu_bfyx_f16", 8],
+        "9221818348726873589": ["convolution_gpu_bfyx_f16", 8],
+        "4394722210952321875": ["convolution_gpu_bfyx_os_iyx_osv16", 375],
+        "6306121426740325003": ["convolution_gpu_bfyx_f16", 6],
+        "8909598090821234392": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8323852925946594550": ["convolution_gpu_bfyx_f16", 8],
+        "15387142572620537292": ["convolution_gpu_bfyx_f16", 8],
+        "7224633322280261797": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "4152667642459353093": ["convolution_gpu_bfyx_f16", 7],
+        "6850677395681499432": ["convolution_gpu_bfyx_f16", 7],
+        "9650348888387532125": ["convolution_gpu_bfyx_f16", 8],
+        "6081667869975438905": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "12327336773475034198": ["convolution_gpu_bfyx_f16", 7],
+        "1948599748319101030": ["convolution_gpu_bfyx_f16", 7],
+        "98134405895903266": ["convolution_gpu_bfyx_os_iyx_osv16", 358],
+        "14701329763611994976": ["convolution_gpu_bfyx_f16", 8],
+        "6716883556122508073": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16273453628106476710": ["convolution_gpu_bfyx_f16", 7],
+        "8720949360157265320": ["convolution_gpu_bfyx_f16", 8],
+        "7198010523779834541": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "2836223913370318262": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8745298918616517419": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14153070039524436781": ["convolution_gpu_bfyx_f16", 8],
+        "13942373222048813039": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2520658429967845372": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "3842241205837995725": ["convolution_gpu_bfyx_to_bfyx_f16", 5],
+        "13045259089446188773": ["convolution_gpu_bfyx_f16", 2],
+        "1946851578807856544": ["convolution_gpu_bfyx_f16", 5],
+        "16189255500363260553": ["convolution_gpu_bfyx_f16", 5],
+        "16375480181817879443": ["convolution_gpu_bfyx_f16", 4],
+        "1731805218150075474": ["convolution_gpu_bfyx_f16", 2],
+        "11489413392122514570": ["convolution_gpu_bfyx_f16", 3],
+        "10034218261019628269": ["convolution_gpu_bfyx_f16", 2],
+        "3242010181507046540": ["convolution_gpu_bfyx_f16", 1],
+        "17874779808897661223": ["convolution_gpu_bfyx_f16", 5],
+        "8285149767406178221": ["convolution_gpu_bfyx_f16", 1],
+        "14312011904722537311": ["convolution_gpu_bfyx_f16", 1],
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 1],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10247418143396239693": ["fused_conv_eltwise_gpu_ref", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 0],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "3025829117046314851": ["convolution_gpu_bfyx_gemm_like", 0],
+        "9151746755060262640": ["convolution_gpu_winograd_2x3_s1_fused", 0],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "482233531247606412": ["convolution_gpu_winograd_2x3_s1_fused", 0],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17167052658616496904": ["convolution_gpu_bfyx_gemm_like", 0],
+        "758159154291645307": ["fully_connected_gpu_bfyx_ref", 0],
         "10298865798559508430": ["convolution_gpu_bfyx_gemm_like", 2],
         "8036745915261696332": ["convolution_gpu_bfyx_gemm_like", 2],
         "10569376024770516176": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
         "13762162740325518554": ["convolution_gpu_bfyx_os_iyx_osv16", 87]
     },
     "18": {
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5981205170754513046": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3025829117046314851": ["convolution_gpu_bfyx_os_iyx_osv16", 107],
+        "14600403613863348033": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9466249274834206569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17167052658616496904": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "758159154291645307": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "14555191501995137081": ["fully_connected_gpu_bf_io_gemm", 1],
+        "3057483147285040704": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4460662214292495759": ["convolution_gpu_bfyx_f16", 8],
+        "17632851940131114495": ["convolution_gpu_bfyx_f16", 8],
+        "7945867532035693686": ["convolution_gpu_bfyx_f16", 8],
+        "10798155343477437060": ["convolution_gpu_bfyx_f16", 8],
+        "14191150640021059705": ["convolution_gpu_bfyx_f16", 8],
+        "14593228968660512118": ["convolution_gpu_bfyx_f16", 8],
+        "5573515532668433114": ["convolution_gpu_bfyx_f16", 8],
+        "11642934660277782628": ["convolution_gpu_bfyx_f16", 8],
+        "4825553592910970555": ["convolution_gpu_bfyx_f16", 8],
+        "17245530055973419690": ["convolution_gpu_bfyx_f16", 8],
+        "14644519840111409049": ["convolution_gpu_bfyx_f16", 8],
+        "15093112872571669071": ["convolution_gpu_bfyx_f16", 8],
+        "6172925429706792586": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16888914688498810916": ["convolution_gpu_bfyx_f16", 8],
+        "7094210524110336636": ["convolution_gpu_bfyx_f16", 8],
+        "1102719880087191972": ["convolution_gpu_bfyx_f16", 6],
+        "17635368969132641763": ["convolution_gpu_bfyx_f16", 8],
+        "6444855710931944326": ["convolution_gpu_bfyx_f16", 8],
+        "3685203889040861337": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8983142397488339162": ["convolution_gpu_bfyx_f16", 8],
+        "2942771097961823034": ["convolution_gpu_bfyx_f16", 8],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16", 8],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 8],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 7],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 8],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 8],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 8],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 5],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 5],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 8],
+        "338313831905889757": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 5],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 5],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 5],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 8],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 5],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 5],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 78],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "570493430126610249": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 0],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 0],
+        "5689213766720451736": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11153755804932874939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13074790088623248655": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14552950763379636885": ["convolution_gpu_bfyx_f16", 7],
+        "1094600023872583173": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16827633753490728058": ["convolution_gpu_bfyx_f16", 8],
+        "6130516122331504865": ["convolution_gpu_bfyx_f16", 8],
+        "7670629548971090825": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5029322578170351026": ["convolution_gpu_bfyx_f16", 8],
+        "11682717086936489649": ["convolution_gpu_bfyx_f16", 8],
+        "9372644596618467274": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14183733053550126939": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5642981720905097704": ["convolution_gpu_bfyx_f16", 5],
+        "3924580903671169312": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17700105511171786728": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14998223809620050073": ["convolution_gpu_bfyx_f16", 5],
+        "419201770890811765": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7063350782589593425": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10687898799916833174": ["convolution_gpu_bfyx_f16", 8],
+        "5341504900604548311": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8082311845702095517": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5769891345892528049": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5034821474694053994": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2717532297792072749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "368628635269408785": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10159612784755046280": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15051374440521170869": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17031332595095892437": ["convolution_gpu_bfyx_f16", 7],
+        "6938198718430530942": ["convolution_gpu_bfyx_f16", 8],
+        "2358029178760210430": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16131007134197705525": ["convolution_gpu_bfyx_f16", 7],
+        "6612035874395100788": ["convolution_gpu_bfyx_f16", 8],
+        "15022677981959490269": ["convolution_gpu_bfyx_f16", 8],
+        "11900509609879947992": ["convolution_gpu_bfyx_f16", 1],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3968445701280656378": ["convolution_gpu_bfyx_f16", 8],
+        "7463742252314920613": ["convolution_gpu_bfyx_f16", 8],
+        "17747065651432157057": ["convolution_gpu_bfyx_f16", 8],
+        "2951437417233062866": ["convolution_gpu_bfyx_f16", 8],
+        "4695031178096669813": ["convolution_gpu_bfyx_f16", 5],
+        "13200791011072363046": ["convolution_gpu_bfyx_f16", 5],
+        "7702483443698911725": ["convolution_gpu_bfyx_f16", 5],
+        "3225276687886679210": ["convolution_gpu_bfyx_f16", 7],
+        "8406061878298060171": ["convolution_gpu_bfyx_f16", 7],
+        "11861948300376902542": ["convolution_gpu_bfyx_f16", 7],
+        "18047654118875021903": ["convolution_gpu_bfyx_f16", 6],
+        "3876838946012690078": ["convolution_gpu_bfyx_f16", 7],
+        "11532811324432477051": ["convolution_gpu_bfyx_f16", 8],
+        "16482627014547828135": ["convolution_gpu_bfyx_f16", 7],
+        "4565106422618308814": ["convolution_gpu_bfyx_f16", 6],
+        "16991433003318725315": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "16286683168753184722": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7074368169815304627": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10702490327714920783": ["convolution_gpu_bfyx_gemm_like", 2],
+        "964168479107166949": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6252510766878541979": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1012052068628903875": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15499166167392043521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14327383763442344255": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18371627210590255356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13185859115957551268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15052792752810689842": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17918808521142517830": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1644157325342654261": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12198018126650448419": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9714393675511550323": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4928366179227934688": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15361605271135812199": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10267714663732575502": ["convolution_gpu_bfyx_1x1", 2],
+        "9990965405769569785": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10493403039286551634": ["convolution_gpu_bfyx_1x1", 2],
+        "18324310183763016728": ["convolution_gpu_bfyx_os_iyx_osv16", 458],
+        "6002923098500991259": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3429780644945779272": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6005067060818453503": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3676547304316346974": ["convolution_gpu_bfyx_f16", 6],
+        "8412675332215210248": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14477382651380138146": ["convolution_gpu_bfyx_f16", 8],
+        "15899888589766240554": ["convolution_gpu_bfyx_f16", 8],
+        "4529376177404929890": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7210896246223636810": ["convolution_gpu_bfyx_f16", 8],
+        "2775471071662652034": ["convolution_gpu_bfyx_f16", 8],
+        "17132456912135683375": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15563691660506818555": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9997402509928965207": ["convolution_gpu_bfyx_f16", 5],
+        "7793754164423097155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4639865771698877244": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1766192115208251594": ["convolution_gpu_bfyx_f16", 8],
+        "2015853414727933068": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10306264176864957825": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4871044181497936479": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8396548857016837452": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12714653556587252941": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1967886437456544865": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11350907923254547441": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12282274184666824734": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16866941685634953173": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6312283149621718315": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9795822066940245604": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "7256380059517365529": ["convolution_gpu_bfyx_f16", 6],
+        "11966909558503849515": ["convolution_gpu_bfyx_f16", 8],
+        "11277466712159791917": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4992371260504969141": ["convolution_gpu_bfyx_f16", 7],
+        "15043181455492553716": ["convolution_gpu_bfyx_f16", 7],
+        "8399107263382557054": ["convolution_gpu_bfyx_f16", 8],
+        "6350452055467384023": ["convolution_gpu_bfyx_f16", 7],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16", 974],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like", 2],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16", 1091],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "4917807560042671575": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "44341776758472069": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "3584869801682702110": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "13032463401326344281": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "12074020528214820344": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "10792368710075698135": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 352],
+        "15248304664655540462": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "8737603244374483727": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6375630142791083064": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18191060893922845906": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4914314319075651246": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2995522243104361971": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1104],
+        "3260693384502698965": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8021852643758937492": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "4309855944835724499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14741878965259218163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180612484034524170": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13300287078635373813": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13378751364754764186": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6449257695177020930": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "17627392788011440461": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "13831493475156855535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "3860080842190932938": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12349486511618981663": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17142061595610833587": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "413520381980740601": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "6526747512277607691": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "16117940336643166742": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5991582579063082343": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "3294597200237228703": ["convolution_gpu_bfyx_os_iyx_osv16", 670],
+        "16191151963860109032": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4092109744625924274": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3411824370004173602": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 8],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16", 8],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 8],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "499215221217528434": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 8],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 8],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 8],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 7],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16", 8],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 8],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16", 8],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16", 6],
+        "8155797389244290087": ["convolution_gpu_bfyx_f16", 0],
+        "16706121580364790904": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5673972310424776040": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16", 859],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "4305463771822108179": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "2079318718874681198": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "17439941375453858836": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12467583825605788345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9058857190661793339": ["fused_conv_eltwise_gpu_ref", 2],
+        "11620974866622716017": ["fused_conv_eltwise_gpu_ref", 0],
+        "8857009061371774666": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5756084360647965669": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3975438095352877013": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3345987020362642539": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2080318501154291605": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "13813582937323882369": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "11149782181562145291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16", 1098],
+        "3526580286148537369": ["convolution_gpu_bfyx_os_iyx_osv16", 316],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "2877876834438717783": ["convolution_gpu_bfyx_os_iyx_osv16", 951],
+        "9156649014297448284": ["convolution_gpu_bfyx_os_iyx_osv16", 644],
+        "13660470643303663441": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "8081997809574506331": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "8199400320947837516": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5643924526605879168": ["convolution_gpu_bfyx_os_iyx_osv16", 723],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 8],
+        "13332579082252874358": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10148956417804060854": ["convolution_gpu_bfyx_f16", 8],
+        "16052199780545784176": ["convolution_gpu_bfyx_f16", 8],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 318],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 8],
+        "3080843366919845836": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16898206352994894714": ["convolution_gpu_bfyx_f16", 8],
+        "17502393571772755646": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12982233543299343225": ["convolution_gpu_bfyx_os_iyx_osv16", 89],
+        "5609871805820255743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "7971259885907841252": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15956442448148612253": ["convolution_gpu_bfyx_os_iyx_osv16", 1069],
+        "7600980811977404651": ["convolution_gpu_bfyx_os_iyx_osv16", 984],
+        "6051578359778554994": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "14591236937522474591": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "380671738106280681": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "16581313033870107357": ["convolution_gpu_bfyx_os_iyx_osv16", 1066],
+        "15132868076468531540": ["convolution_gpu_bfyx_os_iyx_osv16", 695],
+        "4964421818619633295": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "5206589624074157418": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "15804259593852912096": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "9667577643691138471": ["convolution_gpu_bfyx_f16", 8],
+        "9587296295017154035": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "624896425985287215": ["convolution_gpu_bfyx_f16", 8],
+        "13698491289625410930": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "5501294609610168354": ["convolution_gpu_bfyx_f16", 8],
+        "14684726385174603824": ["convolution_gpu_bfyx_f16", 8],
+        "3538746967389669479": ["convolution_gpu_bfyx_os_iyx_osv16", 1055],
+        "5442728869442056950": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "17446903112723559991": ["convolution_gpu_bfyx_f16", 8],
+        "17314761693722740561": ["convolution_gpu_bfyx_f16", 8],
+        "12956681231908531328": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "17836528995874415642": ["convolution_gpu_bfyx_f16", 8],
+        "8896717627818724430": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "14716947061630316041": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "9735141117399046903": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18366465884925728820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17388129439366166721": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "13724220569112734431": ["convolution_gpu_bfyx_os_iyx_osv16", 973],
+        "5529587475911632254": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "11660089067798953391": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "5181665423821543629": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "8048807352445331657": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "3470485673426524224": ["convolution_gpu_bfyx_os_iyx_osv16", 1021],
+        "3135008557801015427": ["convolution_gpu_bfyx_os_iyx_osv16", 1006],
+        "14966985685297154154": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2035874178080637954": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "5013120291092844103": ["convolution_gpu_bfyx_os_iyx_osv16", 608],
+        "4460181251394130653": ["convolution_gpu_bfyx_os_iyx_osv16", 369],
+        "5117453858905614531": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "8461950668910238851": ["convolution_gpu_bfyx_os_iyx_osv16", 747],
+        "1805006234516270784": ["convolution_gpu_bfyx_os_iyx_osv16", 314],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8595156989254845134": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14493123117003003092": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15727110405754725012": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10890620280807224744": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16079792265815446547": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15384055407657760803": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2464531851392092325": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5613964218561759893": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11460648773146310189": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6593870431636005244": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11529036254499853035": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "17010201596936918243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17881013712456488163": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "9336215801757107337": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 748],
+        "4972222030950072866": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "18113157997465675692": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1472667774257971884": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7480855342650290772": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17244746622354078542": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "251775001146378096": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "1264200731459756446": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "6968087469917482002": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1607381610581485984": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 700],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "8919164618663601566": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "16853448010512574338": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3010644722195354051": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17062011653598617580": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "1377210419756613502": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1514213112647467874": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "17268633106022870055": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 966],
+        "13787398748724798340": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "9739119866883611322": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "7151167803631697120": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "8870164706606458004": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9269498023794081940": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 940],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "5728070995112243570": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5381496395266530071": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9712640406795417230": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 107],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 1102],
+        "7267651931396380072": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12324657364444167791": ["convolution_gpu_bfyx_os_iyx_osv16", 1103],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_os_iyx_osv16", 723],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 1113],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 737],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 1113],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "17351243519367619322": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17026338651868178077": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "4114184149613179671": ["convolution_gpu_bfyx_os_iyx_osv16", 707],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 365],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "9038567144062573854": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "6418222853479731432": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7539191242110313918": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 229],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 980],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 981],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "5717588912072437191": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 225],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 971],
+        "10766144770072425534": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "6442062011017461761": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "3892512749863226006": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4970240836537468609": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 700],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 955],
+        "13821372148587948765": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "4727004015814244856": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "3470176432841342662": ["convolution_gpu_bfyx_os_iyx_osv16", 584],
+        "8950283515337670839": ["convolution_gpu_bfyx_os_iyx_osv16", 975],
+        "3995072673238444396": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "2017817372328795772": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18312913026696855515": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1323873987880062206": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "3828289925836476678": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10112041311060264798": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7966725359592006848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2213697863012348994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5200128826708487987": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "4910238486908592807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7110283028091835342": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16035239784731081694": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8190708817382075098": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "5665180797552893949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7180904384828396567": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17041465029020839746": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8648502659728489503": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2007359338465363037": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16300204511212928772": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "10636266218009746496": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "17502734572225953539": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "9266211532252099402": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 2],
+        "6763848192987176713": ["convolution_gpu_bfyx_os_iyx_osv16", 493],
+        "6123737429963241103": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "10102406370623883494": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "16125206369312086947": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "16927483709629289661": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "3196823812655863240": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7968691295772769464": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "6100031133333761315": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "4055514200737135942": ["fully_connected_gpu_fb_oi_ref", 0],
+        "8508119169246513026": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14616145871710456304": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "5168719682914827724": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9473263513191498949": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13461678175466315866": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5434387853485184980": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1580848418974169308": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6784038318046980185": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6248879028648699716": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "1436424324238684653": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13189391944650202330": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "1199836165181399413": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5850612837647497531": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14740129361300854586": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5500102903434438965": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "7297288884568452370": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5136459381906620211": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17411381157694639837": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8553537608760917592": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12734736056404146766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "706526643700857104": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14937087468947592213": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10242452169628899571": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16629319403227634487": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3072344987020666532": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "5932710369376133446": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "15493383292734604744": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5089311900051393846": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8721087995946196075": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14614506535270942373": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1289727743091243002": ["convolution_gpu_bfyx_os_iyx_osv16", 665],
+        "18141581865855554514": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16956102699411887521": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11526253915485637934": ["convolution_gpu_bfyx_os_iyx_osv16", 620],
+        "15696872908795836832": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15332512198621601617": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5702206454207934253": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "15414564531144316178": ["convolution_gpu_bfyx_gemm_like", 2],
+        "386448290084824203": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15390537225231495870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10038180349007230302": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "6817180081986948843": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1527649565538821618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7004336584711849988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2157468701794819044": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "15920115680945815097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "17778554668592635168": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "6999571050665340986": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "9879436330613366129": ["convolution_gpu_bfyx_gemm_like", 2],
+        "726019095679197164": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "1865317677339946921": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12018933315566840474": ["convolution_gpu_bfyx_os_iyx_osv16", 625],
+        "16124622994105864663": ["convolution_gpu_bfyx_os_iyx_osv16", 289],
+        "9495099584417616887": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "11735107098356940998": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15204384674852423405": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16866113149488400688": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15389774302738715375": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8101177730804364242": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "10149791427786334512": ["convolution_gpu_bfyx_os_iyx_osv16", 977],
+        "11053198857132396443": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "3963577328998759824": ["fully_connected_gpu_fb_oi_ref", 1],
+        "800184023925596362": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "13839532421033004873": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8262487256974801864": ["convolution_gpu_bfyx_os_iyx_osv16", 261],
+        "3693217331248996607": ["convolution_gpu_bfyx_os_iyx_osv16", 636],
+        "10388555096612441710": ["convolution_gpu_bfyx_os_iyx_osv16", 147],
+        "8892698757722619628": ["convolution_gpu_bfyx_os_iyx_osv16", 524],
+        "9606108204575763003": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "8449999818915991236": ["fully_connected_gpu_fb_io_ref", 0],
+        "6954046921635466236": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12133573113666871990": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18286924901612269315": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16168987643236739114": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17573344121250212662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8792004303945144557": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6055054188657886157": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16692293796070898202": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18377591093081814522": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7171735046681228890": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2461164836823254208": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14430129165479757357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14698972830975282413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "3479216436904445131": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5269956004669551826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13594976208424418204": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12373590460058087695": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "4405236452109167503": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14132900527730577142": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1349033639465657142": ["convolution_gpu_bfyx_gemm_like", 2],
+        "812985719328060901": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12407276986845062239": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9170373506597510005": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1389904024718949479": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "11509503516680870396": ["convolution_gpu_bfyx_os_iyx_osv16", 860],
+        "3553844546517243430": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "11739050017164389431": ["convolution_gpu_bfyx_os_iyx_osv16", 216],
+        "14683616789766294266": ["convolution_gpu_bfyx_os_iyx_osv16", 362],
+        "1178443422000627700": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "3959894501921049830": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "6268257722565030993": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8104007721367839894": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11004242349744689661": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1104],
+        "18331651243656907622": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "165832937834890614": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13820132527548818114": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "11494973886338256684": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9562717353252171645": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15182874743616431755": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11923231799522030843": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "7212742683076043022": ["convolution_gpu_bfyx_os_iyx_osv16", 597],
+        "1535659774314187616": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9077124630226762093": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "10707439442194349922": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "13670707208998927662": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11898738546265963886": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7218310781442328740": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 577],
+        "17307988793370069255": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "3159313229944494871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2202381460552007272": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1038],
+        "4539543204582046751": ["convolution_gpu_bfyx_os_iyx_osv16", 267],
+        "2922645767583925625": ["convolution_gpu_bfyx_os_iyx_osv16", 151],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 5],
+        "967593872851912083": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 5],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 5],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 5],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 5],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 5],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 5],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 5],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 5],
+        "357806365552700839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 5],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 5],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 5],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 5],
+        "346998321908284784": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 5],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 5],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 5],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 5],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 5],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 5],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 5],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 5],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 5],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 5],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 5],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 5],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 5],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 5],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 5],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 5],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 5],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 5],
+        "260499864874634958": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 5],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 5],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 5],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 5],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 5],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 7],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 1],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 6],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 0],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 6],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 1],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 6],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 0],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 7],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 1],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 7],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 0],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 7],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 0],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 6],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 2],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 6],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 1],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 6],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 0],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 7],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 0],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 7],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 1],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 6],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 1],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 6],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 1],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 7],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 0],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 6],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 1],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 6],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16", 8],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16", 8],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16", 8],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16", 8],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16", 8],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 5],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 5],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 5],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 5],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 5],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 5],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 5],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 5],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 5],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 5],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 5],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 5],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 7],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 6],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 7],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 7],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 6],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 6],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 7],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 7],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 6],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 6],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 6],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 6],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 6],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 7],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 7],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 7],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 6],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 6],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 7],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 6],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 6],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 6],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 6],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 7],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 6],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 3],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 6],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 4],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 6],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 4],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 6],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 4],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 6],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 4],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 7],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 4],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 7],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 4],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 7],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 4],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 7],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 4],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 7],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 3],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 7],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 4],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 6],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 3],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 6],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 3],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 7],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 4],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 6],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 4],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 7],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 4],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 7],
+        "14491983419826529399": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "11866343372130060111": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "3750595711145201146": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "555112033233919049": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "9449916193007510499": ["fully_connected_gpu_bf_io_gemm", 1],
+        "821153009898835283": ["fully_connected_gpu_bf_io_gemm", 1],
+        "10018756206737727294": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5830779024517851317": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "7913817244562964901": ["convolution_gpu_bfyx_f16", 8],
+        "11779589567746893119": ["convolution_gpu_bfyx_f16", 8],
+        "5287441936829096354": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "16879635677321458783": ["convolution_gpu_bfyx_f16", 8],
+        "5936894667802097344": ["convolution_gpu_bfyx_f16", 8],
+        "12029555773381953470": ["convolution_gpu_bfyx_f16", 8],
+        "1395714970525756800": ["convolution_gpu_bfyx_f16", 5],
+        "18366381433142273315": ["convolution_gpu_bfyx_f16", 8],
+        "17839315025229585473": ["convolution_gpu_bfyx_f16", 8],
+        "7428339090190576585": ["convolution_gpu_bfyx_f16", 8],
+        "16427721132197847241": ["convolution_gpu_bfyx_f16", 8],
+        "929038963682864275": ["convolution_gpu_bfyx_f16", 8],
+        "6348679735483401866": ["convolution_gpu_bfyx_f16", 8],
+        "17409943223289937333": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10896472785943286419": ["convolution_gpu_bfyx_f16", 8],
+        "8675423965229942895": ["convolution_gpu_bfyx_f16", 8],
+        "15359653790909326580": ["convolution_gpu_bfyx_f16", 5],
+        "937772044105590355": ["convolution_gpu_bfyx_f16", 5],
+        "11630003841984891663": ["convolution_gpu_bfyx_f16", 8],
+        "15721323944762357421": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "18032560040713612222": ["convolution_gpu_bfyx_f16", 5],
+        "16185581163541386950": ["convolution_gpu_bfyx_f16", 5],
+        "7296460872108123423": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18375557444371775299": ["convolution_gpu_bfyx_f16", 5],
+        "10922059457537054563": ["convolution_gpu_bfyx_f16", 5],
+        "122295605901184339": ["convolution_gpu_bfyx_f16", 4],
+        "12164250230746861951": ["convolution_gpu_bfyx_f16", 3],
+        "1984025014517619256": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7957167898986800985": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "412995552853553524": ["convolution_gpu_bfyx_f16", 8],
+        "7058232330882130703": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15549425900373079382": ["convolution_gpu_bfyx_f16", 8],
+        "2713038204741622907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1878980012173918209": ["convolution_gpu_bfyx_f16", 8],
+        "12468208151780727122": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6674575974748163031": ["convolution_gpu_bfyx_f16", 8],
+        "5591111867402032949": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3413916493145831316": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12421615174911349736": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689084255978323672": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12474210147973914830": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14174889288973953645": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18224887830367116006": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16049847963625476676": ["convolution_gpu_bfyx_os_iyx_osv16", 993],
+        "3817623781909159313": ["convolution_gpu_bfyx_f16", 8],
+        "3004968067582685285": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6876765637331622545": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6802301901709446085": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13245964863324091195": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "953254263392356310": ["convolution_gpu_bfyx_f16", 4],
+        "5388858533648189105": ["convolution_gpu_bfyx_f16", 5],
+        "3226238265868290723": ["convolution_gpu_bfyx_f16", 7],
+        "10098858620420134682": ["convolution_gpu_bfyx_f16", 6],
+        "18308172581381789101": ["convolution_gpu_bfyx_f16", 3],
+        "12846183737006963638": ["convolution_gpu_bfyx_f16", 7],
+        "8746233054079242877": ["convolution_gpu_bfyx_f16", 3],
+        "7516276889336424671": ["convolution_gpu_bfyx_f16", 5],
+        "8240661672477348007": ["convolution_gpu_bfyx_f16", 0],
+        "7421142512620741721": ["convolution_gpu_bfyx_f16", 6],
+        "17095633565672192085": ["convolution_gpu_bfyx_f16", 6],
+        "7381046541836362634": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7006663637645720459": ["convolution_gpu_bfyx_f16", 3],
+        "554667746487334145": ["convolution_gpu_bfyx_f16", 4],
+        "1899794088311416867": ["convolution_gpu_bfyx_f16", 0],
+        "4461871297663195464": ["convolution_gpu_bfyx_f16", 2],
+        "845238018552466931": ["convolution_gpu_bfyx_f16", 2],
+        "13889057206654080908": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2813710942447372241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13633232435632839044": ["convolution_gpu_bfyx_f16", 8],
+        "2883172178329270363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9432546329737888706": ["convolution_gpu_bfyx_f16", 8],
+        "12985746913235154779": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17940668702908419725": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2064000219100642226": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5833649709217830223": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10849235794440642481": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6321445979984216128": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14697315322325185660": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "36079357617783912": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "4063865474431180498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13167503358764278233": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17498603449428007802": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "6304136029727027056": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "1754448782405089213": ["convolution_gpu_bfyx_f16", 7],
+        "15489166244290113065": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5756918986564223629": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8035545676843269497": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17042017278300937839": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11260048813076144906": ["convolution_gpu_bfyx_f16", 4],
+        "6873924247641352061": ["convolution_gpu_bfyx_f16", 4],
+        "6474957215284027135": ["convolution_gpu_bfyx_f16", 7],
+        "16573724507496129614": ["convolution_gpu_bfyx_f16", 6],
+        "11210971373278055121": ["convolution_gpu_bfyx_f16", 2],
+        "185717560970701618": ["convolution_gpu_bfyx_f16", 6],
+        "11817410866221484993": ["convolution_gpu_bfyx_f16", 3],
+        "9765519004693711463": ["convolution_gpu_bfyx_f16", 6],
+        "14300671725579588671": ["convolution_gpu_bfyx_f16", 5],
+        "1297549572559338433": ["convolution_gpu_bfyx_f16", 5],
+        "4346210823986581329": ["convolution_gpu_bfyx_f16", 4],
+        "2750608965765787878": ["convolution_gpu_bfyx_f16", 3],
+        "14245442283142381063": ["convolution_gpu_bfyx_f16", 6],
+        "2942593456597250269": ["convolution_gpu_bfyx_f16", 7],
+        "14807774261203767931": ["convolution_gpu_bfyx_f16", 6],
+        "2024891861044519704": ["convolution_gpu_bfyx_f16", 6],
+        "12988352411577718659": ["convolution_gpu_bfyx_f16", 6],
+        "2856387545805299627": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "6931984251726006059": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "10053897550646291639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "166522152877705111": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8194080531314571831": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "8462596687449136841": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16641148739441654579": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3012332306785177280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1667559253581127345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17950962563816983793": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15920581282829793263": ["convolution_gpu_bfyx_os_iyx_osv16", 126],
+        "4931844549089354374": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11227326613484178737": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "8926339988827333993": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14947161471102583853": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7959005479751426244": ["convolution_gpu_bfyx_os_iyx_osv16", 913],
+        "13876295120508241721": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "5450799298000231966": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "745049678230480319": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17799305583546345514": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "15448134419455024563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10247046915015701375": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "818326236814735107": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11621993279519931789": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10879300979808656559": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "8049787711095084959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8361191677655973935": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "6455326407035817823": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "4549875381866576113": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14780479128645572595": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "9221666339438514459": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "17091218700152862273": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9951123692498529061": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "15226633731441516361": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "4453349487216529991": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "17929115705990268026": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6621532750524834097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "16562571407098459049": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "2873284221161386597": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3769897639705493224": ["convolution_gpu_bfyx_os_iyx_osv16", 174],
+        "5447803100312758964": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 926],
+        "9163238347824560017": ["convolution_gpu_bfyx_os_iyx_osv16", 542],
+        "1688979903294911182": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9338092674592431198": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 912],
+        "15522545626077485199": ["convolution_gpu_bfyx_os_iyx_osv16", 543],
+        "1797489112792772811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5478531388148194783": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "3289369122755371980": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 84],
+        "14572382016053496602": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "16841168676076935693": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18407347961782182453": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 83],
+        "8695092335925023399": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "14168685794682021826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12423218459706339590": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8734189831526420226": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14362182205968229036": ["convolution_gpu_bfyx_os_iyx_osv16", 517],
+        "13157476677873103938": ["convolution_gpu_bfyx_os_iyx_osv16", 891],
+        "11940005480315119153": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "1302512649939808216": ["convolution_gpu_bfyx_os_iyx_osv16", 500],
+        "16919811480058643640": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "5208084625746441471": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8262549900448065079": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "5227665249672396809": ["convolution_gpu_bfyx_os_iyx_osv16", 799],
+        "4933328578946081154": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13882747247011638614": ["convolution_gpu_bfyx_os_iyx_osv16", 178],
+        "814582084353022226": ["convolution_gpu_bfyx_os_iyx_osv16", 178],
+        "4844820846457555156": ["convolution_gpu_bfyx_os_iyx_osv16", 887],
+        "6607603202773469786": ["convolution_gpu_bfyx_os_iyx_osv16", 138],
+        "15439502814859116813": ["convolution_gpu_bfyx_os_iyx_osv16", 890],
+        "15777107988701235428": ["convolution_gpu_bfyx_os_iyx_osv16", 501],
+        "12832042711454018844": ["convolution_gpu_bfyx_os_iyx_osv16", 508],
+        "6099745418702030715": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "4230880085403638923": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "62516450676185117": ["convolution_gpu_bfyx_os_iyx_osv16", 422],
+        "5477965717233241895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13669762279828807941": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11383807956757990177": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660099130061496863": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17151683028720387864": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1859914910272455189": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7396998153023492339": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2008700175670389343": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16827869183124732303": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13120889385491477637": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18305507733019922935": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4387964680811897490": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9490382148010824252": ["convolution_gpu_bfyx_os_iyx_osv16", 293],
+        "7607585452987307694": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6647358668213164168": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "3269426835760928022": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "8407302923973070317": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7392260165026897157": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "17129583679506972654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15394113208725741887": ["convolution_gpu_bfyx_os_iyx_osv16", 535],
+        "2232515974555590822": ["convolution_gpu_bfyx_os_iyx_osv16", 458],
+        "7051704960834828963": ["convolution_gpu_bfyx_os_iyx_osv16", 494],
+        "5849502570947855625": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17320230733736402509": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14376448497282593859": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11059091112167439040": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11073613812342958769": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4154541958145867375": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11497596156215746295": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7989457597882264703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11224449857742374449": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8019330764912846895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8298488609133255406": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14393217564854520848": ["convolution_gpu_bfyx_os_iyx_osv16", 251],
+        "3141906957984957990": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8411633870815503324": ["convolution_gpu_bfyx_os_iyx_osv16", 1006],
+        "17289238208820562994": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "17376882838565917025": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "376447867595880925": ["convolution_gpu_bfyx_os_iyx_osv16", 905],
+        "9223591734176279618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2123481240130017671": ["convolution_gpu_bfyx_os_iyx_osv16", 908],
+        "60262519627721258": ["convolution_gpu_bfyx_os_iyx_osv16", 773],
+        "2779831597589397721": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14888498856025675875": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13008816286946828339": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14472562307183930494": ["convolution_gpu_bfyx_os_iyx_osv16", 508],
+        "12260051528344627305": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "12237139830764526217": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "12839904859734107448": ["convolution_gpu_bfyx_os_iyx_osv16", 604],
+        "2557331839687658350": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "14711934417369240383": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "1599135987505067413": ["convolution_gpu_bfyx_gemm_like", 2],
+        "88960405449779079": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "3983071771155729815": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "4686928543634340294": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "9500201961536063781": ["convolution_gpu_bfyx_os_iyx_osv16", 500],
+        "5626617363814193337": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "9493629616033946504": ["convolution_gpu_bfyx_os_iyx_osv16", 1018],
+        "9142997105687030758": ["convolution_gpu_bfyx_os_iyx_osv16", 754],
+        "3565303211593767799": ["convolution_gpu_bfyx_os_iyx_osv16", 756],
+        "2027062613896109334": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2494989528221736054": ["convolution_gpu_bfyx_f16", 8],
+        "10481457184081052557": ["convolution_gpu_bfyx_f16", 8],
+        "17843566914419305583": ["convolution_gpu_bfyx_f16", 8],
+        "10440359951914302042": ["convolution_gpu_bfyx_f16", 5],
+        "12355534646291322950": ["convolution_gpu_bfyx_f16", 5],
+        "1312046147551402733": ["convolution_gpu_bfyx_f16", 6],
+        "17747064821498992452": ["convolution_gpu_bfyx_f16", 4],
+        "15727623554601964014": ["convolution_gpu_bfyx_f16", 1],
+        "1123438482147655288": ["convolution_gpu_bfyx_f16", 5],
+        "7126696940487701707": ["convolution_gpu_bfyx_f16", 3],
+        "3872390202906772826": ["convolution_gpu_bfyx_f16", 7],
+        "2880589787553789663": ["convolution_gpu_bfyx_os_iyx_osv16", 48],
+        "11290368603402236066": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1323592601201034234": ["convolution_gpu_bfyx_f16", 8],
+        "14798486770850675841": ["convolution_gpu_bfyx_f16", 8],
+        "11673314628747753691": ["convolution_gpu_bfyx_f16", 6],
+        "7021961511624638678": ["convolution_gpu_bfyx_f16", 7],
+        "5676198353742450430": ["convolution_gpu_bfyx_f16", 7],
+        "4929819810689803833": ["convolution_gpu_bfyx_f16", 3],
+        "240316590146675808": ["convolution_gpu_bfyx_f16", 4],
+        "17625565940895057722": ["convolution_gpu_bfyx_f16", 3],
+        "8688075088415087060": ["convolution_gpu_bfyx_f16", 1],
+        "3109943868702160503": ["convolution_gpu_bfyx_f16", 4],
+        "15650217867869430450": ["convolution_gpu_bfyx_f16", 8],
+        "17908144598228512507": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "18154134293896237020": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9604863051097029874": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "12931069967038668164": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "6806199908367808607": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11683146685348965370": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8154297486284619437": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14336744408490491240": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "4571901717343198720": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6532394816830144120": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2666796249274140911": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "11653606109120321972": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "6204893434840435239": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "13218364348439640168": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "10201555771333451359": ["convolution_gpu_bfyx_os_iyx_osv16", 1022],
         "10298865798559508430": ["convolution_gpu_bfyx_gemm_like", 2],
         "8036745915261696332": ["convolution_gpu_bfyx_gemm_like", 2],
         "10569376024770516176": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
         "9267895309185863228": ["convolution_gpu_bfyx_os_iyx_osv16", 1]
     },
     "12": {
+        "16441539315228762959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10503311838004746406": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5981205170754513046": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10926555569182284101": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13694948197248278243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3025829117046314851": ["convolution_gpu_bfyx_os_iyx_osv16", 107],
+        "14600403613863348033": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11071652997326968925": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "9466249274834206569": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "18144397193643864406": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17167052658616496904": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "758159154291645307": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "14555191501995137081": ["fully_connected_gpu_bf_io_gemm", 1],
+        "3057483147285040704": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "4460662214292495759": ["convolution_gpu_bfyx_f16", 8],
+        "17632851940131114495": ["convolution_gpu_bfyx_f16", 8],
+        "7945867532035693686": ["convolution_gpu_bfyx_f16", 8],
+        "10798155343477437060": ["convolution_gpu_bfyx_f16", 8],
+        "14191150640021059705": ["convolution_gpu_bfyx_f16", 8],
+        "14593228968660512118": ["convolution_gpu_bfyx_f16", 8],
+        "5573515532668433114": ["convolution_gpu_bfyx_f16", 8],
+        "11642934660277782628": ["convolution_gpu_bfyx_f16", 8],
+        "4825553592910970555": ["convolution_gpu_bfyx_f16", 8],
+        "17245530055973419690": ["convolution_gpu_bfyx_f16", 8],
+        "14644519840111409049": ["convolution_gpu_bfyx_f16", 8],
+        "15093112872571669071": ["convolution_gpu_bfyx_f16", 8],
+        "6172925429706792586": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16888914688498810916": ["convolution_gpu_bfyx_f16", 8],
+        "7094210524110336636": ["convolution_gpu_bfyx_f16", 8],
+        "1102719880087191972": ["convolution_gpu_bfyx_f16", 6],
+        "17635368969132641763": ["convolution_gpu_bfyx_f16", 8],
+        "6444855710931944326": ["convolution_gpu_bfyx_f16", 8],
+        "3685203889040861337": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8983142397488339162": ["convolution_gpu_bfyx_f16", 8],
+        "2942771097961823034": ["convolution_gpu_bfyx_f16", 8],
+        "16912834065670733738": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "2419223013209835757": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "11179881900554989521": ["convolution_gpu_bfyx_f16", 8],
+        "16511126264743737451": ["convolution_gpu_bfyx_f16", 8],
+        "10100289629103173958": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "9258363108725341315": ["convolution_gpu_bfyx_f16", 8],
+        "13334138861096017540": ["convolution_gpu_bfyx_f16", 8],
+        "6513616579637283618": ["convolution_gpu_bfyx_f16", 7],
+        "881483878813237044": ["convolution_gpu_bfyx_f16", 8],
+        "9696420455787045679": ["convolution_gpu_bfyx_f16", 8],
+        "7480696988694183789": ["convolution_gpu_bfyx_f16", 8],
+        "9560848299493464065": ["convolution_gpu_bfyx_f16", 8],
+        "4670244085889208769": ["convolution_gpu_bfyx_f16", 8],
+        "11349612635173553035": ["convolution_gpu_bfyx_f16", 8],
+        "6259794269666057674": ["convolution_gpu_bfyx_f16", 8],
+        "5786551708845072629": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16619951395310930207": ["convolution_gpu_bfyx_f16", 8],
+        "3173655881192997611": ["convolution_gpu_bfyx_f16", 8],
+        "6211510258514141464": ["convolution_gpu_bfyx_f16", 5],
+        "14941982212174570311": ["convolution_gpu_bfyx_f16", 5],
+        "11364624703533653571": ["convolution_gpu_bfyx_f16", 8],
+        "338313831905889757": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13154424438571292174": ["convolution_gpu_bfyx_f16", 5],
+        "14845639704528269654": ["convolution_gpu_bfyx_f16", 5],
+        "12200202041476611175": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "14166499608250271507": ["convolution_gpu_bfyx_f16", 5],
+        "13694208494559240243": ["convolution_gpu_bfyx_f16", 8],
+        "14476260143987433871": ["convolution_gpu_bfyx_f16", 5],
+        "6145395374917324923": ["convolution_gpu_bfyx_f16", 5],
+        "2094686947151722271": ["convolution_gpu_bfyx_os_iyx_osv16", 78],
+        "11589833946098195323": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "11775116692122787310": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "570493430126610249": ["fully_connected_gpu_bs_f_bsv16_b1", 1],
+        "17743072683947532579": ["fully_connected_gpu_bf_io_gemm", 0],
+        "18382443157447369363": ["fully_connected_gpu_bf_io_gemm", 0],
+        "5689213766720451736": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "11153755804932874939": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13074790088623248655": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14552950763379636885": ["convolution_gpu_bfyx_f16", 7],
+        "1094600023872583173": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16827633753490728058": ["convolution_gpu_bfyx_f16", 8],
+        "6130516122331504865": ["convolution_gpu_bfyx_f16", 8],
+        "7670629548971090825": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5029322578170351026": ["convolution_gpu_bfyx_f16", 8],
+        "11682717086936489649": ["convolution_gpu_bfyx_f16", 8],
+        "9372644596618467274": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14183733053550126939": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5642981720905097704": ["convolution_gpu_bfyx_f16", 5],
+        "3924580903671169312": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17700105511171786728": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14998223809620050073": ["convolution_gpu_bfyx_f16", 5],
+        "419201770890811765": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7063350782589593425": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10687898799916833174": ["convolution_gpu_bfyx_f16", 8],
+        "5341504900604548311": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8082311845702095517": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5769891345892528049": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5034821474694053994": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2717532297792072749": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "368628635269408785": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10159612784755046280": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15051374440521170869": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "17031332595095892437": ["convolution_gpu_bfyx_f16", 7],
+        "6938198718430530942": ["convolution_gpu_bfyx_f16", 8],
+        "2358029178760210430": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16131007134197705525": ["convolution_gpu_bfyx_f16", 7],
+        "6612035874395100788": ["convolution_gpu_bfyx_f16", 8],
+        "15022677981959490269": ["convolution_gpu_bfyx_f16", 8],
+        "11900509609879947992": ["convolution_gpu_bfyx_f16", 1],
+        "5035548158005038390": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3968445701280656378": ["convolution_gpu_bfyx_f16", 8],
+        "7463742252314920613": ["convolution_gpu_bfyx_f16", 8],
+        "17747065651432157057": ["convolution_gpu_bfyx_f16", 8],
+        "2951437417233062866": ["convolution_gpu_bfyx_f16", 8],
+        "4695031178096669813": ["convolution_gpu_bfyx_f16", 5],
+        "13200791011072363046": ["convolution_gpu_bfyx_f16", 5],
+        "7702483443698911725": ["convolution_gpu_bfyx_f16", 5],
+        "3225276687886679210": ["convolution_gpu_bfyx_f16", 7],
+        "8406061878298060171": ["convolution_gpu_bfyx_f16", 7],
+        "11861948300376902542": ["convolution_gpu_bfyx_f16", 7],
+        "18047654118875021903": ["convolution_gpu_bfyx_f16", 6],
+        "3876838946012690078": ["convolution_gpu_bfyx_f16", 7],
+        "11532811324432477051": ["convolution_gpu_bfyx_f16", 8],
+        "16482627014547828135": ["convolution_gpu_bfyx_f16", 7],
+        "4565106422618308814": ["convolution_gpu_bfyx_f16", 6],
+        "16991433003318725315": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "16286683168753184722": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7074368169815304627": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10702490327714920783": ["convolution_gpu_bfyx_gemm_like", 2],
+        "964168479107166949": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6252510766878541979": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1012052068628903875": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15499166167392043521": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14327383763442344255": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18371627210590255356": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13185859115957551268": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15052792752810689842": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17918808521142517830": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "1644157325342654261": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12198018126650448419": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9714393675511550323": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4928366179227934688": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15361605271135812199": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10267714663732575502": ["convolution_gpu_bfyx_1x1", 2],
+        "9990965405769569785": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10493403039286551634": ["convolution_gpu_bfyx_1x1", 2],
+        "18324310183763016728": ["convolution_gpu_bfyx_os_iyx_osv16", 458],
+        "6002923098500991259": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "3429780644945779272": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6005067060818453503": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3676547304316346974": ["convolution_gpu_bfyx_f16", 6],
+        "8412675332215210248": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "14477382651380138146": ["convolution_gpu_bfyx_f16", 8],
+        "15899888589766240554": ["convolution_gpu_bfyx_f16", 8],
+        "4529376177404929890": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "7210896246223636810": ["convolution_gpu_bfyx_f16", 8],
+        "2775471071662652034": ["convolution_gpu_bfyx_f16", 8],
+        "17132456912135683375": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "15563691660506818555": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9997402509928965207": ["convolution_gpu_bfyx_f16", 5],
+        "7793754164423097155": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4639865771698877244": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1766192115208251594": ["convolution_gpu_bfyx_f16", 8],
+        "2015853414727933068": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "10306264176864957825": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4871044181497936479": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8396548857016837452": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "12714653556587252941": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1967886437456544865": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11350907923254547441": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12282274184666824734": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16866941685634953173": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6312283149621718315": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9795822066940245604": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "7256380059517365529": ["convolution_gpu_bfyx_f16", 6],
+        "11966909558503849515": ["convolution_gpu_bfyx_f16", 8],
+        "11277466712159791917": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "4992371260504969141": ["convolution_gpu_bfyx_f16", 7],
+        "15043181455492553716": ["convolution_gpu_bfyx_f16", 7],
+        "8399107263382557054": ["convolution_gpu_bfyx_f16", 8],
+        "6350452055467384023": ["convolution_gpu_bfyx_f16", 7],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16", 974],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like", 2],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16", 1091],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16", 1074],
+        "4917807560042671575": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "44341776758472069": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "3584869801682702110": ["convolution_gpu_bfyx_os_iyx_osv16", 1075],
+        "13032463401326344281": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "12074020528214820344": ["convolution_gpu_bfyx_os_iyx_osv16", 329],
+        "10792368710075698135": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14773903272136532468": ["convolution_gpu_bfyx_os_iyx_osv16", 628],
+        "4459329337183571568": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "17247158622529817069": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 352],
+        "15248304664655540462": ["convolution_gpu_bfyx_os_iyx_osv16", 999],
+        "8737603244374483727": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6375630142791083064": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16951442326148701883": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "8824140014793073324": ["convolution_gpu_bfyx_os_iyx_osv16", 622],
+        "6420666457275061685": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18191060893922845906": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4914314319075651246": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2995522243104361971": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "12727854191946007642": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1104],
+        "3260693384502698965": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8599674766060889778": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8021852643758937492": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "2492924011838985637": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1105],
+        "4309855944835724499": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14741878965259218163": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12180612484034524170": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13300287078635373813": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13378751364754764186": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6449257695177020930": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "17627392788011440461": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "13831493475156855535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16483429728914404238": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "3860080842190932938": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12349486511618981663": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "15798538366019336375": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17142061595610833587": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "413520381980740601": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "15678637644328155655": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 357],
+        "6526747512277607691": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "16117940336643166742": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5991582579063082343": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "3294597200237228703": ["convolution_gpu_bfyx_os_iyx_osv16", 670],
+        "16191151963860109032": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4092109744625924274": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4849563739505810631": ["convolution_gpu_bfyx_gemm_like", 2],
+        "3411824370004173602": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "11418950537551203770": ["convolution_gpu_bfyx_to_bfyx_f16", 7],
+        "15344685054531225492": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14837032904820198149": ["convolution_gpu_bfyx_f16", 8],
+        "14191080790860851837": ["convolution_gpu_bfyx_f16", 8],
+        "17023834849779428858": ["convolution_gpu_bfyx_f16", 8],
+        "3329139872094988661": ["convolution_gpu_bfyx_f16", 8],
+        "4450424283454693457": ["convolution_gpu_bfyx_f16", 8],
+        "6264730897461114496": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16058636937964624617": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "499215221217528434": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14655897748934541342": ["convolution_gpu_bfyx_f16", 8],
+        "15662090780385020537": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7311728100823416883": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "7221666363928264914": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "949611108582310305": ["convolution_gpu_bfyx_f16", 8],
+        "398119457330194405": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "18306921825426259074": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "14045661362966364917": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "11211712695622132026": ["convolution_gpu_bfyx_f16", 8],
+        "13777550841624006577": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4765385132115618850": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "16898905631497333152": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "2688905295933725456": ["convolution_gpu_bfyx_f16", 7],
+        "10325568251605243952": ["convolution_gpu_bfyx_f16", 8],
+        "4697609485293892109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "15761571492230997960": ["convolution_gpu_bfyx_f16", 8],
+        "10403493618856101043": ["convolution_gpu_bfyx_f16", 7],
+        "15694677292906293678": ["convolution_gpu_bfyx_f16", 8],
+        "11385013883660304429": ["convolution_gpu_bfyx_f16", 6],
+        "8155797389244290087": ["convolution_gpu_bfyx_f16", 0],
+        "16706121580364790904": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5673972310424776040": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16", 859],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16", 698],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "4305463771822108179": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "2079318718874681198": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "17439941375453858836": ["convolution_gpu_bfyx_gemm_like", 1],
+        "12467583825605788345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "9058857190661793339": ["fused_conv_eltwise_gpu_ref", 2],
+        "11620974866622716017": ["fused_conv_eltwise_gpu_ref", 0],
+        "8857009061371774666": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5756084360647965669": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3975438095352877013": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3345987020362642539": ["convolution_gpu_bfyx_gemm_like", 0],
+        "2080318501154291605": ["fully_connected_gpu_bf_io_input_spatial", 2],
+        "13813582937323882369": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "11149782181562145291": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16", 1098],
+        "3526580286148537369": ["convolution_gpu_bfyx_os_iyx_osv16", 316],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16", 750],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16", 744],
+        "2877876834438717783": ["convolution_gpu_bfyx_os_iyx_osv16", 951],
+        "9156649014297448284": ["convolution_gpu_bfyx_os_iyx_osv16", 644],
+        "13660470643303663441": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "8081997809574506331": ["convolution_gpu_bfyx_os_iyx_osv16", 353],
+        "8199400320947837516": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "11460891889180307970": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5643924526605879168": ["convolution_gpu_bfyx_os_iyx_osv16", 723],
+        "14198463555297179999": ["convolution_gpu_bfyx_f16", 8],
+        "9820219997540294747": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16598220433310484103": ["convolution_gpu_bfyx_f16", 8],
+        "13332579082252874358": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "10148956417804060854": ["convolution_gpu_bfyx_f16", 8],
+        "16052199780545784176": ["convolution_gpu_bfyx_f16", 8],
+        "17284989371701058847": ["convolution_gpu_bfyx_os_iyx_osv16", 318],
+        "18186300610687882698": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "17323620992879479455": ["convolution_gpu_bfyx_f16", 8],
+        "10782643446733040985": ["convolution_gpu_bfyx_f16", 8],
+        "3080843366919845836": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16898206352994894714": ["convolution_gpu_bfyx_f16", 8],
+        "17502393571772755646": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12982233543299343225": ["convolution_gpu_bfyx_os_iyx_osv16", 89],
+        "5609871805820255743": ["fully_connected_gpu_bf_io_gemm", 2],
+        "7971259885907841252": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15956442448148612253": ["convolution_gpu_bfyx_os_iyx_osv16", 1069],
+        "7600980811977404651": ["convolution_gpu_bfyx_os_iyx_osv16", 984],
+        "6051578359778554994": ["convolution_gpu_bfyx_os_iyx_osv16", 1124],
+        "14591236937522474591": ["convolution_gpu_bfyx_os_iyx_osv16", 749],
+        "380671738106280681": ["convolution_gpu_bfyx_os_iyx_osv16", 371],
+        "16581313033870107357": ["convolution_gpu_bfyx_os_iyx_osv16", 1066],
+        "15132868076468531540": ["convolution_gpu_bfyx_os_iyx_osv16", 695],
+        "4964421818619633295": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "5206589624074157418": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "15804259593852912096": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "9667577643691138471": ["convolution_gpu_bfyx_f16", 8],
+        "9587296295017154035": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "624896425985287215": ["convolution_gpu_bfyx_f16", 8],
+        "13698491289625410930": ["convolution_gpu_bfyx_os_iyx_osv16", 303],
+        "5501294609610168354": ["convolution_gpu_bfyx_f16", 8],
+        "14684726385174603824": ["convolution_gpu_bfyx_f16", 8],
+        "3538746967389669479": ["convolution_gpu_bfyx_os_iyx_osv16", 1055],
+        "5442728869442056950": ["convolution_gpu_bfyx_to_bfyx_f16", 6],
+        "17446903112723559991": ["convolution_gpu_bfyx_f16", 8],
+        "17314761693722740561": ["convolution_gpu_bfyx_f16", 8],
+        "12956681231908531328": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "17836528995874415642": ["convolution_gpu_bfyx_f16", 8],
+        "8896717627818724430": ["convolution_gpu_bfyx_os_iyx_osv16", 108],
+        "14716947061630316041": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "9735141117399046903": ["fully_connected_gpu_bf_io_gemm", 2],
+        "18366465884925728820": ["convolution_gpu_bfyx_gemm_like", 2],
+        "17388129439366166721": ["convolution_gpu_bfyx_os_iyx_osv16", 639],
+        "13724220569112734431": ["convolution_gpu_bfyx_os_iyx_osv16", 973],
+        "5529587475911632254": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "11660089067798953391": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "5181665423821543629": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "8048807352445331657": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "3470485673426524224": ["convolution_gpu_bfyx_os_iyx_osv16", 1021],
+        "3135008557801015427": ["convolution_gpu_bfyx_os_iyx_osv16", 1006],
+        "14966985685297154154": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2035874178080637954": ["convolution_gpu_bfyx_os_iyx_osv16", 1064],
+        "5013120291092844103": ["convolution_gpu_bfyx_os_iyx_osv16", 608],
+        "4460181251394130653": ["convolution_gpu_bfyx_os_iyx_osv16", 369],
+        "5117453858905614531": ["convolution_gpu_bfyx_os_iyx_osv16", 1123],
+        "8461950668910238851": ["convolution_gpu_bfyx_os_iyx_osv16", 747],
+        "1805006234516270784": ["convolution_gpu_bfyx_os_iyx_osv16", 314],
+        "4583484812233029888": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8595156989254845134": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14493123117003003092": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "12372261924257291610": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "1547771611689525848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15727110405754725012": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10890620280807224744": ["convolution_gpu_bfyx_gemm_like", 0],
+        "16079792265815446547": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "15384055407657760803": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2464531851392092325": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5613964218561759893": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11460648773146310189": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6593870431636005244": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "11529036254499853035": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "2726453304845436156": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "2607416795507802412": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "17010201596936918243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8480598154536665021": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17881013712456488163": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "9336215801757107337": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8174421295799601683": ["convolution_gpu_bfyx_os_iyx_osv16", 701],
+        "1967655354607438665": ["convolution_gpu_bfyx_os_iyx_osv16", 748],
+        "4972222030950072866": ["convolution_gpu_bfyx_os_iyx_osv16", 743],
+        "18113157997465675692": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1472667774257971884": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7480855342650290772": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17244746622354078542": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "251775001146378096": ["convolution_gpu_winograd_6x3_s1_fused", 2],
+        "14235558866846276172": ["convolution_gpu_bfyx_os_iyx_osv16", 322],
+        "18066867692765966577": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "1264200731459756446": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "6968087469917482002": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1607381610581485984": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17234843749633035510": ["convolution_gpu_bfyx_os_iyx_osv16", 700],
+        "11516168882438876247": ["convolution_gpu_bfyx_os_iyx_osv16", 1117],
+        "8919164618663601566": ["convolution_gpu_bfyx_os_iyx_osv16", 323],
+        "16853448010512574338": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3010644722195354051": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17062011653598617580": ["convolution_gpu_bfyx_os_iyx_osv16", 331],
+        "4614875083188849196": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "10859023312681572942": ["convolution_gpu_bfyx_os_iyx_osv16", 959],
+        "1377210419756613502": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "17391465283540972493": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1514213112647467874": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "17268633106022870055": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8140122945471321201": ["convolution_gpu_bfyx_os_iyx_osv16", 245],
+        "15079423575410353790": ["convolution_gpu_bfyx_os_iyx_osv16", 966],
+        "13787398748724798340": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "9739119866883611322": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "7151167803631697120": ["convolution_gpu_bfyx_os_iyx_osv16", 224],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "16532386511585070092": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4910582540370962997": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12335148041391647118": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "10689880083512104726": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "8870164706606458004": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9269498023794081940": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6779832349039897240": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "13942354789498444722": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "14294764660016835141": ["convolution_gpu_bfyx_os_iyx_osv16", 940],
+        "12323510278692809329": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "5728070995112243570": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5381496395266530071": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9712640406795417230": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15036737419347383878": ["convolution_gpu_bfyx_os_iyx_osv16", 107],
+        "11552594222313787816": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "7606282654661282476": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6201358671959761215": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4829111442270007186": ["convolution_gpu_bfyx_os_iyx_osv16", 1102],
+        "7267651931396380072": ["convolution_gpu_bfyx_os_iyx_osv16", 1106],
+        "1279682391530947146": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2655979063469551930": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14425547983540742516": ["convolution_gpu_bfyx_gemm_like", 2],
+        "981419593633555198": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "12324657364444167791": ["convolution_gpu_bfyx_os_iyx_osv16", 1103],
+        "3246153532847702583": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4202705710324555180": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12272318018055307535": ["convolution_gpu_bfyx_gemm_like", 2],
+        "396815044270978782": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15633173680908856082": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16635731992372618666": ["convolution_gpu_bfyx_os_iyx_osv16", 723],
+        "10418466892824851134": ["convolution_gpu_bfyx_os_iyx_osv16", 354],
+        "3244777852750357718": ["convolution_gpu_bfyx_os_iyx_osv16", 1113],
+        "2443758478383854939": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "13503934436248311972": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "2594310972560076285": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2424349375092546581": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "7104985983444651979": ["convolution_gpu_bfyx_os_iyx_osv16", 1083],
+        "13518747015059826801": ["convolution_gpu_bfyx_os_iyx_osv16", 737],
+        "11675809062974151496": ["convolution_gpu_bfyx_os_iyx_osv16", 1113],
+        "4725349695436675084": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "17351243519367619322": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17026338651868178077": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "8730407034445893642": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "144434691308306757": ["convolution_gpu_bfyx_os_iyx_osv16", 1086],
+        "4114184149613179671": ["convolution_gpu_bfyx_os_iyx_osv16", 707],
+        "2558882920723584206": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "16481414687792927331": ["convolution_gpu_bfyx_os_iyx_osv16", 365],
+        "17756651805686889890": ["convolution_gpu_bfyx_os_iyx_osv16", 1116],
+        "2228533392085335649": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "9038567144062573854": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1345293381483212104": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "729683192738752814": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "458997435535883643": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "16955907389221472146": ["convolution_gpu_bfyx_os_iyx_osv16", 740],
+        "17927673764274384911": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "6418222853479731432": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "7539191242110313918": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "18014188548165359278": ["convolution_gpu_bfyx_os_iyx_osv16", 229],
+        "16640379332042800496": ["convolution_gpu_bfyx_os_iyx_osv16", 980],
+        "14856197725306980283": ["convolution_gpu_bfyx_os_iyx_osv16", 981],
+        "9279474331309267880": ["convolution_gpu_bfyx_os_iyx_osv16", 978],
+        "5717588912072437191": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "1143426643765799488": ["convolution_gpu_bfyx_os_iyx_osv16", 225],
+        "1049385516019456025": ["convolution_gpu_bfyx_os_iyx_osv16", 971],
+        "10766144770072425534": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "6442062011017461761": ["convolution_gpu_bfyx_os_iyx_osv16", 230],
+        "6063490496423709036": ["convolution_gpu_bfyx_os_iyx_osv16", 601],
+        "3892512749863226006": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "4970240836537468609": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 1],
+        "9399255910184037480": ["convolution_gpu_bfyx_os_iyx_osv16", 953],
+        "10594581016504135920": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "15640487942881889055": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "14165417928501578590": ["convolution_gpu_bfyx_os_iyx_osv16", 320],
+        "12251989236991754721": ["convolution_gpu_bfyx_os_iyx_osv16", 709],
+        "6675363512560434713": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "9831713940431605743": ["convolution_gpu_bfyx_os_iyx_osv16", 1078],
+        "6531349504807709133": ["convolution_gpu_bfyx_os_iyx_osv16", 710],
+        "2726501303929773572": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "10439704858943788014": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "18137994263450376706": ["convolution_gpu_bfyx_os_iyx_osv16", 325],
+        "5711991739289045727": ["convolution_gpu_bfyx_os_iyx_osv16", 339],
+        "15255831401757117660": ["convolution_gpu_bfyx_os_iyx_osv16", 333],
+        "3906658058160172747": ["convolution_gpu_bfyx_os_iyx_osv16", 711],
+        "15823433297099049221": ["convolution_gpu_bfyx_os_iyx_osv16", 700],
+        "7829483638597533960": ["convolution_gpu_bfyx_os_iyx_osv16", 1079],
+        "14092273913846393837": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "3746578485711843646": ["convolution_gpu_bfyx_os_iyx_osv16", 1087],
+        "12228183555926126959": ["convolution_gpu_bfyx_os_iyx_osv16", 1076],
+        "8776893332387904786": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "16672299044236704672": ["convolution_gpu_bfyx_os_iyx_osv16", 1084],
+        "13309889945947393850": ["convolution_gpu_bfyx_os_iyx_osv16", 1072],
+        "15966815420067673043": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "7415938485228396256": ["convolution_gpu_bfyx_os_iyx_osv16", 324],
+        "9655590024687998403": ["convolution_gpu_bfyx_os_iyx_osv16", 334],
+        "14798289196964890724": ["convolution_gpu_bfyx_os_iyx_osv16", 332],
+        "9794684437872784678": ["convolution_gpu_bfyx_os_iyx_osv16", 335],
+        "16729204245488754836": ["convolution_gpu_bfyx_os_iyx_osv16", 582],
+        "15185983488152870534": ["convolution_gpu_bfyx_os_iyx_osv16", 955],
+        "13821372148587948765": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "4727004015814244856": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "1738348894912205653": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "559491455289877068": ["convolution_gpu_bfyx_os_iyx_osv16", 579],
+        "17312172687490475177": ["convolution_gpu_bfyx_os_iyx_osv16", 207],
+        "3470176432841342662": ["convolution_gpu_bfyx_os_iyx_osv16", 584],
+        "8950283515337670839": ["convolution_gpu_bfyx_os_iyx_osv16", 975],
+        "3995072673238444396": ["convolution_gpu_bfyx_os_iyx_osv16", 576],
+        "1238913228370790536": ["convolution_gpu_bfyx_os_iyx_osv16", 204],
+        "928677976151553489": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "4059887681292863495": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "2017817372328795772": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18312913026696855515": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1323873987880062206": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7947635298491683844": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "3828289925836476678": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10112041311060264798": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7966725359592006848": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2213697863012348994": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5200128826708487987": ["convolution_gpu_bfyx_os_iyx_osv16", 708],
+        "4910238486908592807": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13616909429370698140": ["convolution_gpu_bfyx_gemm_like", 0],
+        "5170073622279980223": ["convolution_gpu_bfyx_gemm_like", 1],
+        "7110283028091835342": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16035239784731081694": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8190708817382075098": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "5665180797552893949": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7180904384828396567": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "17041465029020839746": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8648502659728489503": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "2007359338465363037": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16300204511212928772": ["convolution_gpu_bfyx_os_iyx_osv16", 982],
+        "10636266218009746496": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "17502734572225953539": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "9266211532252099402": ["fully_connected_gpu_bs_f_bsv16_af8_vload", 2],
+        "6763848192987176713": ["convolution_gpu_bfyx_os_iyx_osv16", 493],
+        "6123737429963241103": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "10102406370623883494": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "16125206369312086947": ["convolution_gpu_bfyx_os_iyx_osv16", 983],
+        "16927483709629289661": ["convolution_gpu_bfyx_os_iyx_osv16", 607],
+        "3196823812655863240": ["convolution_gpu_bfyx_os_iyx_osv16", 231],
+        "7968691295772769464": ["convolution_gpu_bfyx_os_iyx_osv16", 1109],
+        "6100031133333761315": ["fully_connected_gpu_bf_io_input_spatial", 0],
+        "4055514200737135942": ["fully_connected_gpu_fb_oi_ref", 0],
+        "8508119169246513026": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14616145871710456304": ["convolution_gpu_bfyx_os_iyx_osv16", 1120],
+        "5168719682914827724": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "9473263513191498949": ["convolution_gpu_bfyx_gemm_like", 1],
+        "13461678175466315866": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5434387853485184980": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1580848418974169308": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6784038318046980185": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6248879028648699716": ["convolution_gpu_bfyx_os_iyx_osv16", 1112],
+        "1436424324238684653": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13189391944650202330": ["convolution_gpu_bfyx_os_iyx_osv16", 741],
+        "1199836165181399413": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5850612837647497531": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "14740129361300854586": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5500102903434438965": ["convolution_gpu_bfyx_os_iyx_osv16", 367],
+        "7297288884568452370": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5136459381906620211": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17411381157694639837": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8553537608760917592": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12734736056404146766": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "706526643700857104": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14937087468947592213": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "10242452169628899571": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16629319403227634487": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "3072344987020666532": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "5932710369376133446": ["convolution_gpu_bfyx_os_iyx_osv16", 528],
+        "15493383292734604744": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "5089311900051393846": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8721087995946196075": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14614506535270942373": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "1289727743091243002": ["convolution_gpu_bfyx_os_iyx_osv16", 665],
+        "18141581865855554514": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16956102699411887521": ["convolution_gpu_bfyx_gemm_like", 0],
+        "11526253915485637934": ["convolution_gpu_bfyx_os_iyx_osv16", 620],
+        "15696872908795836832": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15332512198621601617": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5702206454207934253": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "15414564531144316178": ["convolution_gpu_bfyx_gemm_like", 2],
+        "386448290084824203": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15390537225231495870": ["convolution_gpu_bfyx_gemm_like", 2],
+        "10038180349007230302": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 731],
+        "6817180081986948843": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1527649565538821618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7004336584711849988": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2157468701794819044": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 728],
+        "15920115680945815097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 733],
+        "17778554668592635168": ["convolution_gpu_bfyx_os_iyx_osv16", 533],
+        "6999571050665340986": ["convolution_gpu_bfyx_os_iyx_osv16", 910],
+        "9879436330613366129": ["convolution_gpu_bfyx_gemm_like", 2],
+        "726019095679197164": ["convolution_gpu_bfyx_os_iyx_osv16", 730],
+        "1865317677339946921": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12018933315566840474": ["convolution_gpu_bfyx_os_iyx_osv16", 625],
+        "16124622994105864663": ["convolution_gpu_bfyx_os_iyx_osv16", 289],
+        "9495099584417616887": ["convolution_gpu_bfyx_os_iyx_osv16", 626],
+        "11735107098356940998": ["convolution_gpu_bfyx_gemm_like", 0],
+        "15204384674852423405": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "16866113149488400688": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15389774302738715375": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8101177730804364242": ["convolution_gpu_bfyx_os_iyx_osv16", 250],
+        "10149791427786334512": ["convolution_gpu_bfyx_os_iyx_osv16", 977],
+        "11053198857132396443": ["convolution_gpu_bfyx_os_iyx_osv16", 352],
+        "3963577328998759824": ["fully_connected_gpu_fb_oi_ref", 1],
+        "800184023925596362": ["convolution_gpu_bfyx_os_iyx_osv16", 475],
+        "13839532421033004873": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8262487256974801864": ["convolution_gpu_bfyx_os_iyx_osv16", 261],
+        "3693217331248996607": ["convolution_gpu_bfyx_os_iyx_osv16", 636],
+        "10388555096612441710": ["convolution_gpu_bfyx_os_iyx_osv16", 147],
+        "8892698757722619628": ["convolution_gpu_bfyx_os_iyx_osv16", 524],
+        "9606108204575763003": ["convolution_gpu_bfyx_os_iyx_osv16", 1111],
+        "8449999818915991236": ["fully_connected_gpu_fb_io_ref", 0],
+        "6954046921635466236": ["convolution_gpu_bfyx_gemm_like", 0],
+        "12133573113666871990": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18286924901612269315": ["convolution_gpu_bfyx_gemm_like", 2],
+        "16168987643236739114": ["convolution_gpu_bfyx_gemm_like", 1],
+        "17573344121250212662": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8792004303945144557": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "6055054188657886157": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16692293796070898202": ["convolution_gpu_bfyx_gemm_like", 2],
+        "18377591093081814522": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7171735046681228890": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2461164836823254208": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14430129165479757357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14698972830975282413": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "3479216436904445131": ["convolution_gpu_bfyx_gemm_like", 2],
+        "5269956004669551826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13594976208424418204": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "12373590460058087695": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "4405236452109167503": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14132900527730577142": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "1349033639465657142": ["convolution_gpu_bfyx_gemm_like", 2],
+        "812985719328060901": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "12407276986845062239": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9170373506597510005": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1389904024718949479": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "11509503516680870396": ["convolution_gpu_bfyx_os_iyx_osv16", 860],
+        "3553844546517243430": ["convolution_gpu_bfyx_os_iyx_osv16", 1107],
+        "11739050017164389431": ["convolution_gpu_bfyx_os_iyx_osv16", 216],
+        "14683616789766294266": ["convolution_gpu_bfyx_os_iyx_osv16", 362],
+        "1178443422000627700": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "3959894501921049830": ["convolution_gpu_bfyx_os_iyx_osv16", 728],
+        "6268257722565030993": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "8104007721367839894": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11004242349744689661": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1104],
+        "18331651243656907622": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "165832937834890614": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "13820132527548818114": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 356],
+        "11494973886338256684": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9562717353252171645": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "15182874743616431755": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11923231799522030843": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "7212742683076043022": ["convolution_gpu_bfyx_os_iyx_osv16", 597],
+        "1535659774314187616": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9077124630226762093": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 355],
+        "10707439442194349922": ["convolution_gpu_bfyx_os_iyx_osv16", 733],
+        "13670707208998927662": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11898738546265963886": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7218310781442328740": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 577],
+        "17307988793370069255": ["convolution_gpu_bfyx_os_iyx_osv16", 967],
+        "3159313229944494871": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "2202381460552007272": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1038],
+        "4539543204582046751": ["convolution_gpu_bfyx_os_iyx_osv16", 267],
+        "2922645767583925625": ["convolution_gpu_bfyx_os_iyx_osv16", 151],
+        "3726173595578668243": ["convolution_gpu_bfyx_f16", 8],
+        "1069242824083103727": ["convolution_gpu_bfyx_f16", 8],
+        "10139803717927136766": ["convolution_gpu_bfyx_f16", 8],
+        "10426525571408284384": ["convolution_gpu_bfyx_f16", 8],
+        "6036447764961737632": ["convolution_gpu_bfyx_f16", 8],
+        "16859712173301423348": ["convolution_gpu_bfyx_f16", 8],
+        "4950939249231517650": ["convolution_gpu_bfyx_f16", 8],
+        "15428640534166306063": ["convolution_gpu_bfyx_f16", 8],
+        "12539440450141711052": ["convolution_gpu_bfyx_f16", 8],
+        "4694865878411993051": ["convolution_gpu_bfyx_f16", 8],
+        "7855581105034231853": ["convolution_gpu_bfyx_f16", 8],
+        "16357120378854173738": ["convolution_gpu_bfyx_f16", 8],
+        "9788176856201644185": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3526857091962358658": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9524927752153133377": ["convolution_gpu_bfyx_f16", 5],
+        "967593872851912083": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8544250266821361254": ["convolution_gpu_bfyx_f16", 5],
+        "14702583823206509221": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6562594370920553562": ["convolution_gpu_bfyx_f16", 5],
+        "4871626169134099270": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "4306257530819109379": ["convolution_gpu_bfyx_f16", 5],
+        "13097490329579729355": ["convolution_gpu_bfyx_f16", 5],
+        "7536472342317469819": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17240729682157914878": ["convolution_gpu_bfyx_f16", 5],
+        "4338687769151300794": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9217611707355973890": ["convolution_gpu_bfyx_f16", 5],
+        "16565126239389697019": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9706046427344615745": ["convolution_gpu_bfyx_f16", 5],
+        "8724624785920420532": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3678291868919586746": ["convolution_gpu_bfyx_f16", 5],
+        "357806365552700839": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13063387805113848039": ["convolution_gpu_bfyx_f16", 5],
+        "1557184360709050836": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8608461026786312785": ["convolution_gpu_bfyx_f16", 5],
+        "9987273496502066597": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "64106675123073412": ["convolution_gpu_bfyx_f16", 5],
+        "4220695701755939736": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12963348434542940033": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16181124988724765560": ["convolution_gpu_bfyx_f16", 5],
+        "346998321908284784": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2318421272788358186": ["convolution_gpu_bfyx_f16", 5],
+        "15927802155084275629": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8773070973133375779": ["convolution_gpu_bfyx_f16", 5],
+        "9940763571380473237": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16277913671917468663": ["convolution_gpu_bfyx_f16", 5],
+        "1474918596978458534": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2186150200961617234": ["convolution_gpu_bfyx_f16", 5],
+        "10577259940464718041": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10352584043544857764": ["convolution_gpu_bfyx_f16", 5],
+        "9144746358156959840": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "13301166545153738930": ["convolution_gpu_bfyx_f16", 5],
+        "10753675657145151848": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10604750453275830911": ["convolution_gpu_bfyx_f16", 5],
+        "9243411386937443096": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12042818423431873035": ["convolution_gpu_bfyx_f16", 5],
+        "6683976234770455967": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6298190398591064450": ["convolution_gpu_bfyx_f16", 5],
+        "17196237025206156806": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5853381784506376944": ["convolution_gpu_bfyx_f16", 5],
+        "7339440798895952661": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "309066171876496786": ["convolution_gpu_bfyx_f16", 5],
+        "17843616251377971109": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12223137580096133095": ["convolution_gpu_bfyx_f16", 5],
+        "7577659638199402167": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "565723015051709107": ["convolution_gpu_bfyx_f16", 5],
+        "14416887345595384816": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13314165049380641802": ["convolution_gpu_bfyx_f16", 5],
+        "7520511107200802065": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11534561269762454076": ["convolution_gpu_bfyx_f16", 5],
+        "10368570488453413379": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15747873854346463294": ["convolution_gpu_bfyx_f16", 5],
+        "7824157744505687913": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5462648317757708951": ["convolution_gpu_bfyx_f16", 5],
+        "3493741914954272091": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18286084829637877271": ["convolution_gpu_bfyx_f16", 5],
+        "260499864874634958": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "10167218530612525698": ["convolution_gpu_bfyx_f16", 5],
+        "11647470184823377234": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6976222743405170101": ["convolution_gpu_bfyx_f16", 5],
+        "7655642513340250684": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "2708987188750383204": ["convolution_gpu_bfyx_f16", 5],
+        "3147813143325864684": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13481932492220060429": ["convolution_gpu_bfyx_f16", 5],
+        "8069058927528586404": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "9624255156096106627": ["convolution_gpu_bfyx_f16", 5],
+        "17730913632234504096": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11384790797228210583": ["convolution_gpu_bfyx_f16", 7],
+        "16177287431434086806": ["convolution_gpu_bfyx_f16", 1],
+        "2990533830830456778": ["convolution_gpu_bfyx_f16", 6],
+        "8610276394762287397": ["convolution_gpu_bfyx_f16", 0],
+        "14889103084722200470": ["convolution_gpu_bfyx_f16", 6],
+        "1845895244697890167": ["convolution_gpu_bfyx_f16", 1],
+        "9079010613051503735": ["convolution_gpu_bfyx_f16", 6],
+        "12061818277351885597": ["convolution_gpu_bfyx_f16", 0],
+        "9390843066348290833": ["convolution_gpu_bfyx_f16", 7],
+        "10509352827759959818": ["convolution_gpu_bfyx_f16", 1],
+        "7121505015354362475": ["convolution_gpu_bfyx_f16", 7],
+        "3145839553769702558": ["convolution_gpu_bfyx_f16", 0],
+        "9437978197962731993": ["convolution_gpu_bfyx_f16", 7],
+        "16274951933822979821": ["convolution_gpu_bfyx_f16", 0],
+        "14030311264395486109": ["convolution_gpu_bfyx_f16", 6],
+        "6745402198112522691": ["convolution_gpu_bfyx_f16", 2],
+        "17535374606849768070": ["convolution_gpu_bfyx_f16", 6],
+        "13107074908777587001": ["convolution_gpu_bfyx_f16", 1],
+        "12441704244463007888": ["convolution_gpu_bfyx_f16", 6],
+        "9830487478445609618": ["convolution_gpu_bfyx_f16", 0],
+        "2607686439369816702": ["convolution_gpu_bfyx_f16", 7],
+        "11952384679771234258": ["convolution_gpu_bfyx_f16", 0],
+        "3189741427811982954": ["convolution_gpu_bfyx_f16", 7],
+        "7501115822974560125": ["convolution_gpu_bfyx_f16", 1],
+        "5461533362170148981": ["convolution_gpu_bfyx_f16", 6],
+        "10622846706558433994": ["convolution_gpu_bfyx_f16", 1],
+        "14985143127047962687": ["convolution_gpu_bfyx_f16", 6],
+        "9631129065088682473": ["convolution_gpu_bfyx_f16", 1],
+        "9287906640814562678": ["convolution_gpu_bfyx_f16", 7],
+        "10312813290107807302": ["convolution_gpu_bfyx_f16", 0],
+        "12443171163993705676": ["convolution_gpu_bfyx_f16", 6],
+        "3168498630594159758": ["convolution_gpu_bfyx_f16", 1],
+        "1224004372693674977": ["convolution_gpu_bfyx_f16", 6],
+        "16142734280696556211": ["convolution_gpu_bfyx_f16", 8],
+        "635140168178230171": ["convolution_gpu_bfyx_f16", 8],
+        "17935287735372634102": ["convolution_gpu_bfyx_f16", 8],
+        "15817877524852645836": ["convolution_gpu_bfyx_f16", 8],
+        "10065955805093424080": ["convolution_gpu_bfyx_f16", 8],
+        "11821370621780817632": ["convolution_gpu_bfyx_f16", 8],
+        "677921946529877110": ["convolution_gpu_bfyx_f16", 8],
+        "5361664571196670427": ["convolution_gpu_bfyx_f16", 8],
+        "2901538337520242272": ["convolution_gpu_bfyx_f16", 8],
+        "5581843211058265455": ["convolution_gpu_bfyx_f16", 8],
+        "217667049553318429": ["convolution_gpu_bfyx_f16", 8],
+        "5337496722551766654": ["convolution_gpu_bfyx_f16", 8],
+        "52740663361396709": ["convolution_gpu_bfyx_f16", 8],
+        "6991371618000668418": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2326385631302475177": ["convolution_gpu_bfyx_f16", 5],
+        "8721996744048476299": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "453498137980697662": ["convolution_gpu_bfyx_f16", 5],
+        "15807266772870766609": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6553421087532441250": ["convolution_gpu_bfyx_f16", 5],
+        "12573289076827071790": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8853947103468767323": ["convolution_gpu_bfyx_f16", 5],
+        "6453143304950619430": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "1775677589702924323": ["convolution_gpu_bfyx_f16", 5],
+        "16761512340234377511": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2448165393673590598": ["convolution_gpu_bfyx_f16", 5],
+        "11041313275514857930": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8908290078256179450": ["convolution_gpu_bfyx_f16", 5],
+        "6872057470208040983": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "3462663905986148169": ["convolution_gpu_bfyx_f16", 5],
+        "9998472323723395768": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9695005447848657794": ["convolution_gpu_bfyx_f16", 5],
+        "864050420562880191": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "16884753149447117871": ["convolution_gpu_bfyx_f16", 5],
+        "9413300293443003372": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "9584473138046573481": ["convolution_gpu_bfyx_f16", 5],
+        "17226124546002868085": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "5703305874425530284": ["convolution_gpu_bfyx_f16", 5],
+        "16357533604618943588": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "8568882981604412701": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6735600860810305128": ["convolution_gpu_bfyx_f16", 7],
+        "9976345793999587972": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "15346869959264738522": ["convolution_gpu_bfyx_f16", 6],
+        "18151038936580799249": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "11956105843463290323": ["convolution_gpu_bfyx_f16", 7],
+        "2197043795215802833": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7837223160972083111": ["convolution_gpu_bfyx_f16", 7],
+        "17991319065386721750": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "8684426249485914306": ["convolution_gpu_bfyx_f16", 6],
+        "15440765487742350713": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "4006988924644151380": ["convolution_gpu_bfyx_f16", 6],
+        "1165323482766442288": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6216179328027568162": ["convolution_gpu_bfyx_f16", 7],
+        "5085232160533811804": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5560503476513957999": ["convolution_gpu_bfyx_f16", 7],
+        "11899886655444339788": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "8035035668897300219": ["convolution_gpu_bfyx_f16", 6],
+        "15531280953380757927": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "5417611188973238514": ["convolution_gpu_bfyx_f16", 6],
+        "13845305820052266938": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "156328565120552800": ["convolution_gpu_bfyx_f16", 6],
+        "15783591814248428053": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5753913342838023682": ["convolution_gpu_bfyx_f16", 6],
+        "3207990305547692029": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18084824492918706199": ["convolution_gpu_bfyx_f16", 6],
+        "8033743776899693075": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "243712386211233379": ["convolution_gpu_bfyx_f16", 7],
+        "2965177266959923348": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "13237451337340946362": ["convolution_gpu_bfyx_f16", 7],
+        "9188120772772842413": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "1249134296559537004": ["convolution_gpu_bfyx_f16", 7],
+        "6776437678382831419": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "9140223146321937006": ["convolution_gpu_bfyx_f16", 6],
+        "7509732267784929557": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "9869335174149535367": ["convolution_gpu_bfyx_f16", 6],
+        "15410089184813419927": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "12736591082694609735": ["convolution_gpu_bfyx_f16", 7],
+        "10111465201148839782": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "6977012639021700914": ["convolution_gpu_bfyx_f16", 6],
+        "10452382209692659038": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "13099335757796409253": ["convolution_gpu_bfyx_f16", 6],
+        "8355446198162136384": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6457714394569252436": ["convolution_gpu_bfyx_f16", 6],
+        "1870949498151438396": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "6325249952936664765": ["convolution_gpu_bfyx_f16", 6],
+        "4283372428897156128": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "15284708683366527091": ["convolution_gpu_bfyx_f16", 7],
+        "12367140420770161260": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17302868757320805407": ["convolution_gpu_bfyx_f16", 6],
+        "12812798569408798714": ["convolution_gpu_bfyx_f16", 3],
+        "18027642894783121874": ["convolution_gpu_bfyx_f16", 6],
+        "3766561909462900481": ["convolution_gpu_bfyx_f16", 4],
+        "8126433884587687354": ["convolution_gpu_bfyx_f16", 6],
+        "7431739774665400867": ["convolution_gpu_bfyx_f16", 4],
+        "15213968303698655071": ["convolution_gpu_bfyx_f16", 6],
+        "1895954773577076065": ["convolution_gpu_bfyx_f16", 4],
+        "10820634669412096693": ["convolution_gpu_bfyx_f16", 6],
+        "9105871040526273510": ["convolution_gpu_bfyx_f16", 4],
+        "6253056982440997971": ["convolution_gpu_bfyx_f16", 7],
+        "14271936409538632354": ["convolution_gpu_bfyx_f16", 4],
+        "7830723669305086809": ["convolution_gpu_bfyx_f16", 7],
+        "16905205856195133489": ["convolution_gpu_bfyx_f16", 4],
+        "17744780595721014433": ["convolution_gpu_bfyx_f16", 7],
+        "1185658428449577287": ["convolution_gpu_bfyx_f16", 4],
+        "4322844512730914538": ["convolution_gpu_bfyx_f16", 7],
+        "8559998096869077061": ["convolution_gpu_bfyx_f16", 4],
+        "12935328860605637188": ["convolution_gpu_bfyx_f16", 7],
+        "17826095303533956022": ["convolution_gpu_bfyx_f16", 3],
+        "6059064882469521870": ["convolution_gpu_bfyx_f16", 7],
+        "17987726224817029150": ["convolution_gpu_bfyx_f16", 4],
+        "1752617074755449766": ["convolution_gpu_bfyx_f16", 6],
+        "1147527648969475665": ["convolution_gpu_bfyx_f16", 3],
+        "336079374726362009": ["convolution_gpu_bfyx_f16", 6],
+        "3956037701575034246": ["convolution_gpu_bfyx_f16", 3],
+        "9177200416044551211": ["convolution_gpu_bfyx_f16", 7],
+        "3580337905402094261": ["convolution_gpu_bfyx_f16", 4],
+        "8657404564308325878": ["convolution_gpu_bfyx_f16", 6],
+        "9660551017019324634": ["convolution_gpu_bfyx_f16", 4],
+        "2283387892607580344": ["convolution_gpu_bfyx_f16", 7],
+        "9757276965383246450": ["convolution_gpu_bfyx_f16", 4],
+        "5662627047941545281": ["convolution_gpu_bfyx_f16", 7],
+        "14491983419826529399": ["convolution_gpu_bfyx_os_iyx_osv16", 455],
+        "11866343372130060111": ["convolution_gpu_bfyx_os_iyx_osv16", 623],
+        "3750595711145201146": ["convolution_gpu_bfyx_os_iyx_osv16", 463],
+        "555112033233919049": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "9449916193007510499": ["fully_connected_gpu_bf_io_gemm", 1],
+        "821153009898835283": ["fully_connected_gpu_bf_io_gemm", 1],
+        "10018756206737727294": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "5830779024517851317": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "7913817244562964901": ["convolution_gpu_bfyx_f16", 8],
+        "11779589567746893119": ["convolution_gpu_bfyx_f16", 8],
+        "5287441936829096354": ["convolution_gpu_bfyx_os_iyx_osv16", 356],
+        "16879635677321458783": ["convolution_gpu_bfyx_f16", 8],
+        "5936894667802097344": ["convolution_gpu_bfyx_f16", 8],
+        "12029555773381953470": ["convolution_gpu_bfyx_f16", 8],
+        "1395714970525756800": ["convolution_gpu_bfyx_f16", 5],
+        "18366381433142273315": ["convolution_gpu_bfyx_f16", 8],
+        "17839315025229585473": ["convolution_gpu_bfyx_f16", 8],
+        "7428339090190576585": ["convolution_gpu_bfyx_f16", 8],
+        "16427721132197847241": ["convolution_gpu_bfyx_f16", 8],
+        "929038963682864275": ["convolution_gpu_bfyx_f16", 8],
+        "6348679735483401866": ["convolution_gpu_bfyx_f16", 8],
+        "17409943223289937333": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10896472785943286419": ["convolution_gpu_bfyx_f16", 8],
+        "8675423965229942895": ["convolution_gpu_bfyx_f16", 8],
+        "15359653790909326580": ["convolution_gpu_bfyx_f16", 5],
+        "937772044105590355": ["convolution_gpu_bfyx_f16", 5],
+        "11630003841984891663": ["convolution_gpu_bfyx_f16", 8],
+        "15721323944762357421": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "18032560040713612222": ["convolution_gpu_bfyx_f16", 5],
+        "16185581163541386950": ["convolution_gpu_bfyx_f16", 5],
+        "7296460872108123423": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18375557444371775299": ["convolution_gpu_bfyx_f16", 5],
+        "10922059457537054563": ["convolution_gpu_bfyx_f16", 5],
+        "122295605901184339": ["convolution_gpu_bfyx_f16", 4],
+        "12164250230746861951": ["convolution_gpu_bfyx_f16", 3],
+        "1984025014517619256": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "7957167898986800985": ["convolution_gpu_bfyx_f16_depthwise", 1],
+        "412995552853553524": ["convolution_gpu_bfyx_f16", 8],
+        "7058232330882130703": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "15549425900373079382": ["convolution_gpu_bfyx_f16", 8],
+        "2713038204741622907": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "1878980012173918209": ["convolution_gpu_bfyx_f16", 8],
+        "12468208151780727122": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6674575974748163031": ["convolution_gpu_bfyx_f16", 8],
+        "5591111867402032949": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "3413916493145831316": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12421615174911349736": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16689084255978323672": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "12474210147973914830": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "14174889288973953645": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "18224887830367116006": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "16049847963625476676": ["convolution_gpu_bfyx_os_iyx_osv16", 993],
+        "3817623781909159313": ["convolution_gpu_bfyx_f16", 8],
+        "3004968067582685285": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "6876765637331622545": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6802301901709446085": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "13245964863324091195": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "953254263392356310": ["convolution_gpu_bfyx_f16", 4],
+        "5388858533648189105": ["convolution_gpu_bfyx_f16", 5],
+        "3226238265868290723": ["convolution_gpu_bfyx_f16", 7],
+        "10098858620420134682": ["convolution_gpu_bfyx_f16", 6],
+        "18308172581381789101": ["convolution_gpu_bfyx_f16", 3],
+        "12846183737006963638": ["convolution_gpu_bfyx_f16", 7],
+        "8746233054079242877": ["convolution_gpu_bfyx_f16", 3],
+        "7516276889336424671": ["convolution_gpu_bfyx_f16", 5],
+        "8240661672477348007": ["convolution_gpu_bfyx_f16", 0],
+        "7421142512620741721": ["convolution_gpu_bfyx_f16", 6],
+        "17095633565672192085": ["convolution_gpu_bfyx_f16", 6],
+        "7381046541836362634": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "7006663637645720459": ["convolution_gpu_bfyx_f16", 3],
+        "554667746487334145": ["convolution_gpu_bfyx_f16", 4],
+        "1899794088311416867": ["convolution_gpu_bfyx_f16", 0],
+        "4461871297663195464": ["convolution_gpu_bfyx_f16", 2],
+        "845238018552466931": ["convolution_gpu_bfyx_f16", 2],
+        "13889057206654080908": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2813710942447372241": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13633232435632839044": ["convolution_gpu_bfyx_f16", 8],
+        "2883172178329270363": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "9432546329737888706": ["convolution_gpu_bfyx_f16", 8],
+        "12985746913235154779": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "17940668702908419725": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "2064000219100642226": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "5833649709217830223": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "10849235794440642481": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "6321445979984216128": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "14697315322325185660": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "36079357617783912": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "4063865474431180498": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "13167503358764278233": ["convolution_gpu_bfyx_f16_1x1", 2],
+        "17498603449428007802": ["convolution_gpu_bfyx_f16_depthwise", 0],
+        "6304136029727027056": ["convolution_gpu_bfyx_os_iyx_osv16", 995],
+        "1754448782405089213": ["convolution_gpu_bfyx_f16", 7],
+        "15489166244290113065": ["convolution_gpu_bfyx_f16_1x1", 0],
+        "5756918986564223629": ["convolution_gpu_bfyx_f16_depthwise", 2],
+        "8035545676843269497": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "17042017278300937839": ["convolution_gpu_bfyx_f16_1x1", 1],
+        "11260048813076144906": ["convolution_gpu_bfyx_f16", 4],
+        "6873924247641352061": ["convolution_gpu_bfyx_f16", 4],
+        "6474957215284027135": ["convolution_gpu_bfyx_f16", 7],
+        "16573724507496129614": ["convolution_gpu_bfyx_f16", 6],
+        "11210971373278055121": ["convolution_gpu_bfyx_f16", 2],
+        "185717560970701618": ["convolution_gpu_bfyx_f16", 6],
+        "11817410866221484993": ["convolution_gpu_bfyx_f16", 3],
+        "9765519004693711463": ["convolution_gpu_bfyx_f16", 6],
+        "14300671725579588671": ["convolution_gpu_bfyx_f16", 5],
+        "1297549572559338433": ["convolution_gpu_bfyx_f16", 5],
+        "4346210823986581329": ["convolution_gpu_bfyx_f16", 4],
+        "2750608965765787878": ["convolution_gpu_bfyx_f16", 3],
+        "14245442283142381063": ["convolution_gpu_bfyx_f16", 6],
+        "2942593456597250269": ["convolution_gpu_bfyx_f16", 7],
+        "14807774261203767931": ["convolution_gpu_bfyx_f16", 6],
+        "2024891861044519704": ["convolution_gpu_bfyx_f16", 6],
+        "12988352411577718659": ["convolution_gpu_bfyx_f16", 6],
+        "2856387545805299627": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "6931984251726006059": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "10053897550646291639": ["convolution_gpu_bfyx_gemm_like", 2],
+        "166522152877705111": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8194080531314571831": ["convolution_gpu_bfyx_os_iyx_osv16", 835],
+        "8462596687449136841": ["convolution_gpu_bfyx_gemm_like", 1],
+        "16641148739441654579": ["convolution_gpu_bfyx_gemm_like", 0],
+        "3012332306785177280": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "1667559253581127345": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17950962563816983793": ["convolution_gpu_bfyx_gemm_like", 2],
+        "15920581282829793263": ["convolution_gpu_bfyx_os_iyx_osv16", 126],
+        "4931844549089354374": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11227326613484178737": ["convolution_gpu_bfyx_os_iyx_osv16", 734],
+        "8926339988827333993": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14947161471102583853": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7959005479751426244": ["convolution_gpu_bfyx_os_iyx_osv16", 913],
+        "13876295120508241721": ["convolution_gpu_bfyx_os_iyx_osv16", 583],
+        "5450799298000231966": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "745049678230480319": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "17799305583546345514": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 0],
+        "15448134419455024563": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "10247046915015701375": ["convolution_gpu_bfyx_os_iyx_osv16", 855],
+        "818326236814735107": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "11621993279519931789": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "10879300979808656559": ["fully_connected_gpu_bs_f_bsv16_b1", 0],
+        "8049787711095084959": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8361191677655973935": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "6455326407035817823": ["convolution_gpu_bfyx_os_iyx_osv16", 86],
+        "4549875381866576113": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14780479128645572595": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 1106],
+        "9221666339438514459": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "17091218700152862273": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9951123692498529061": ["convolution_gpu_bfyx_os_iyx_osv16", 661],
+        "15226633731441516361": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 359],
+        "4453349487216529991": ["convolution_gpu_bfyx_os_iyx_osv16", 1039],
+        "17929115705990268026": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6621532750524834097": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 353],
+        "16562571407098459049": ["convolution_gpu_bfyx_os_iyx_osv16", 287],
+        "2873284221161386597": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "3769897639705493224": ["convolution_gpu_bfyx_os_iyx_osv16", 174],
+        "5447803100312758964": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 926],
+        "9163238347824560017": ["convolution_gpu_bfyx_os_iyx_osv16", 542],
+        "1688979903294911182": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "9338092674592431198": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 912],
+        "15522545626077485199": ["convolution_gpu_bfyx_os_iyx_osv16", 543],
+        "1797489112792772811": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "5478531388148194783": ["convolution_gpu_bfyx_os_iyx_osv16", 459],
+        "3289369122755371980": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 84],
+        "14572382016053496602": ["convolution_gpu_bfyx_os_iyx_osv16", 837],
+        "16841168676076935693": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "18407347961782182453": ["fused_conv_eltwise_gpu_bfyx_os_iyx_osv16", 83],
+        "8695092335925023399": ["fully_connected_gpu_bf_io_input_spatial", 1],
+        "14168685794682021826": ["convolution_gpu_bfyx_gemm_like", 2],
+        "12423218459706339590": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "8734189831526420226": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14362182205968229036": ["convolution_gpu_bfyx_os_iyx_osv16", 517],
+        "13157476677873103938": ["convolution_gpu_bfyx_os_iyx_osv16", 891],
+        "11940005480315119153": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "1302512649939808216": ["convolution_gpu_bfyx_os_iyx_osv16", 500],
+        "16919811480058643640": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "5208084625746441471": ["convolution_gpu_bfyx_direct_10_12_16", 1],
+        "8262549900448065079": ["convolution_gpu_bfyx_os_iyx_osv16", 46],
+        "5227665249672396809": ["convolution_gpu_bfyx_os_iyx_osv16", 799],
+        "4933328578946081154": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13882747247011638614": ["convolution_gpu_bfyx_os_iyx_osv16", 178],
+        "814582084353022226": ["convolution_gpu_bfyx_os_iyx_osv16", 178],
+        "4844820846457555156": ["convolution_gpu_bfyx_os_iyx_osv16", 887],
+        "6607603202773469786": ["convolution_gpu_bfyx_os_iyx_osv16", 138],
+        "15439502814859116813": ["convolution_gpu_bfyx_os_iyx_osv16", 890],
+        "15777107988701235428": ["convolution_gpu_bfyx_os_iyx_osv16", 501],
+        "12832042711454018844": ["convolution_gpu_bfyx_os_iyx_osv16", 508],
+        "6099745418702030715": ["convolution_gpu_bfyx_os_iyx_osv16", 132],
+        "4230880085403638923": ["convolution_gpu_bfyx_os_iyx_osv16", 47],
+        "62516450676185117": ["convolution_gpu_bfyx_os_iyx_osv16", 422],
+        "5477965717233241895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13669762279828807941": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "11383807956757990177": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9660099130061496863": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17151683028720387864": ["convolution_gpu_bfyx_gemm_like", 2],
+        "1859914910272455189": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7396998153023492339": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2008700175670389343": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "16827869183124732303": ["convolution_gpu_bfyx_gemm_like", 2],
+        "13120889385491477637": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "18305507733019922935": ["convolution_gpu_bfyx_gemm_like", 2],
+        "4387964680811897490": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "9490382148010824252": ["convolution_gpu_bfyx_os_iyx_osv16", 293],
+        "7607585452987307694": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "6647358668213164168": ["convolution_gpu_bfyx_os_iyx_osv16", 1047],
+        "3269426835760928022": ["convolution_gpu_bfyx_os_iyx_osv16", 1045],
+        "8407302923973070317": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "7392260165026897157": ["convolution_gpu_bfyx_os_iyx_osv16", 534],
+        "17129583679506972654": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "15394113208725741887": ["convolution_gpu_bfyx_os_iyx_osv16", 535],
+        "2232515974555590822": ["convolution_gpu_bfyx_os_iyx_osv16", 458],
+        "7051704960834828963": ["convolution_gpu_bfyx_os_iyx_osv16", 494],
+        "5849502570947855625": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "17320230733736402509": ["convolution_gpu_bfyx_gemm_like", 1],
+        "14376448497282593859": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "11059091112167439040": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11073613812342958769": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "4154541958145867375": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11497596156215746295": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "7989457597882264703": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11224449857742374449": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8019330764912846895": ["convolution_gpu_bfyx_gemm_like", 2],
+        "8298488609133255406": ["convolution_gpu_bfyx_depthwise_weights_lwg", 1],
+        "14393217564854520848": ["convolution_gpu_bfyx_os_iyx_osv16", 251],
+        "3141906957984957990": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "8411633870815503324": ["convolution_gpu_bfyx_os_iyx_osv16", 1006],
+        "17289238208820562994": ["convolution_gpu_bfyx_os_iyx_osv16", 254],
+        "17376882838565917025": ["convolution_gpu_bfyx_depthwise_weights_lwg", 0],
+        "376447867595880925": ["convolution_gpu_bfyx_os_iyx_osv16", 905],
+        "9223591734176279618": ["convolution_gpu_bfyx_depthwise_weights_lwg", 2],
+        "2123481240130017671": ["convolution_gpu_bfyx_os_iyx_osv16", 908],
+        "60262519627721258": ["convolution_gpu_bfyx_os_iyx_osv16", 773],
+        "2779831597589397721": ["convolution_gpu_bfyx_gemm_like", 2],
+        "14888498856025675875": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "13008816286946828339": ["convolution_gpu_bfyx_direct_10_12_16", 2],
+        "14472562307183930494": ["convolution_gpu_bfyx_os_iyx_osv16", 508],
+        "12260051528344627305": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "12237139830764526217": ["convolution_gpu_bfyx_direct_10_12_16", 0],
+        "12839904859734107448": ["convolution_gpu_bfyx_os_iyx_osv16", 604],
+        "2557331839687658350": ["convolution_gpu_bfyx_os_iyx_osv16", 3],
+        "14711934417369240383": ["convolution_gpu_bfyx_os_iyx_osv16", 379],
+        "1599135987505067413": ["convolution_gpu_bfyx_gemm_like", 2],
+        "88960405449779079": ["convolution_gpu_bfyx_os_iyx_osv16", 699],
+        "3983071771155729815": ["convolution_gpu_bfyx_os_iyx_osv16", 706],
+        "4686928543634340294": ["convolution_gpu_bfyx_os_iyx_osv16", 879],
+        "9500201961536063781": ["convolution_gpu_bfyx_os_iyx_osv16", 500],
+        "5626617363814193337": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "9493629616033946504": ["convolution_gpu_bfyx_os_iyx_osv16", 1018],
+        "9142997105687030758": ["convolution_gpu_bfyx_os_iyx_osv16", 754],
+        "3565303211593767799": ["convolution_gpu_bfyx_os_iyx_osv16", 756],
+        "2027062613896109334": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "2494989528221736054": ["convolution_gpu_bfyx_f16", 8],
+        "10481457184081052557": ["convolution_gpu_bfyx_f16", 8],
+        "17843566914419305583": ["convolution_gpu_bfyx_f16", 8],
+        "10440359951914302042": ["convolution_gpu_bfyx_f16", 5],
+        "12355534646291322950": ["convolution_gpu_bfyx_f16", 5],
+        "1312046147551402733": ["convolution_gpu_bfyx_f16", 6],
+        "17747064821498992452": ["convolution_gpu_bfyx_f16", 4],
+        "15727623554601964014": ["convolution_gpu_bfyx_f16", 1],
+        "1123438482147655288": ["convolution_gpu_bfyx_f16", 5],
+        "7126696940487701707": ["convolution_gpu_bfyx_f16", 3],
+        "3872390202906772826": ["convolution_gpu_bfyx_f16", 7],
+        "2880589787553789663": ["convolution_gpu_bfyx_os_iyx_osv16", 48],
+        "11290368603402236066": ["convolution_gpu_bfyx_to_bfyx_f16", 8],
+        "1323592601201034234": ["convolution_gpu_bfyx_f16", 8],
+        "14798486770850675841": ["convolution_gpu_bfyx_f16", 8],
+        "11673314628747753691": ["convolution_gpu_bfyx_f16", 6],
+        "7021961511624638678": ["convolution_gpu_bfyx_f16", 7],
+        "5676198353742450430": ["convolution_gpu_bfyx_f16", 7],
+        "4929819810689803833": ["convolution_gpu_bfyx_f16", 3],
+        "240316590146675808": ["convolution_gpu_bfyx_f16", 4],
+        "17625565940895057722": ["convolution_gpu_bfyx_f16", 3],
+        "8688075088415087060": ["convolution_gpu_bfyx_f16", 1],
+        "3109943868702160503": ["convolution_gpu_bfyx_f16", 4],
+        "15650217867869430450": ["convolution_gpu_bfyx_f16", 8],
+        "17908144598228512507": ["convolution_gpu_bfyx_os_iyx_osv16", 419],
+        "18154134293896237020": ["convolution_gpu_bfyx_gemm_like", 2],
+        "9604863051097029874": ["convolution_gpu_bfyx_os_iyx_osv16", 355],
+        "12931069967038668164": ["convolution_gpu_bfyx_os_iyx_osv16", 1085],
+        "6806199908367808607": ["convolution_gpu_bfyx_gemm_like", 2],
+        "11683146685348965370": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "8154297486284619437": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1", 2],
+        "14336744408490491240": ["convolution_gpu_bfyx_os_iyx_osv16", 1118],
+        "4571901717343198720": ["convolution_gpu_bfyx_gemm_like", 2],
+        "6532394816830144120": ["convolution_gpu_bfyx_gemm_like", 2],
+        "2666796249274140911": ["convolution_gpu_bfyx_os_iyx_osv16", 1119],
+        "11653606109120321972": ["convolution_gpu_bfyx_os_iyx_osv16", 735],
+        "6204893434840435239": ["convolution_gpu_bfyx_os_iyx_osv16", 243],
+        "13218364348439640168": ["fully_connected_gpu_bs_f_bsv16_b1", 2],
+        "10201555771333451359": ["convolution_gpu_bfyx_os_iyx_osv16", 1022],
         "13054256137328114261": ["convolution_gpu_1x1", 0],
         "15694677292906293678": ["convolution_gpu_1x1", 2],
         "7281987725176913451": ["convolution_gpu_generic", 1],
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_tutorial.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_tutorial.cl
deleted file mode 100644 (file)
index 31a11d3..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright (c) 2016-2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ADVANCED_TUTORIAL
-
-#include "include/include_all.cl"
-
-KERNEL(activation)(
-    __global UNIT_TYPE* input, 
-    __global UNIT_TYPE* output
-#ifdef PARAMETERIZED 
-    , __global ADDITIONAL_PARAMS_TYPE* params
-#endif
-    )
-{
-#if defined OUTPUT_LAYOUT_YXFB                  // in Case of YXFB we need a different processing order than BFYX (from performance aspect)
-    const uint x = get_global_id(1);
-    const uint y = get_global_id(2);
-#if OUTPUT_BATCH_NUM == 1
-    const uint feature = get_global_id(0);
-    const uint batch = 0;
-#else
-    const uint feature = get_global_id(0) % OUTPUT_FEATURE_NUM;
-    const uint batch = get_global_id(0) / OUTPUT_FEATURE_NUM;
-#endif
-#else
-    const uint x = get_global_id(0);
-    const uint y = get_global_id(1);
-#if OUTPUT_BATCH_NUM == 1
-    const uint feature = get_global_id(2);
-    const uint batch = 0;
-#else
-    const uint feature = get_global_id(2) % OUTPUT_FEATURE_NUM;
-    const uint batch = get_global_id(2) / OUTPUT_FEATURE_NUM;
-#endif
-#endif
-
-    const uint src_index = GET_DATA_INDEX(INPUT0, batch, feature, y, x);    // helper macro to deduce the buffer index.
-    const uint dst_index = GET_DATA_INDEX(OUTPUT, batch, feature, y, x);
-
-#if defined PARAMETERIZED                                                   // in case that the input additional params is located on a bufffer
-        #if PARAMS_NUM > 2
-        #error Too many params
-    #elif PARAMS_NUM == 2
-        #define NL_M_PARAMETERIZED (float)params[2*feature + 0]
-        #define NL_N_PARAMETERIZED (float)params[2*feature + 1]
-    #elif PARAMS_NUM == 1
-        #define NL_M_PARAMETERIZED (float)params[feature]
-        #define NL_N_PARAMETERIZED (float)NL_N
-    #else
-        #define NL_M_PARAMETERIZED (float)NL_M
-        #define NL_N_PARAMETERIZED (float)NL_N
-    #endif
-    #define PARAMETERIZED_ACTIVATION_PARAMS NL_M_PARAMETERIZED, NL_N_PARAMETERIZED
-    output[dst_index] = ACTIVATION(input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
-#else
-    const float nl_m = (float)NL_M;
-    const float nl_n = (float)NL_N;
-    output[dst_index] = ACTIVATION(input[src_index], ACTIVATION_PARAMS);           // Do the activation
-#endif
-}
-
-#else
-
-//#include "put here your include files"
-
-__kernel void activation_tutorial(
-    const __global UNIT_TYPE* input,
-    __global UNIT_TYPE* output)
-{
-       // fill here your kernel
-}
-
-#endif
\ No newline at end of file
index 1132131..02b78a4 100644 (file)
@@ -47,7 +47,7 @@
 KERNEL(arg_max_min_modified)(const __global INPUT0_TYPE* input
                                   ,__global OUTPUT_TYPE* output
 #ifdef SECOND_OUTPUT_EXIST
-                                  ,__global OUTPUT_TYPE* second_output
+                                  ,__global INPUT1_TYPE* second_output
 #endif
                             )
 {
@@ -166,15 +166,15 @@ KERNEL(arg_max_min_modified)(const __global INPUT0_TYPE* input
         indices[AXIS] = out_position;
 #endif
 #ifdef TOP_K_ORDER
-    output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = result[top_k].value;
+    output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = TO_OUTPUT_TYPE(result[top_k].value);
 #else
-    output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = result[top_k].index;
+    output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = TO_OUTPUT_TYPE(result[top_k].index);
 #endif
 #ifdef SECOND_OUTPUT_EXIST
 #ifdef TOP_K_ORDER
-    second_output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = result[top_k].index;
+    second_output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = TO_INPUT1_TYPE(result[top_k].index);
 #else
-    second_output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = result[top_k].value;
+    second_output[GET_DATA_INDEX_5D(OUTPUT, indices[0], indices[1], indices[2], indices[3], indices[4])] = TO_INPUT1_TYPE(result[top_k].value);
 #endif
 #endif
     }
index aa6a0fd..493b699 100644 (file)
@@ -27,7 +27,9 @@ __attribute__((reqd_work_group_size(SUB_GROUP_SIZE, 1, 1)))
 KERNEL(binary_convolution_1x1)(const __global INPUT0_TYPE* input,
                                      __global OUTPUT_TYPE* output,
                                const __global FILTER_TYPE* weights,
-                               FUSED_OPS_DECLS
+#if HAS_FUSED_OPS_DECLS
+                               FUSED_OPS_DECLS,
+#endif
                                uint split_idx)
 {
     const int xy = get_group_id(0);
@@ -171,18 +173,25 @@ KERNEL(binary_convolution_1x1)(const __global INPUT0_TYPE* input,
     }
 
     // Load data for fused operations (scales, biases, quantization thresholds, etc)
+#if CUSTOM_FUSED_OPS
     FUSED_OPS_PREPARE_DATA;
+#endif
 
     UNIT_TYPE dst[OC_BLOCK_SIZE];
     for (int oc = 0; oc < OC_BLOCK_SIZE; oc++)
     {
-        UNIT_TYPE res = TO_UNIT_TYPE(INPUT0_FEATURE_NUM - 2*dst_buf[oc]);
+        CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM - 2*dst_buf[oc]);
+#if CUSTOM_FUSED_OPS
         DO_ELTWISE_FUSED_OPS;
-
 // Don't save floating-point intermediate result, since packed one is already computed
 #if !BINARY_PACKED_OUTPUT
         dst[oc] = res;
 #endif
+#elif HAS_FUSED_OPS
+        FUSED_OPS;
+        dst[oc] = FINAL_NAME;
+#endif
+
     }
 
     bool in_x = x < OUTPUT_SIZE_X;
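
Several hunks in this patch, here and in the convolution kernels further below, converge on one calling convention: the extra kernel arguments appear only when HAS_FUSED_OPS_DECLS is defined, and the earlier FUSED_OPS_LOAD_DATA / DO_ELTWISE_FUSED_OPS pair is replaced by a JIT-generated FUSED_OPS block whose final value is exposed under FINAL_NAME (with _SCALAR and _VEC variants in the vectorised kernels). The C sketch below illustrates only that contract; the macro bodies are invented stand-ins (a per-channel scale followed by ReLU is assumed purely for illustration) for whatever the kernel selector actually generates per fused primitive.

    #include <stdio.h>

    #define HAS_FUSED_OPS 1

    /* Hypothetical JIT-generated epilogue: per-channel scale, then ReLU. */
    #define FUSED_OPS                                   \
        float fused_tmp = res * scale[oc];              \
        fused_tmp = fused_tmp > 0.0f ? fused_tmp : 0.0f
    #define FINAL_NAME fused_tmp

    int main(void) {
        const float scale[2] = {0.5f, 2.0f};
        const float conv_result[2] = {-4.0f, 3.0f};   /* raw convolution outputs */
        float dst[2];

        for (int oc = 0; oc < 2; ++oc) {
            float res = conv_result[oc];
    #if HAS_FUSED_OPS
            FUSED_OPS;              /* epilogue injected by the JIT */
            dst[oc] = FINAL_NAME;   /* name of the last value the epilogue produced */
    #else
            dst[oc] = res;          /* no fused primitives: store the raw result */
    #endif
        }
        printf("%g %g\n", dst[0], dst[1]);
        return 0;
    }
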
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl
new file mode 100644 (file)
index 0000000..47da3ef
--- /dev/null
@@ -0,0 +1,176 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+#include "include/unit_type.cl"
+
+#define OC_BLOCK_SIZE 16
+
+#define GET_SRC(data, id) intel_sub_group_shuffle(data, id)
+#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_uint(intel_sub_group_block_read((const __global uint*)(ptr) + (byte_offset)))
+#define ALIGNED_BLOCK_READ2(ptr, byte_offset) as_uint2(intel_sub_group_block_read2((const __global uint*)(ptr) + (byte_offset)))
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+__attribute__((reqd_work_group_size(SUB_GROUP_SIZE, 1, 1)))
+KERNEL(binary_convolution_1x1_b_fs_yx_fsv16)(const __global INPUT0_TYPE* input,
+                                                   __global OUTPUT_TYPE* output,
+                                             const __global FILTER_TYPE* weights,
+#if HAS_FUSED_OPS_DECLS
+                                             FUSED_OPS_DECLS,
+#endif
+                                             uint split_idx)
+{
+    const int xy = get_group_id(0);
+    const int f_block = get_global_id(1);
+    const int b = get_global_id(2);
+    const int lid = get_sub_group_local_id();
+#if PADDED_INPUT
+    const int x = (xy * XY_BLOCK_SIZE + lid) % OUTPUT_SIZE_X;
+    const int y = (xy * XY_BLOCK_SIZE + lid) / OUTPUT_SIZE_X;
+    const uint input_offset = INPUT0_OFFSET
+                            + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH
+                            + y*INPUT0_Y_PITCH;
+#else
+    const int x = (xy * XY_BLOCK_SIZE + lid) % OUTPUT_SIZE_X;
+    const int y = (xy * XY_BLOCK_SIZE + lid) / OUTPUT_SIZE_X;
+    const uint input_offset = INPUT0_OFFSET
+                            + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH
+                            + xy*XY_BLOCK_SIZE;
+#endif
+    const uint output_x_pitch = OC_BLOCK_SIZE;
+    const uint output_y_pitch = output_x_pitch * (OUTPUT_PAD_BEFORE_SIZE_X +  OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X);
+    const uint output_total_f_size = OUTPUT_PAD_BEFORE_FEATURE_NUM + OUTPUT_FEATURE_NUM + OUTPUT_PAD_AFTER_FEATURE_NUM;
+    const uint output_fs_pitch = output_y_pitch * (OUTPUT_PAD_BEFORE_SIZE_Y +  OUTPUT_SIZE_Y + OUTPUT_PAD_AFTER_SIZE_Y);
+    const uint output_b_pitch = output_fs_pitch * ((output_total_f_size + OC_BLOCK_SIZE - 1) / OC_BLOCK_SIZE);
+    const uint dst_index = OUTPUT_OFFSET*OC_BLOCK_SIZE
+                         + b*output_b_pitch
+                         + f_block*output_fs_pitch;
+
+    const uint filter_offset = ((f_block/2)*2)*OC_BLOCK_SIZE*INPUT0_FEATURE_NUM_PACKED + (f_block%2)*16;
+
+    int dst_buf[OC_BLOCK_SIZE] = { 0 }; // 16 X
+
+    for (int k = 0; k < INPUT0_FEATURE_NUM_PACKED; ++k)
+    {
+        // Load 16 input elements from feature map by subgroup
+#if PADDED_INPUT
+        INPUT0_TYPE src = input[input_offset + k*INPUT0_FEATURE_PITCH + x];
+#else
+        INPUT0_TYPE src = ALIGNED_BLOCK_READ(input, input_offset + k*INPUT0_FEATURE_PITCH);
+#endif
+
+        // Load 32 OC x 32 ICP. Each WI has lid-th and (lid+16)-th channels
+        FILTER_TYPE wei = ALIGNED_BLOCK_READ(weights, filter_offset + k * OC_BLOCK_SIZE*2);
+
+        // Shuffle 2 OC x 32 ICP x 16 X of src
+        const INPUT0_TYPE src0  = GET_SRC(src, 0);
+        const INPUT0_TYPE src1  = GET_SRC(src, 1);
+        const INPUT0_TYPE src2  = GET_SRC(src, 2);
+        const INPUT0_TYPE src3  = GET_SRC(src, 3);
+        const INPUT0_TYPE src4  = GET_SRC(src, 4);
+        const INPUT0_TYPE src5  = GET_SRC(src, 5);
+        const INPUT0_TYPE src6  = GET_SRC(src, 6);
+        const INPUT0_TYPE src7  = GET_SRC(src, 7);
+        const INPUT0_TYPE src8  = GET_SRC(src, 8);
+        const INPUT0_TYPE src9  = GET_SRC(src, 9);
+        const INPUT0_TYPE src10 = GET_SRC(src, 10);
+        const INPUT0_TYPE src11 = GET_SRC(src, 11);
+        const INPUT0_TYPE src12 = GET_SRC(src, 12);
+        const INPUT0_TYPE src13 = GET_SRC(src, 13);
+        const INPUT0_TYPE src14 = GET_SRC(src, 14);
+        const INPUT0_TYPE src15 = GET_SRC(src, 15);
+
+#if LEFTOVERS_IC
+        if (k == INPUT0_FEATURE_NUM_PACKED - 1)
+        {
+            dst_buf[0]  += popcount((wei ^ src0) & FILTER_MASK);
+            dst_buf[1]  += popcount((wei ^ src1) & FILTER_MASK);
+            dst_buf[2]  += popcount((wei ^ src2) & FILTER_MASK);
+            dst_buf[3]  += popcount((wei ^ src3) & FILTER_MASK);
+            dst_buf[4]  += popcount((wei ^ src4) & FILTER_MASK);
+            dst_buf[5]  += popcount((wei ^ src5) & FILTER_MASK);
+            dst_buf[6]  += popcount((wei ^ src6) & FILTER_MASK);
+            dst_buf[7]  += popcount((wei ^ src7) & FILTER_MASK);
+            dst_buf[8]  += popcount((wei ^ src8) & FILTER_MASK);
+            dst_buf[9]  += popcount((wei ^ src9) & FILTER_MASK);
+            dst_buf[10] += popcount((wei ^ src10) & FILTER_MASK);
+            dst_buf[11] += popcount((wei ^ src11) & FILTER_MASK);
+            dst_buf[12] += popcount((wei ^ src12) & FILTER_MASK);
+            dst_buf[13] += popcount((wei ^ src13) & FILTER_MASK);
+            dst_buf[14] += popcount((wei ^ src14) & FILTER_MASK);
+            dst_buf[15] += popcount((wei ^ src15) & FILTER_MASK);
+            break;
+        }
+#endif
+        dst_buf[0]  += popcount(wei ^ src0);
+        dst_buf[1]  += popcount(wei ^ src1);
+        dst_buf[2]  += popcount(wei ^ src2);
+        dst_buf[3]  += popcount(wei ^ src3);
+        dst_buf[4]  += popcount(wei ^ src4);
+        dst_buf[5]  += popcount(wei ^ src5);
+        dst_buf[6]  += popcount(wei ^ src6);
+        dst_buf[7]  += popcount(wei ^ src7);
+        dst_buf[8]  += popcount(wei ^ src8);
+        dst_buf[9]  += popcount(wei ^ src9);
+        dst_buf[10] += popcount(wei ^ src10);
+        dst_buf[11] += popcount(wei ^ src11);
+        dst_buf[12] += popcount(wei ^ src12);
+        dst_buf[13] += popcount(wei ^ src13);
+        dst_buf[14] += popcount(wei ^ src14);
+        dst_buf[15] += popcount(wei ^ src15);
+    }
+
+    // Load data for fused operations (scales, biases, quantization thresholds, etc)
+#if CUSTOM_FUSED_OPS
+    FUSED_OPS_PREPARE_DATA;
+#endif
+
+    OUTPUT_TYPE dst[OC_BLOCK_SIZE];
+    __attribute__((opencl_unroll_hint(OC_BLOCK_SIZE)))
+    for (int oc = 0; oc < OC_BLOCK_SIZE; oc++)
+    {
+        CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM - 2*dst_buf[oc]);
+#if CUSTOM_FUSED_OPS
+        DO_ELTWISE_FUSED_OPS;
+        dst[oc] = res;
+#elif HAS_FUSED_OPS
+        FUSED_OPS;
+        dst[oc] = TO_OUTPUT_TYPE(FINAL_NAME);
+#endif
+    }
+
+#if LEFTOVERS_OC
+    bool in_fm = f_block*OC_BLOCK_SIZE + lid < OUTPUT_FEATURE_NUM;
+    __attribute__((opencl_unroll_hint(SUB_GROUP_SIZE)))
+    for (int ox = 0; ox < SUB_GROUP_SIZE; ox++) {
+        int xi = (xy * XY_BLOCK_SIZE+ox) % OUTPUT_SIZE_X;
+        int yi = (xy * XY_BLOCK_SIZE+ox) / OUTPUT_SIZE_X;
+        bool in_x = xi < OUTPUT_SIZE_X;
+        bool in_y = yi < OUTPUT_SIZE_Y;
+        if (in_x && in_y && in_fm) {
+            output[dst_index + yi*output_y_pitch + xi*output_x_pitch + lid] = dst[ox];
+        }
+    }
+#else
+    for (int ox = 0; ox < SUB_GROUP_SIZE; ox++) {
+        int xi = (xy * XY_BLOCK_SIZE+ox) % OUTPUT_SIZE_X;
+        int yi = (xy * XY_BLOCK_SIZE+ox) / OUTPUT_SIZE_X;
+        bool in_x = xi < OUTPUT_SIZE_X;
+        bool in_y = yi < OUTPUT_SIZE_Y;
+        if (in_x && in_y)
+            UNIT_BLOCK_WRITE(output, dst_index + yi*output_y_pitch + xi*output_x_pitch, dst[ox]);
+    }
+#endif
+}
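
The accumulation loop above sums popcount(wei ^ src) over 32-channel packed words and the epilogue forms INPUT0_FEATURE_NUM - 2*dst_buf[oc]. That is the usual XNOR-net identity: for weights and activations restricted to +1/-1 and packed one bit per channel, matches minus mismatches equals N - 2*popcount(xor), and the FILTER_MASK branch simply excludes the unused bits of a partial last word. A self-contained C check of the identity, with an arbitrary test pattern and packing convention:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Plain reference: dot product of two vectors whose elements are +1 or -1. */
    static int dot_reference(const int8_t *x, const int8_t *w, int n) {
        int acc = 0;
        for (int i = 0; i < n; ++i)
            acc += x[i] * w[i];
        return acc;
    }

    /* Bit-packed version: one channel per bit, a set bit encoding -1. */
    static int dot_xnor_popcount(uint32_t x_bits, uint32_t w_bits, int n) {
        /* The mask plays the role of FILTER_MASK for a partial last word;
         * n < 32 is assumed here.  __builtin_popcount is a GCC/Clang builtin. */
        const uint32_t mask = (1u << n) - 1u;
        return n - 2 * (int)__builtin_popcount((x_bits ^ w_bits) & mask);
    }

    int main(void) {
        enum { N = 20 };
        int8_t x[N], w[N];
        uint32_t x_bits = 0, w_bits = 0;
        for (int i = 0; i < N; ++i) {
            x[i] = (int8_t)((i % 3 == 0) ? -1 : 1);   /* arbitrary test pattern */
            w[i] = (int8_t)((i % 5 == 0) ? -1 : 1);
            if (x[i] < 0) x_bits |= 1u << i;
            if (w[i] < 0) w_bits |= 1u << i;
        }
        assert(dot_reference(x, w, N) == dot_xnor_popcount(x_bits, w_bits, N));
        printf("dot = %d\n", dot_reference(x, w, N));
        return 0;
    }
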
index 377065f..45a7438 100644 (file)
 
 #define OC_BLOCK_SIZE 32
 
-#define GET_WEI(data, id) intel_sub_group_shuffle(data, id)
-
 #define ALIGNED_BLOCK_READ(ptr, byte_offset) as_uint(intel_sub_group_block_read((const __global uint*)(ptr) + (byte_offset)))
 #define ALIGNED_BLOCK_READ2(ptr, byte_offset) as_uint2(intel_sub_group_block_read2((const __global uint*)(ptr) + (byte_offset)))
 
+#if BINARY_PACKED_OUTPUT
+    #define BUFFER_TYPE UNIT_TYPE
+#else
+    #define BUFFER_TYPE OUTPUT_TYPE
+#endif
+
 __attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
 __attribute__((reqd_work_group_size(SUB_GROUP_SIZE, 1, 1)))
 KERNEL(binary_convolution_generic)(const __global INPUT0_TYPE* input,
                                          __global OUTPUT_TYPE* output,
                                    const __global FILTER_TYPE* weights,
-                                   FUSED_OPS_DECLS
+#if HAS_FUSED_OPS_DECLS
+                                   FUSED_OPS_DECLS,
+#endif
                                    uint split_idx)
 {
     const int f_block = get_global_id(1);
@@ -143,31 +149,41 @@ KERNEL(binary_convolution_generic)(const __global INPUT0_TYPE* input,
 
 #endif
     // Load data for fused operations (scales, biases, quantization thresholds, etc)
+#if CUSTOM_FUSED_OPS
     FUSED_OPS_PREPARE_DATA;
+#endif
 
-    UNIT_TYPE dst[SUB_GROUP_SIZE*2];
+    BUFFER_TYPE dst[SUB_GROUP_SIZE*2];
 
     __attribute__((opencl_unroll_hint(SUB_GROUP_SIZE*2)))
     for (int i = 0; i < SUB_GROUP_SIZE*2; i++)
     {
 #if EXCLUDE_PAD
-        UNIT_TYPE res = TO_UNIT_TYPE(INPUT0_FEATURE_NUM*intel_sub_group_shuffle(real_ks, i%SUB_GROUP_SIZE) - 2*dst_buf[i]);
+        CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM*intel_sub_group_shuffle(real_ks, i%SUB_GROUP_SIZE) - 2*dst_buf[i]);
 #else
-        UNIT_TYPE res = TO_UNIT_TYPE(INPUT0_FEATURE_NUM*FILTER_SIZE_Y*FILTER_SIZE_X - 2*dst_buf[i]);
+        CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM*FILTER_SIZE_Y*FILTER_SIZE_X - 2*dst_buf[i]);
 #endif
+
+#if CUSTOM_FUSED_OPS
         DO_ELTWISE_FUSED_OPS;
         dst[i] = res;
+#elif HAS_FUSED_OPS
+        FUSED_OPS;
+        dst[i] = FINAL_NAME;
+#else
+        dst[i] = res;
+#endif
+
     }
 
 #if BINARY_PACKED_OUTPUT
     int packed_out[SUB_GROUP_SIZE];
-    for (int i = 0; i < SUB_GROUP_SIZE; i++)
-    {
-        int ch0 = dst[0*SUB_GROUP_SIZE + i] > quantize0_threshold.s0 ? (1 << lid) : 0;
-        int ch1 = dst[1*SUB_GROUP_SIZE + i] > quantize0_threshold.s1 ? (1 << (SUB_GROUP_SIZE + lid)) : 0;
-        int res = ch0 + ch1;
-        packed_out[i] = sub_group_reduce_add(res);
-    }
+
+#if CUSTOM_FUSED_OPS
+    DO_CHANNEL_PACK_OPS;
+#else
+    #error "BINARY_PACKED_OUTPUT should be true only if node has fused quantize with bin output"
+#endif
 
     bool in_x = (x + lid) < OUTPUT_SIZE_X;
     bool in_y = y < OUTPUT_SIZE_Y;
index 4f37fe3..77ae18e 100644 (file)
@@ -17,7 +17,9 @@
 KERNEL(binary_convolution_ref)(const __global INPUT0_TYPE* input,
                                      __global OUTPUT_TYPE* output,
                                const __global FILTER_TYPE* weights,
-                               FUSED_OPS_DECLS
+#if HAS_FUSED_OPS_DECLS
+                               FUSED_OPS_DECLS,
+#endif
                                uint split_idx)
 {
     const int b  = get_global_id(0);
@@ -110,7 +112,10 @@ KERNEL(binary_convolution_ref)(const __global INPUT0_TYPE* input,
     UNIT_TYPE res = TO_OUTPUT_TYPE(INPUT0_FEATURE_NUM*FILTER_SIZE_X*FILTER_SIZE_Y - 2*res_popcnt);
 #endif
 
-    DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+    FUSED_OPS;
+    res = FINAL_NAME;
+#endif
 
     output[output_index] = res;
 }
index 0f6a214..9918b57 100644 (file)
@@ -23,77 +23,117 @@ KERNEL(border_gpu_ref)(
     // Border sizes (left-top set and right-bottom set):
     const uint blt_sx = LT_SIZES_SIZE_X;
     const uint blt_sy = LT_SIZES_SIZE_Y;
+#if INPUT0_DIMS >= 5
+    const uint blt_sz = LT_SIZES_SIZE_Z;
+#else
+    const uint blt_sz = 0;
+#endif
+#if INPUT0_DIMS == 6
+    const uint blt_sw = LT_SIZES_SIZE_W;
+#else
+    const uint blt_sw = 0;
+#endif
     const uint blt_sf = LT_SIZES_FEATURE_NUM;
     const uint blt_sb = LT_SIZES_BATCH_NUM;
 
     const uint brb_sx = RB_SIZES_SIZE_X;
     const uint brb_sy = RB_SIZES_SIZE_Y;
+#if INPUT0_DIMS >= 5
+    const uint brb_sz = RB_SIZES_SIZE_Z;
+#else
+    const uint brb_sz = 0;
+#endif
+#if INPUT0_DIMS == 6
+    const uint brb_sw = RB_SIZES_SIZE_W;
+#else
+    const uint brb_sw = 0;
+#endif
     const uint brb_sf = RB_SIZES_FEATURE_NUM;
     const uint brb_sb = RB_SIZES_BATCH_NUM;
 
     // Input sizes:
     const uint in_sx = INPUT0_SIZE_X;
     const uint in_sy = INPUT0_SIZE_Y;
+    const uint in_sz = INPUT0_SIZE_Z;
+    const uint in_sw = INPUT0_SIZE_W;
     const uint in_sf = INPUT0_FEATURE_NUM;
     const uint in_sb = INPUT0_BATCH_NUM;
 
     // Input limits (exclusive; tested on output position):
     const uint in_lx = in_sx + blt_sx;
     const uint in_ly = in_sy + blt_sy;
+    const uint in_lz = in_sz + blt_sz;
+    const uint in_lw = in_sw + blt_sw;
     const uint in_lf = in_sf + blt_sf;
-    const uint in_lb = in_sb + blt_sb;
-
+    const uint in_lb = in_sb + blt_sb;  
 
-    const uint out_x  = (uint) get_global_id(0);
-    const uint out_y  = (uint) get_global_id(1);
+    const uint out_xz  = (uint) get_global_id(0);
+    const uint out_yw  = (uint) get_global_id(1);
     const uint out_fb = (uint) get_global_id(2);
 
     const uint out_f  = out_fb % OUTPUT_FEATURE_NUM;
     const uint out_b  = out_fb / OUTPUT_FEATURE_NUM;
 
+    const uint out_x  = out_xz % OUTPUT_SIZE_X;
+    const uint out_z  = out_xz / OUTPUT_SIZE_X;
+
+    const uint out_y  = out_yw % OUTPUT_SIZE_Y;
+    const uint out_w  = out_yw / OUTPUT_SIZE_Y;
+
 #ifdef BORDER_TYPE_CONSTANT
     UNIT_TYPE in_val = TO_UNIT_TYPE(BORDER_VALUE);
+
     if (out_x >= blt_sx & out_x < in_lx &
         out_y >= blt_sy & out_y < in_ly &
+        out_z >= blt_sz & out_z < in_lz &
+        out_w >= blt_sw & out_w < in_lw &
         out_f >= blt_sf & out_f < in_lf &
         out_b >= blt_sb & out_b < in_lb)
     {
         const uint in_x = out_x - blt_sx;
         const uint in_y = out_y - blt_sy;
+        const uint in_z = out_z - blt_sz;
+        const uint in_w = out_w - blt_sw;
         const uint in_f = out_f - blt_sf;
         const uint in_b = out_b - blt_sb;
 
-        const uint in_pos = GET_DATA_INDEX(INPUT0, in_b, in_f, in_y, in_x);
+        const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
         in_val = input[in_pos];
     }
 #elif defined BORDER_TYPE_EDGE
     const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? 0 : in_sx - 1);
     const uint in_y = (out_y >= blt_sy & out_y < in_ly) ? out_y - blt_sy : (out_y < blt_sy ? 0 : in_sy - 1);
+    const uint in_z = (out_z >= blt_sz & out_z < in_lz) ? out_z - blt_sz : (out_z < blt_sz ? 0 : in_sz - 1);
+    const uint in_w = (out_w >= blt_sw & out_w < in_lw) ? out_w - blt_sw : (out_w < blt_sw ? 0 : in_sw - 1);
     const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? 0 : in_sf - 1);
     const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? 0 : in_sb - 1);
 
-    const uint in_pos = GET_DATA_INDEX(INPUT0, in_b, in_f, in_y, in_x);
+    const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
     UNIT_TYPE in_val = input[in_pos];
 #elif defined BORDER_TYPE_MIRROR
     const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? blt_sx - 1 - out_x : in_sx + in_lx - 1 - out_x);
     const uint in_y = (out_y >= blt_sy & out_y < in_ly) ? out_y - blt_sy : (out_y < blt_sy ? blt_sy - 1 - out_y : in_sy + in_ly - 1 - out_y);
+    const uint in_z = (out_z >= blt_sz & out_z < in_lz) ? out_z - blt_sz : (out_z < blt_sz ? blt_sz - 1 - out_z : in_sz + in_lz - 1 - out_z);
+    const uint in_w = (out_w >= blt_sw & out_w < in_lw) ? out_w - blt_sw : (out_w < blt_sw ? blt_sw - 1 - out_w : in_sw + in_lw - 1 - out_w);
     const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? blt_sf - 1 - out_f : in_sf + in_lf - 1 - out_f);
     const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? blt_sb - 1 - out_b : in_sb + in_lb - 1 - out_b);
 
-    const uint in_pos = GET_DATA_INDEX(INPUT0, in_b, in_f, in_y, in_x);
+    const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
     UNIT_TYPE in_val = input[in_pos];
 #elif defined BORDER_TYPE_MIRROR_101
     const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? blt_sx - out_x : in_sx + in_lx - 2 - out_x);
     const uint in_y = (out_y >= blt_sy & out_y < in_ly) ? out_y - blt_sy : (out_y < blt_sy ? blt_sy - out_y : in_sy + in_ly - 2 - out_y);
+    const uint in_z = (out_z >= blt_sz & out_z < in_lz) ? out_z - blt_sz : (out_z < blt_sz ? blt_sz - out_z : in_sz + in_lz - 2 - out_z);
+    const uint in_w = (out_w >= blt_sw & out_w < in_lw) ? out_w - blt_sw : (out_w < blt_sw ? blt_sw - out_w : in_sw + in_lw - 2 - out_w);
     const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? blt_sf - out_f : in_sf + in_lf - 2 - out_f);
     const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? blt_sb - out_b : in_sb + in_lb - 2 - out_b);
 
-    const uint in_pos = GET_DATA_INDEX(INPUT0, in_b, in_f, in_y, in_x);
+    const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
     UNIT_TYPE in_val = input[in_pos];
 #else
     #error Unsupported border type.
 #endif
 
-    const uint out_pos = GET_DATA_INDEX(OUTPUT, out_b, out_f, out_y, out_x);
+    const uint out_pos = GET_DATA_INDEX_6D(OUTPUT, out_b, out_f, out_w, out_z, out_y, out_x);
     output[out_pos] = in_val;
 }
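
This hunk generalises border_gpu_ref from 4-D to 5-D/6-D tensors, but the per-coordinate mapping itself is unchanged and is applied independently to each axis. The 1-D C sketch below replays the three remapping formulas from the kernel with arbitrary example sizes; BORDER_TYPE_CONSTANT is omitted because it substitutes BORDER_VALUE outside the input extent instead of remapping the coordinate.

    #include <stdio.h>

    typedef enum { EDGE, MIRROR, MIRROR_101 } border_mode;

    /* blt = left/top pad size, in_s = input extent along this axis,
     * in_l = in_s + blt is the exclusive upper limit tested in the kernel. */
    static unsigned map_coord(unsigned out, unsigned blt, unsigned in_s, border_mode mode) {
        const unsigned in_l = in_s + blt;
        if (out >= blt && out < in_l)
            return out - blt;                           /* inside the input */
        if (mode == EDGE)
            return out < blt ? 0u : in_s - 1u;          /* clamp to the edge sample */
        if (mode == MIRROR)
            return out < blt ? blt - 1u - out : in_s + in_l - 1u - out;
        /* MIRROR_101: reflect without repeating the edge sample */
        return out < blt ? blt - out : in_s + in_l - 2u - out;
    }

    int main(void) {
        /* Input extent 4 padded by 2 on each side: output coordinates 0..7. */
        for (unsigned out = 0; out < 8; ++out)
            printf("out=%u  edge=%u  mirror=%u  mirror_101=%u\n", out,
                   map_coord(out, 2, 4, EDGE),
                   map_coord(out, 2, 4, MIRROR),
                   map_coord(out, 2, 4, MIRROR_101));
        return 0;
    }
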
index 72808ad..8ecff97 100644 (file)
@@ -24,6 +24,8 @@ inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x
     return GET_DATA_INDEX_5D(INPUT0, b, f, z, y, x);
 #elif INPUT0_SIMPLE && INPUT0_DIMS == 6
     return GET_DATA_INDEX_6D(INPUT0, b, f, w, z, y, x);
+#elif INPUT0_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
 #else
 #error concatenation_gpu_simple_ref.cl: input format - not supported
 #endif
@@ -38,6 +40,8 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint
     return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
 #elif OUTPUT_SIMPLE && OUTPUT_DIMS == 6
     return GET_DATA_INDEX_6D(OUTPUT, b, f, w, z, y, x);
+#elif OUTPUT_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
 #else
 #error concatenation_gpu_simple_ref.cl: output format - not supported
 #endif
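
Both of these hunks route bfzyx_f16-laid-out tensors through a dedicated index macro. In that layout the feature dimension is blocked in groups of 16 with the feature-within-block index innermost, which matches the pitch arithmetic spelled out in the new binary-convolution kernel earlier in this diff (output_x_pitch = 16, output_y_pitch = 16 times the padded width, and so on). Ignoring padding, the offset computation presumably reduces to something like the sketch below; the helper name is invented, and GET_DATA_BFZYX_F16_INDEX additionally accounts for pad offsets.

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical flattening for a 16-wide feature-blocked (fsv16) layout with
     * no padding: batch, feature block, z, y, x, then feature-within-block. */
    static size_t fsv16_index(size_t b, size_t f, size_t z, size_t y, size_t x,
                              size_t F, size_t Z, size_t Y, size_t X) {
        const size_t FSV = 16;
        const size_t f_blocks = (F + FSV - 1) / FSV;            /* ceil(F / 16) */
        return ((((b * f_blocks + f / FSV) * Z + z) * Y + y) * X + x) * FSV + f % FSV;
    }

    int main(void) {
        /* Features 3 and 4 fall in the same 16-wide block, so they land on
         * adjacent addresses. */
        printf("%zu %zu\n",
               fsv16_index(0, 3, 0, 2, 1, 32, 4, 8, 8),
               fsv16_index(0, 4, 0, 2, 1, 32, 4, 8, 8));
        return 0;
    }
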
index d1bfdf2..153dfb8 100644 (file)
@@ -31,7 +31,9 @@ KERNEL(convolution_bfyx_f16)(
 #if BIAS_TERM
     __global BIAS_TYPE* biases,
 #endif
-    FUSED_OPS_DECLS
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
     uint split_idx) {
     const int f_block = get_group_id(1);
     const int lid = get_sub_group_local_id();
@@ -227,8 +229,10 @@ KERNEL(convolution_bfyx_f16)(
 #if OUTPUT_LEFTOVERS
     if ((f_block+1)*FEATURE_SLICE_SIZE >= OUTPUT_FEATURE_NUM) {
         for (int i = 0; i < OUTPUT_X_BLOCK_SIZE; i++) {
-            FUSED_OPS_LOAD_DATA;
-            DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+            FUSED_OPS_SCALAR;
+            dst[i] = FINAL_NAME_SCALAR;
+#endif
             if ((f_block*FEATURE_SLICE_SIZE + lid < OUTPUT_FEATURE_NUM) && (x + i) < OUTPUT_SIZE_X)
                 output[output_offset + i * output_x_pitch + lid] = dst[i];
         }
@@ -237,8 +241,10 @@ KERNEL(convolution_bfyx_f16)(
 #endif  // OUTPUT_LEFTOVERS
     {
         if (x + OUTPUT_X_BLOCK_SIZE <= OUTPUT_SIZE_X) {
-            FUSED_OPS_LOAD_DATA_VEC;
-            DO_ELTWISE_FUSED_OPS_VEC;
+#if HAS_FUSED_OPS
+            FUSED_OPS_VEC;
+            dst = FINAL_NAME_VEC;
+#endif
             // TODO Generalize for other block sizes
 #if OUTPUT_X_BLOCK_SIZE == 8
             UNIT_BLOCK_WRITE8(output, output_offset, dst);
@@ -254,8 +260,10 @@ KERNEL(convolution_bfyx_f16)(
         } else {
             const int x_tail = OUTPUT_SIZE_X - x;
             for (int i = 0; i < x_tail; i++) {
-                FUSED_OPS_LOAD_DATA;
-                DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+                FUSED_OPS_SCALAR;
+                dst[i] = FINAL_NAME_SCALAR;
+#endif
                 UNIT_BLOCK_WRITE(output, output_offset + i * output_x_pitch, dst[i]);
             }
         }
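
The store epilogue shared by this and the following convolution hunks writes a full OUTPUT_X_BLOCK_SIZE vector (UNIT_BLOCK_WRITE8) whenever a whole block fits in the output row, falls back to a scalar loop for the tail, and adds an OUTPUT_LEFTOVERS branch that bounds-checks the last partial feature slice. A scalar C sketch of just the blocking-with-tail pattern, with a block size of 8 assumed to match the UNIT_BLOCK_WRITE8 path:

    #include <stdio.h>

    #define X_BLOCK_SIZE 8

    /* Stand-in for the vectorised UNIT_BLOCK_WRITE8 path: store 8 contiguous
     * results at once. */
    static void block_write8(float *dst, const float *src) {
        for (int i = 0; i < X_BLOCK_SIZE; ++i)
            dst[i] = src[i];
    }

    static void store_row(float *out_row, int out_size_x, const float *results) {
        for (int x = 0; x < out_size_x; x += X_BLOCK_SIZE) {
            if (x + X_BLOCK_SIZE <= out_size_x) {
                block_write8(out_row + x, results + x);       /* full block */
            } else {
                for (int i = 0; i < out_size_x - x; ++i)      /* scalar tail */
                    out_row[x + i] = results[x + i];
            }
        }
    }

    int main(void) {
        float results[19], out[19];
        for (int i = 0; i < 19; ++i) results[i] = (float)i;
        store_row(out, 19, results);                          /* 2 blocks + tail of 3 */
        printf("%.0f %.0f\n", out[0], out[18]);
        return 0;
    }
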
index c60d90e..db805fe 100644 (file)
@@ -32,7 +32,9 @@ KERNEL(convolution_bfyx_f16_1x1)(
 #if BIAS_TERM
     __global BIAS_TYPE* biases,
 #endif
-    FUSED_OPS_DECLS
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
     uint split_idx) {
     const int xy = get_global_id(0);
     const int f_block = get_group_id(1);
@@ -193,8 +195,10 @@ KERNEL(convolution_bfyx_f16_1x1)(
             int xi = (x+i) % OUTPUT_SIZE_X;
             int yi = y + ((x+i) / OUTPUT_SIZE_X);
 
-            FUSED_OPS_LOAD_DATA;
-            DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+            FUSED_OPS_SCALAR;
+            dst[i] = FINAL_NAME_SCALAR;
+#endif
 
             output[output_offset + yi * output_y_pitch + xi * output_x_pitch + lid] = dst[i];
         }
@@ -204,8 +208,10 @@ KERNEL(convolution_bfyx_f16_1x1)(
     {
 #if !PADDED_OUTPUT
         if (xy * X_BLOCK_SIZE + X_BLOCK_SIZE <= OUTPUT_SIZE_X * OUTPUT_SIZE_Y) {
-            FUSED_OPS_LOAD_DATA_VEC;
-            DO_ELTWISE_FUSED_OPS_VEC;
+#if HAS_FUSED_OPS
+            FUSED_OPS_VEC;
+            dst = FINAL_NAME_VEC;
+#endif
 #if X_BLOCK_SIZE == 8
             UNIT_BLOCK_WRITE8(output, output_offset + y * output_y_pitch + x * output_x_pitch, dst);
 #elif X_BLOCK_SIZE == 4
@@ -216,8 +222,10 @@ KERNEL(convolution_bfyx_f16_1x1)(
         } else {
 #else
         if (x * X_BLOCK_SIZE + X_BLOCK_SIZE <= OUTPUT_SIZE_X) {
-            FUSED_OPS_LOAD_DATA_VEC;
-            DO_ELTWISE_FUSED_OPS_VEC;
+#if HAS_FUSED_OPS
+            FUSED_OPS_VEC;
+            dst = FINAL_NAME_VEC;
+#endif
 #if X_BLOCK_SIZE == 8
             UNIT_BLOCK_WRITE8(output, output_offset + y * output_y_pitch + x * output_x_pitch, dst);
 #elif X_BLOCK_SIZE == 4
@@ -234,8 +242,10 @@ KERNEL(convolution_bfyx_f16_1x1)(
                 int xi = (x+i) % OUTPUT_SIZE_X;
                 int yi = y + ((x+i) / OUTPUT_SIZE_X);
 
-                FUSED_OPS_LOAD_DATA;
-                DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+                FUSED_OPS_SCALAR;
+                dst[i] = FINAL_NAME_SCALAR;
+#endif
 
                 UNIT_BLOCK_WRITE(output, output_offset + yi * output_y_pitch + xi * output_x_pitch, dst[i]);
             }
index f660f55..f90573b 100644 (file)
@@ -27,7 +27,9 @@ KERNEL(convolution_depthwise)(
 #if BIAS_TERM
     __global BIAS_TYPE* biases,
 #endif
-    FUSED_OPS_DECLS
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
     uint split_idx)
 {
     const uint yx = get_global_id(0);
@@ -200,7 +202,6 @@ KERNEL(convolution_depthwise)(
 
     const uint output_fs_pad_before = OUTPUT_PAD_BEFORE_FEATURE_NUM / FEATURE_SLICE_SIZE;
 
-
     const uint output_offset =  b * output_b_pitch +
                                 (f_block + output_fs_pad_before) * output_fs_pitch +
                                 (OUTPUT_PAD_BEFORE_SIZE_Y + y) * output_y_pitch +
@@ -210,27 +211,33 @@ KERNEL(convolution_depthwise)(
     if ((f_block+1)*FEATURE_SLICE_SIZE >= OUTPUT_FEATURE_NUM)
     {
         for (int i = 0; i < X_BLOCK_SIZE; i++) {
-            FUSED_OPS_LOAD_DATA;
-            DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+            FUSED_OPS_SCALAR;
+            dst[i] = FINAL_NAME_SCALAR;
+#endif  // HAS_FUSED_OPS
             if ((x+i) < OUTPUT_SIZE_X && f_block*FEATURE_SLICE_SIZE + lid < OUTPUT_FEATURE_NUM)
                 output[output_offset + (x+i)*output_x_pitch + lid] = dst[i];
         }
     }
     else
-#endif
+#endif  // OUTPUT_LEFTOVERS
     {
         if (x + X_BLOCK_SIZE <= OUTPUT_SIZE_X)
         {
-            FUSED_OPS_LOAD_DATA_VEC;
-            DO_ELTWISE_FUSED_OPS_VEC;
+#if HAS_FUSED_OPS
+            FUSED_OPS_VEC;
+            dst = FINAL_NAME_VEC;
+#endif  // HAS_FUSED_OPS
             UNIT_BLOCK_WRITE8(output, output_offset + x*output_x_pitch, dst);
         }
         else
         {
             for (int i = 0; i < (OUTPUT_SIZE_X - x); i++)
             {
-                FUSED_OPS_LOAD_DATA;
-                DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+                FUSED_OPS_SCALAR;
+                dst[i] = FINAL_NAME_SCALAR;
+#endif  // HAS_FUSED_OPS
                 UNIT_BLOCK_WRITE(output, output_offset + (x+i)*output_x_pitch, dst[i]);
             }
         }
index fdba49a..1252370 100644 (file)
@@ -26,6 +26,9 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
 #if BIAS_TERM
     __global BIAS_TYPE* biases,
 #endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
     uint split_idx)
 {
     const int f_block = get_group_id(1);
@@ -159,8 +162,10 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
 #if OUTPUT_LEFTOVERS
     if ((f_block+1)*FEATURE_SLICE_SIZE >= OUTPUT_FEATURE_NUM) {
         for (int i = 0; i < OUTPUT_X_BLOCK_SIZE; i++) {
-            FUSED_OPS_LOAD_DATA;
-            DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+            FUSED_OPS_SCALAR;
+            dst[i] = FINAL_NAME_SCALAR;
+#endif
             if ((f_block*FEATURE_SLICE_SIZE + lid < OUTPUT_FEATURE_NUM) && (x + i) < OUTPUT_SIZE_X)
                 output[output_offset + i * output_x_pitch + lid] = dst[i];
         }
@@ -169,8 +174,10 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
 #endif  // OUTPUT_LEFTOVERS
     {
         if (x + OUTPUT_X_BLOCK_SIZE <= OUTPUT_SIZE_X) {
-            FUSED_OPS_LOAD_DATA_VEC;
-            DO_ELTWISE_FUSED_OPS_VEC;
+#if HAS_FUSED_OPS
+            FUSED_OPS_VEC;
+            dst = FINAL_NAME_VEC;
+#endif
             // TODO Generalize for other block sizes
 #if OUTPUT_X_BLOCK_SIZE == 8
             UNIT_BLOCK_WRITE8(output, output_offset, dst);
@@ -186,8 +193,10 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
         } else {
             const int x_tail = OUTPUT_SIZE_X - x;
             for (int i = 0; i < x_tail; i++) {
-                FUSED_OPS_LOAD_DATA;
-                DO_ELTWISE_FUSED_OPS;
+#if HAS_FUSED_OPS
+            FUSED_OPS_SCALAR;
+            dst[i] = FINAL_NAME_SCALAR;
+#endif
                 UNIT_BLOCK_WRITE(output, output_offset + i * output_x_pitch, dst[i]);
             }
         }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfzyx_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfzyx_ref.cl
deleted file mode 100644 (file)
index e67ceec..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "include/include_all.cl"
-
-KERNEL(convolution)(
-    __global INPUT0_TYPE* input,
-    __global OUTPUT_TYPE* output,
-    __global FILTER_TYPE* weights,
-#if BIAS_TERM
-    __global BIAS_TYPE* biases,
-#endif
-#if QUANTIZATION_TERM
-    __global float* quantizations,
-#endif
-#if CALIBRATION_TERM
-    __global float* calibrations,
-#endif
-    uint split_idx)
-{
-    const uint x = get_global_id(0);
-#if  OUTPUT_SIZE_Z == 1
-    const uint y = get_global_id(1);
-    const uint z = 0;
-#else
-    const uint y = get_global_id(1) % OUTPUT_SIZE_Y;
-    const uint z = get_global_id(1) / OUTPUT_SIZE_Y;
-#endif
-#if OUTPUT_BATCH_NUM == 1
-    const uint f = get_global_id(2);
-    const uint b = 0;
-#else
-    const uint f = get_global_id(2) % OUTPUT_FEATURE_NUM;
-    const uint b = get_global_id(2) / OUTPUT_FEATURE_NUM;
-#endif
-#if QUANTIZATION_TERM
-    int dotProd = 0;
-#else
-    UNIT_TYPE dotProd = UNIT_VAL_ZERO;
-#endif
-    const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
-    const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
-    const int input_z = z * STRIDE_SIZE_Z - PADDING_SIZE_Z;
-
-// TODO check DEPTHWISE_SEPARABLE_OPT
-#if DEPTHWISE_SEPARABLE_OPT
-    const uint in_split_offset = (f / FILTER_OFM_NUM) * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
-#else
-    const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
-#endif
-#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
-    const uint filter_offset = f*FILTER_OFM_PITCH + split_idx * FILTER_LENGTH;
-#else
-    const uint filter_offset = f*FILTER_OFM_PITCH;
-#endif
-    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + in_split_offset;
-
-// TODO check LOCAL_CONVOLUTION
-#ifdef LOCAL_CONVOLUTION
-    const int local_offset = FILTER_SIZE_X * FILTER_SIZE_Y * (x + OUTPUT_SIZE_X * y);
-#endif
-    for (uint k = 0; k < FILTER_IFM_NUM; ++k)
-    {
-        for (uint l = 0; l < FILTER_SIZE_Z ; ++l)
-        {
-            const int input_offset_z = input_z + l * DILATION_SIZE_Z;
-            const bool zero_z = input_offset_z >= INPUT0_SIZE_Z || input_offset_z < 0;
-
-            if(!zero_z)
-            {
-                for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
-                {
-                    const int input_offset_y = input_y + j * DILATION_SIZE_Y;
-                    const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
-
-                    if(!zero_y)
-                    {
-                        for (uint i = 0; i < FILTER_SIZE_X ; ++i)
-                        {
-                            const int input_offset_x = input_x + i * DILATION_SIZE_X;
-                            const bool zero_x = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
-
-                            if(!zero_x)
-                            {
-                                uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH +
-                                                 (uint)input_offset_z*INPUT0_Z_PITCH + k*INPUT0_FEATURE_PITCH;
-#ifdef LOCAL_CONVOLUTION
-                                uint filter_idx = filter_offset + k*FILTER_IFM_PITCH + l*FILTER_Z_PITCH + j*FILTER_Y_PITCH + i*FILTER_X_PITCH + local_offset;
-#else
-                                uint filter_idx = filter_offset + k*FILTER_IFM_PITCH + l*FILTER_Z_PITCH + j*FILTER_Y_PITCH + i*FILTER_X_PITCH;
-#endif
-#if QUANTIZATION_TERM
-                                dotProd += (int)input[input_idx] * (int)weights[filter_idx];
-#else
-                                dotProd += input[input_idx] * weights[filter_idx];
-#endif
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-#if BIAS_TERM
-#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
-    const uint bias_offset = split_idx * BIAS_LENGTH;
-#else
-    const uint bias_offset = 0;
-#endif
-#if   BIAS_PER_OUTPUT
-    const uint bias_index = bias_offset + GET_DATA_INDEX_5D(BIAS, b, f, z, y, x);
-#elif BIAS_PER_OFM
-    const uint bias_index = bias_offset + f;
-#endif
-#if QUANTIZATION_TERM
-#if CALIBRATION_TERM
-
-    dotProd = (UNIT_TYPE)round(((float)dotProd * quantizations[f] * I_QF + biases[bias_index]) * calibrations[f]);
-#else  // CALIBRATION_TERM
-    dotProd = (UNIT_TYPE)round(((float)dotProd * quantizations[f] * I_QF + biases[bias_index]) * O_QF);
-#endif // CALIBRATION_TERM
-#else  // QUANTIZATION_TERM
-    dotProd += (UNIT_TYPE)biases[bias_index];
-#endif // QUANTIZATION_TERM
-#endif
-
-    const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * OUTPUT_FEATURE_NUM;
-    const uint dst_index = GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x) + out_split_offset;
-
-#if QUANTIZATION_TERM
-    output[dst_index] = ACTIVATION(convert_char(dotProd), ACTIVATION_PARAMS);
-#else
-    output[dst_index] = ACTIVATION(dotProd, ACTIVATION_PARAMS);
-#endif
-}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_fs_byx_fsv32_depthwise.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_fs_byx_fsv32_depthwise.cl
new file mode 100644 (file)
index 0000000..bbe6f95
--- /dev/null
@@ -0,0 +1,216 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/data_types.cl"
+#include "include/unit_type.cl"
+
+#define unroll_for __attribute__((opencl_unroll_hint)) for
+
+#define INPUT0_SIZE_X_WITH_PADDING (INPUT0_PAD_BEFORE_SIZE_X + INPUT0_SIZE_X + INPUT0_PAD_AFTER_SIZE_X)
+#define INPUT0_SIZE_Y_WITH_PADDING (INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y)
+
+#define OUTPUT_SIZE_X_WITH_PADDING (OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X)
+#define OUTPUT_SIZE_Y_WITH_PADDING (OUTPUT_PAD_BEFORE_SIZE_Y + OUTPUT_SIZE_Y + OUTPUT_PAD_AFTER_SIZE_Y)
+
+// In some cases the input padding may be bigger than needed; these variables describe the offset into that padding.
+#define INPUT0_PADDING_OFFSET_SIZE_X (INPUT0_PAD_BEFORE_SIZE_X - PADDING_SIZE_X)
+#define INPUT0_PADDING_OFFSET_SIZE_Y (INPUT0_PAD_BEFORE_SIZE_Y - PADDING_SIZE_Y)
+
+// ======================================================================================
+// Required JIT definitions:
+// --------------------------------------------------------------------------------------
+// SUB_GROUP_SIZE     - [int] sub-group/simd size; limited to 16
+// FSV                - [int] feature slice size; limited to 32
+// FSV_PER_THREAD     - [int] number of features from the slice per thread;
+//                            must equal FSV / SUB_GROUP_SIZE
+// OUTPUT_BLOCK_WIDTH - [int] number of elements calculated in the x dimension by one thread
+// INPUT_BLOCK_WIDTH  - [int] number of contiguous input elements needed to calculate the output block
+// ======================================================================================
+
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+__attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE)))
+KERNEL(convolution_gpu_fs_byx_fsv32)(
+       __global UNIT_TYPE* input,
+       __global UNIT_TYPE* output,
+       __global UNIT_TYPE* weights,
+#if BIAS_TERM
+       __global UNIT_TYPE* biases,
+#endif
+       int split_idx)
+{
+    uint oc = get_global_id(0) * OUTPUT_BLOCK_WIDTH;
+    uint or = get_global_id(1);
+    uint fs_b_id = get_group_id(2);
+    uint sglid = get_sub_group_local_id();
+
+    uint fs = fs_b_id / INPUT0_BATCH_NUM;
+    uint b = fs_b_id - fs * INPUT0_BATCH_NUM;
+
+    UNIT_TYPE in[INPUT_BLOCK_WIDTH * FSV_PER_THREAD];
+    UNIT_TYPE w[FSV_PER_THREAD];
+    UNIT_TYPE out[OUTPUT_BLOCK_WIDTH * FSV_PER_THREAD];
+
+    for (uint out_i = 0; out_i < OUTPUT_BLOCK_WIDTH * FSV_PER_THREAD; ++out_i)
+    {
+        out[out_i] = UNIT_VAL_ZERO;
+    }
+
+    uint input_offset = 0;
+    input_offset += (oc * STRIDE_SIZE_X + INPUT0_PADDING_OFFSET_SIZE_X) * FSV;
+    input_offset += (or * STRIDE_SIZE_Y + INPUT0_PADDING_OFFSET_SIZE_Y) * INPUT0_SIZE_X_WITH_PADDING * FSV;
+    input_offset += b * INPUT0_SIZE_X_WITH_PADDING * INPUT0_SIZE_Y_WITH_PADDING * FSV;
+    input_offset += fs * INPUT0_SIZE_X_WITH_PADDING * INPUT0_SIZE_Y_WITH_PADDING * FSV * INPUT0_BATCH_NUM;
+
+    uint weight_offset = 0;
+
+    weight_offset += fs * FILTER_SIZE_X * FILTER_SIZE_Y * FSV;
+
+    uint tmp_input_offset = input_offset;
+    for (uint f_y = 0; f_y < FILTER_SIZE_Y; ++f_y)
+    {
+        // ====================================================================
+        // Load input:
+        uint in_x = 0;
+        unroll_for (; in_x + 2 <= INPUT_BLOCK_WIDTH; in_x += 2) // block read of 4 values (two x positions) at a time
+        {
+            UNIT_TYPE4 tmp_read = UNIT_BLOCK_READ4(input, tmp_input_offset + in_x * FSV);
+            in[in_x * FSV_PER_THREAD + 0] = tmp_read.s0;
+            in[in_x * FSV_PER_THREAD + 1] = tmp_read.s1;
+            in[in_x * FSV_PER_THREAD + 2] = tmp_read.s2;
+            in[in_x * FSV_PER_THREAD + 3] = tmp_read.s3;
+        }
+        unroll_for (; in_x < INPUT_BLOCK_WIDTH; ++in_x)
+        {
+            UNIT_TYPE2 tmp_read = UNIT_BLOCK_READ2(input, tmp_input_offset + in_x * FSV);
+            in[in_x * FSV_PER_THREAD + 0] = tmp_read.s0;
+            in[in_x * FSV_PER_THREAD + 1] = tmp_read.s1;
+        }
+        // ====================================================================
+
+        // Move temporary input offset to next row
+        tmp_input_offset += DILATION_SIZE_Y * INPUT0_SIZE_X_WITH_PADDING * FSV;
+
+        uint tmp_weight_offset = weight_offset;
+
+        // Perform convolutions with loaded input features
+        unroll_for (uint f_x = 0; f_x < FILTER_SIZE_X; ++f_x)
+        {
+            // Load weights
+            UNIT_TYPE2 tmp_read = UNIT_BLOCK_READ2(weights, tmp_weight_offset + f_x * FSV);
+            w[0] = tmp_read.s0;
+            w[1] = tmp_read.s1;
+
+            unroll_for (uint out_x = 0; out_x < OUTPUT_BLOCK_WIDTH; ++out_x)
+            {
+                unroll_for (uint out_f = 0; out_f < FSV_PER_THREAD; ++out_f)
+                {
+                    const uint in_idx = (out_x * STRIDE_SIZE_X + f_x * DILATION_SIZE_X) * FSV_PER_THREAD + out_f;
+                    const UNIT_TYPE in_val = in[in_idx];
+                    const uint out_idx = out_x * FSV_PER_THREAD + out_f;
+
+                    out[out_idx] = mad(in_val, w[out_f], out[out_idx]);
+                }
+            }
+
+        }
+        // Move temporary weight offset to next input feature
+        tmp_weight_offset += FILTER_SIZE_Y * FILTER_SIZE_X * FSV;
+        // ====================================================================
+        // Move weight offset to next row
+        weight_offset += FILTER_SIZE_X * FSV;
+    }
+    // ========================================================================
+    // Bias
+#if BIAS_TERM
+    unroll_for (uint out_x = 0; out_x < OUTPUT_BLOCK_WIDTH; ++out_x)
+    {
+#if BIAS_PER_OUTPUT
+            // TODO Change bias format to use block reads
+            unroll_for (uint out_f = 0; out_f < FSV_PER_THREAD; ++out_f)
+            {
+                const uint bias_index = (fs * FSV + out_f * SUB_GROUP_SIZE + sglid) * OUTPUT_SIZE_X * OUTPUT_SIZE_Y +
+                                        or * OUTPUT_SIZE_X +
+                                        (oc + out_x);
+                out[out_x * FSV_PER_THREAD + out_f] += biases[bias_index];
+            }
+#else // BIAS_PER_OUTPUT
+            const uint bias_index = fs * FSV;
+            UNIT_TYPE2 bias_read = UNIT_BLOCK_READ2(biases, bias_index);
+            out[out_x * FSV_PER_THREAD + 0] += bias_read.s0;
+            out[out_x * FSV_PER_THREAD + 1] += bias_read.s1;
+#endif // BIAS_PER_OUTPUT
+    }
+#endif // BIAS_TERM
+    // ========================================================================
+
+    // ========================================================================
+    // Activation
+    unroll_for (uint out_x = 0; out_x < OUTPUT_BLOCK_WIDTH; ++out_x)
+    {
+        unroll_for (uint out_f = 0; out_f < FSV_PER_THREAD; ++out_f)
+        {
+            const uint out_idx = out_x * FSV_PER_THREAD + out_f;
+            out[out_idx] = ACTIVATION(out[out_idx], ACTIVATION_PARAMS);
+        }
+    }
+    // ========================================================================
+
+    // ========================================================================
+    // Store results:
+    const uint pad_before_fs = (OUTPUT_PAD_BEFORE_FEATURE_NUM / FSV);
+
+    uint output_offset = 0;
+    output_offset += (oc + OUTPUT_PAD_BEFORE_SIZE_X) * FSV;
+    output_offset += (or + OUTPUT_PAD_BEFORE_SIZE_Y) * FSV * OUTPUT_SIZE_X_WITH_PADDING;
+    output_offset += b  * FSV * OUTPUT_SIZE_X_WITH_PADDING * OUTPUT_SIZE_Y_WITH_PADDING;
+    output_offset += (pad_before_fs + fs) * FSV * OUTPUT_SIZE_X_WITH_PADDING * OUTPUT_SIZE_Y_WITH_PADDING * OUTPUT_BATCH_NUM;
+
+    const bool full_f = OUTPUT_FEATURE_NUM % FSV == 0 || fs * FSV + FSV <= OUTPUT_FEATURE_NUM;
+    const bool full_x = OUTPUT_SIZE_X % OUTPUT_BLOCK_WIDTH == 0 || oc + OUTPUT_BLOCK_WIDTH <= OUTPUT_SIZE_X;
+
+    if (full_f && full_x)
+    {
+        // Case without bounds checking
+        unroll_for (uint out_x = 0; out_x < OUTPUT_BLOCK_WIDTH; ++out_x)
+        {
+            UNIT_TYPE2 tmp_write = (UNIT_TYPE2)(out[out_x * FSV_PER_THREAD + 0],
+                                                out[out_x * FSV_PER_THREAD + 1]);
+            UNIT_BLOCK_WRITE2(output, output_offset, tmp_write);
+            output_offset += FSV;
+        }
+    }
+    else
+    {
+        unroll_for (uint out_x = 0; out_x < OUTPUT_BLOCK_WIDTH; ++out_x)
+        {
+            unroll_for (uint out_f = 0; out_f < FSV_PER_THREAD; ++out_f)
+            {
+                if (oc + out_x < OUTPUT_SIZE_X && fs * FSV + sglid + out_f * SUB_GROUP_SIZE < OUTPUT_FEATURE_NUM)
+                    output[output_offset + sglid] = out[out_x * FSV_PER_THREAD + out_f];
+                output_offset += SUB_GROUP_SIZE;
+            }
+        }
+    }
+    // ========================================================================
+}
+
+#undef unroll_for
+
+#undef INPUT0_SIZE_X_WITH_PADDING
+#undef INPUT0_SIZE_Y_WITH_PADDING
+
+#undef OUTPUT_SIZE_X_WITH_PADDING
+#undef OUTPUT_SIZE_Y_WITH_PADDING
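The "Required JIT definitions" comment near the top of this new depthwise kernel lists the constants the host-side kernel selector must provide. A minimal consistency sketch of values satisfying those constraints (the concrete numbers and the INPUT_BLOCK_WIDTH relation are illustrative assumptions, not taken from the selector code):

    // Illustration only; the kernel selector derives the real values per convolution shape.
    //   SUB_GROUP_SIZE     = 16
    //   FSV                = 32
    //   FSV_PER_THREAD     = FSV / SUB_GROUP_SIZE = 2
    //   OUTPUT_BLOCK_WIDTH = 8
    //   INPUT_BLOCK_WIDTH >= (OUTPUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X
    //                        + (FILTER_SIZE_X - 1) * DILATION_SIZE_X + 1

The last relation just says the loaded input window must cover every filter tap for every output in the block.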
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_tutorial.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_tutorial.cl
deleted file mode 100644 (file)
index 560e92a..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright (c) 2016-2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ADVANCED_TUTORIAL
-
-#include "include/include_all.cl"
-
-// change this function with your own idea. please note that it's a naive implementation.
-KERNEL(convolution_tutorial)(
-    __global INPUT0_TYPE* input,        // input buffer
-    __global OUTPUT_TYPE* output,       // output buffer
-    __global FILTER_TYPE* weights,      // weights buffer (training output)
-#if BIAS_TERM                           // in case we have bias in convolution params
-    __global BIAS_TYPE* biases,         // bias buffer (training output)
-#endif
-    uint split_idx)                     // which split index to process
-{
-#if defined OUTPUT_LAYOUT_YXFB                  // in Case of YXFB we need a different processing order than BFYX (from performance aspect)
-    const uint x = get_global_id(1);
-    const uint y = get_global_id(2);
-#if OUTPUT_BATCH_NUM == 1
-    const uint f = get_global_id(0);
-    const uint b = 0;
-#else
-    const uint f = get_global_id(0) % OUTPUT_FEATURE_NUM;
-    const uint b = get_global_id(0) / OUTPUT_FEATURE_NUM;
-#endif
-#else
-    const uint x = get_global_id(0);
-    const uint y = get_global_id(1);
-#if OUTPUT_BATCH_NUM == 1
-    const uint f = get_global_id(2);
-    const uint b = 0;
-#else
-    const uint f = get_global_id(2) % OUTPUT_FEATURE_NUM;
-    const uint b = get_global_id(2) / OUTPUT_FEATURE_NUM;
-#endif
-#endif
-
-    UNIT_TYPE dotProd = UNIT_VAL_ZERO;                                          // UNIT_TYPE - half/float/etc
-    
-#if BIAS_TERM
-    #if   BIAS_PER_OUTPUT
-        const uint bias_index = GET_DATA_INDEX(BIAS, b, f, y, x);               // helper macro to cacluate indices
-    #elif BIAS_PER_OFM
-        const uint bias_index = f;
-    #endif
-    dotProd = biases[bias_index];
-#endif
-
-    const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
-    const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
-
-    // in case of depth separable optimization we have to dynamically calculate the split index
-#if DEPTHWISE_SEPARABLE_OPT
-    const uint in_split_offset = (f / FILTER_OFM_NUM) * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
-#else
-    const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
-#endif
-    const uint filter_offset = f*FILTER_OFM_PITCH;
-    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + in_split_offset;
-
-    for (uint k = 0; k < FILTER_IFM_NUM; ++k)
-    {
-        for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
-        {
-            const int input_offset_y = input_y + j * DILATION_SIZE_Y;
-            const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
-
-            if(!zero_y)
-            {
-                for (uint i = 0; i < FILTER_SIZE_X ; ++i)
-                {
-                    const int input_offset_x = input_x + i * DILATION_SIZE_X;
-                    const bool zero_x = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
-
-                    if(!zero_x)
-                    {
-                        uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH + k*INPUT0_FEATURE_PITCH;
-                        uint filter_idx = filter_offset + k*FILTER_IFM_PITCH + j*FILTER_Y_PITCH + i*FILTER_X_PITCH;
-                        dotProd += input[input_idx]*weights[filter_idx];    // finally the convolution calcualtion.
-                    }
-                }
-            }
-        }
-    }
-    
-    const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * OUTPUT_FEATURE_NUM;    // calculating output split offset
-    const uint dst_index = GET_DATA_INDEX(OUTPUT, b, f, y, x) + out_split_offset;           // helper macro to calculate output index
-    output[dst_index] = ACTIVATION(dotProd, ACTIVATION_PARAMS);                                    // run activation functions (RelU in most cases) and set output
-}
-
-#else
-
-//#include "put here your include files"
-
-__kernel void convolution_tutorial(
-    const __global UNIT_TYPE* input,
-    __global UNIT_TYPE* output,
-    const __global UNIT_TYPE* filter,
-    const __global UNIT_TYPE* bias)
-{
-    // fill here your kernel
-}
-
-#endif
\ No newline at end of file
index 093b2fb..829d64a 100644 (file)
@@ -98,23 +98,36 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                             uint fixed_input_offset_x = (uint)input_offset_x / STRIDE_SIZE_X;
                             uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
                             uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
+#if OUTPUT_LAYOUT_BFZYX_F16
+                            uint input_idx;
+#else
                             uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH + (uint)fixed_input_offset_z*INPUT0_Z_PITCH;
-
+#endif
 #if GRADIENT
                             uint filter_idx = filter_offset + ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Z - k - 1)*FILTER_Z_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                             for (uint h = 0; h < FILTER_OFM_NUM; h++)
                             {
+#if OUTPUT_LAYOUT_BFZYX_F16
+                                input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, batch_offset, h, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
+#endif
                                 result = fma(input[input_idx], filter[filter_idx], result);
                                 filter_idx += FILTER_OFM_PITCH;
+#ifndef OUTPUT_LAYOUT_BFZYX_F16
                                 input_idx += INPUT0_FEATURE_PITCH;
+#endif
                             }
 #else
                             uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Z - k - 1)*FILTER_Z_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                             for (uint h = 0; h < FILTER_IFM_NUM; h++)
                             {
+#if OUTPUT_LAYOUT_BFZYX_F16
+                                input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, batch_offset, h, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
+#endif
                                 result = fma(input[input_idx], filter[filter_idx], result);
                                 filter_idx += FILTER_IFM_PITCH;
+#ifndef OUTPUT_LAYOUT_BFZYX_F16
                                 input_idx += INPUT0_FEATURE_PITCH;
+#endif
                             }
 #endif
                         }
@@ -133,9 +146,17 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
     result += bias[ofm_offset + bias_offset];
 #endif
     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
+#if defined OUTPUT_LAYOUT_BFZYX_F16
+    const uint dst_index = OUTPUT_OFFSET + GET_DATA_BFZYX_F16_INDEX(OUTPUT, batch_offset, ofm_offset, out_z, out_y, out_x);
+#else
     const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_z*OUTPUT_Z_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH;
+#endif
 #if FUSED_ELTWISE
+#if defined OUTPUT_LAYOUT_BFZYX_F16
+    const uint fused_index = INPUT1_OFFSET + GET_DATA_BFZYX_F16_INDEX(INPUT1, batch_offset, ofm_offset, out_z, out_y, out_x);
+#else
     const uint fused_index = INPUT1_OFFSET + split_idx * INPUT1_FEATURE_PITCH * FILTER_OFM_NUM + batch_offset*INPUT1_BATCH_PITCH + ofm_offset*INPUT1_FEATURE_PITCH + out_z*INPUT1_Z_PITCH + out_y*INPUT1_Y_PITCH + out_x*INPUT1_X_PITCH;
+#endif
 #if !GRADIENT
        output[dst_index] = ACTIVATION(result + fuse_input[fused_index], ACTIVATION_PARAMS);
 #else
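This hunk switches the deconvolution reference kernel to GET_DATA_BFZYX_F16_INDEX when the data uses the bfzyx_f16 layout, where the feature dimension is stored in slices of 16, so a single INPUT0_FEATURE_PITCH increment per input feature no longer walks the buffer correctly. As a rough, padding-free restatement of that layout (the real macro, including padding handling, lives in the shared include files):

    // idx(b, f, z, y, x) for bfzyx_f16, ignoring padding -- illustration only:
    // idx = ((((b * (F / 16) + f / 16) * SIZE_Z + z) * SIZE_Y + y) * SIZE_X + x) * 16 + (f % 16);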
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_image_tutorial.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_image_tutorial.cl
deleted file mode 100644 (file)
index c0a1e10..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright (c) 2016-2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "include/include_all.cl"
-
-KERNEL(fully_connected_gpu_image_tutorial)(
-    const __global INPUT0_TYPE* input,
-    __global OUTPUT_TYPE* output,
-    read_only image2d_t weights
-#if BIAS_TERM
-    , const __global BIAS_TYPE* biases
-#endif
-    )
-{
-    const uint ofm = get_global_id(0);
-    const uint b = get_global_id(1);
-    DECLARE_SAMPLER;
-    
-    ACCUMULATOR_TYPE dotProd = 0;
-
-    for (uint iyx = 0; iyx < (INPUT0_FEATURE_NUM * INPUT0_SIZE_Y * INPUT0_SIZE_X + 3) / 4; ++iyx)
-    {
-        MAKE_VECTOR_TYPE(UNIT_TYPE, 4) weights_val = IMAGE_READ(weights, (int2)(iyx, ofm));
-        const uint input0_idx = INPUT0_OFFSET + b * INPUT0_BATCH_PITCH + iyx * 4;
-        
-        dotProd += (ACCUMULATOR_TYPE)(input[input0_idx] * weights_val.x);
-        if(iyx*4 + 1 >= INPUT0_BATCH_PITCH) break;
-        dotProd += (ACCUMULATOR_TYPE)(input[input0_idx + 1] * weights_val.y);
-        if(iyx*4 + 2 >= INPUT0_BATCH_PITCH) break;
-        dotProd += (ACCUMULATOR_TYPE)(input[input0_idx + 2] * weights_val.z);
-        if(iyx*4 + 3 >= INPUT0_BATCH_PITCH) break;
-        dotProd += (ACCUMULATOR_TYPE)(input[input0_idx + 3] * weights_val.w);
-    }
-    
-    const uint output_idx = GET_DATA_INDEX(OUTPUT, b, ofm, 0, 0);
-
-#if BIAS_TERM
-    dotProd += (ACCUMULATOR_TYPE)biases[ofm];
-#endif
-
-    output[output_idx] = ACTIVATION((UNIT_TYPE)dotProd, ACTIVATION_PARAMS);
-    MAKE_VECTOR_TYPE(UNIT_TYPE, 4) weights_val = IMAGE_READ(weights, (int2)(1, 0));
-}
\ No newline at end of file
index e01db8b..bc24b50 100644 (file)
@@ -92,7 +92,13 @@ KERNEL(kernel_name)(
 {
     // Convolution part.
     const uint x = get_global_id(0);
+#if  OUTPUT_DIMS > 4
+    const uint y = get_global_id(1) % OUTPUT_SIZE_Y;
+    const uint z = get_global_id(1) / OUTPUT_SIZE_Y;
+#else
     const uint y = get_global_id(1);
+    const uint z = 0;
+#endif
 #if OUTPUT_BATCH_NUM == 1
     const uint f = get_global_id(2);
     const uint b = 0;
@@ -104,6 +110,11 @@ KERNEL(kernel_name)(
     ACCUMULATOR_TYPE dotProd = (ACCUMULATOR_TYPE)0;
     const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
     const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+#if  OUTPUT_DIMS > 4
+    const int input_z = z * STRIDE_SIZE_Z - PADDING_SIZE_Z;
+#else
+    const int input_z = 0;
+#endif
 
 #if DEPTHWISE_SEPARABLE_OPT
     const uint in_split_offset = (f / FILTER_OFM_NUM) * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
@@ -112,33 +123,42 @@ KERNEL(kernel_name)(
 #endif
     for (uint k = 0; k < FILTER_IFM_NUM; ++k)
     {
-        for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
+        for (uint l = 0; l < FILTER_SIZE_Z ; ++l)
         {
-            const int input_offset_y = input_y + j * DILATION_SIZE_Y;
-            const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
+            const int input_offset_z = input_z + l * DILATION_SIZE_Z;
+            const bool zero_z = input_offset_z >= INPUT0_SIZE_Z || input_offset_z < 0;
 
-            if(!zero_y)
+            if(!zero_z)
             {
-                for (uint i = 0; i < FILTER_SIZE_X ; ++i)
+                for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
                 {
-                    const int input_offset_x = input_x + i * DILATION_SIZE_X;
-                    const bool zero_x = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
+                    const int input_offset_y = input_y + j * DILATION_SIZE_Y;
+                    const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
 
-                    if(!zero_x)
+                    if(!zero_y)
                     {
-                        uint input_idx =
-                            GET_DATA_INDEX(
-                                INPUT0, b, k, input_offset_y, input_offset_x)
-                            + in_split_offset;
-                        uint filter_idx = GET_FILTER_INDEX(FILTER, f, k, j, i);
+                        for (uint i = 0; i < FILTER_SIZE_X ; ++i)
+                        {
+                            const int input_offset_x = input_x + i * DILATION_SIZE_X;
+                            const bool zero_x = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
+
+                            if(!zero_x)
+                            {
+                                uint input_idx =
+                                    GET_DATA_INDEX_5D(
+                                        INPUT0, b, k, input_offset_z, input_offset_y, input_offset_x)
+                                    + in_split_offset;
+                                uint filter_idx = GET_FILTER_INDEX_5D(FILTER, f, k, l, j, i);
 #if GROUPED && !DEPTHWISE_SEPARABLE_OPT
-                        filter_idx += split_idx * FILTER_LENGTH;
+                                filter_idx += split_idx * FILTER_LENGTH;
 #endif
 #ifdef LOCAL_CONVOLUTION
-                        filter_idx += FILTER_SIZE_X * FILTER_SIZE_Y
-                            * (x + OUTPUT_SIZE_X * y);
+                                filter_idx += FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z
+                                    * (x + OUTPUT_SIZE_X * y + OUTPUT_SIZE_X * OUTPUT_SIZE_Y * z);
 #endif
-                        dotProd += TO_ACCUMULATOR_TYPE(conv_input[input_idx]) * TO_ACCUMULATOR_TYPE(weights[filter_idx]);
+                                dotProd += TO_ACCUMULATOR_TYPE(conv_input[input_idx]) * TO_ACCUMULATOR_TYPE(weights[filter_idx]);
+                            }
+                        }
                     }
                 }
             }
@@ -152,7 +172,7 @@ KERNEL(kernel_name)(
         const uint bias_offset = 0;
     #endif
     #if   BIAS_PER_OUTPUT
-        const uint bias_index = bias_offset + GET_DATA_INDEX(BIAS, b, f, y, x);
+        const uint bias_index = bias_offset + GET_DATA_INDEX_5D(BIAS, b, f, z, y, x);
     #elif BIAS_PER_OFM
         const uint bias_index = bias_offset + f;
     #endif
@@ -185,7 +205,7 @@ KERNEL(kernel_name)(
 #endif
 
     ACTIVATION_TYPE after_activation =
-        ACTIVATION_FUNC_CONV_TYPED(ACTIVATION_TYPE_BASE, dequantized, NL_M_CONV_TYPED, NL_N_CONV_TYPED);
+        ACTIVATION_CONV_TYPED(ACTIVATION_TYPE_BASE, dequantized, ACTIVATION_PARAMS_CONV_TYPED);
 
 #if CALIBRATION_TERM
     #if GROUPED && !DEPTHWISE_SEPARABLE_OPT
@@ -204,7 +224,7 @@ KERNEL(kernel_name)(
     after_output_calibration = AFTER_CALIBRATION_ROUND(after_output_calibration);
 
     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * OUTPUT_FEATURE_NUM;
-    const uint dst_index = GET_DATA_INDEX(OUTPUT, b, f, y, x) + out_split_offset;
+    const uint dst_index = GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x) + out_split_offset;
 
 #if !defined(ACTIVATION_ELTW_TYPED)
     output[dst_index] = TO_OUTPUT_TYPE_SAT(after_output_calibration);
@@ -213,7 +233,7 @@ KERNEL(kernel_name)(
 #    if IN_OUT_OPT == 1
     OUTPUT_TYPE eltw_elem = output[dst_index];
 #    else
-    INPUT1_TYPE eltw_elem = eltw_input[GET_DATA_INDEX(INPUT1, b, f, y * ELTW_STRIDE_Y, x * ELTW_STRIDE_X)];
+    INPUT1_TYPE eltw_elem = eltw_input[GET_DATA_INDEX_5D(INPUT1, b, f, z * ELTW_STRIDE_Z, y * ELTW_STRIDE_Y, x * ELTW_STRIDE_X)];
 #    endif
 
 #    if defined(NON_CONV_SCALE)
@@ -228,11 +248,10 @@ KERNEL(kernel_name)(
     // TODO: Support other eltwise operations.
     ACTIVATION_TYPE before_eltw_activation = after_output_calibration + eltw_elem_scaled;
     ACTIVATION_TYPE after_eltw_activation =
-        ACTIVATION_FUNC_ELTW_TYPED(
+        ACTIVATION_ELTW_TYPED(
             ACTIVATION_TYPE_BASE,
             before_eltw_activation,
-            NL_M_ELTW_TYPED,
-            NL_N_ELTW_TYPED);
+            ACTIVATION_PARAMS_ELTW_TYPED);
 
     after_eltw_activation =
         AFTER_ELTW_CALIBRATION_ROUND(after_eltw_activation
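The activation calls in this hunk move from the ACTIVATION_FUNC_*_TYPED form, which took the two coefficients NL_M_* and NL_N_* explicitly, to the ACTIVATION_*_TYPED form that takes a single ACTIVATION_PARAMS_* bundle. A minimal before/after sketch of the call-site pattern (the expansion shown for the params macro is an assumption; the generated JIT carries whatever parameters the chosen activation actually needs):

    // Before: coefficients passed one by one
    ACTIVATION_FUNC_CONV_TYPED(ACTIVATION_TYPE_BASE, dequantized, NL_M_CONV_TYPED, NL_N_CONV_TYPED)
    // After: one JIT-generated parameter pack
    ACTIVATION_CONV_TYPED(ACTIVATION_TYPE_BASE, dequantized, ACTIVATION_PARAMS_CONV_TYPED)
    // e.g. (illustrative) #define ACTIVATION_PARAMS_CONV_TYPED NL_M_CONV_TYPED, NL_N_CONV_TYPED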
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_tree_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_tree_gpu_ref.cl
new file mode 100644 (file)
index 0000000..3f3bee3
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+KERNEL(gather_tree_gpu_ref)(
+    const __global UNIT_TYPE* step_input,
+    const __global UNIT_TYPE* parent_input,
+    const __global UNIT_TYPE* max_seq_len_input,
+    const __global UNIT_TYPE* end_token,
+    __global UNIT_TYPE* output)
+{
+    const uint beam = get_global_id(0);
+    const uint batch = get_global_id(1);
+    /*
+         b -> time
+         f -> batch
+         y -> beam
+    */
+    uint parent = beam;
+    for(int time = INPUT0_BATCH_NUM - 1; time >= 0; time--) {
+
+        while (time >= (uint)max_seq_len_input[batch]) {
+            output[OUTPUT_GET_INDEX(time, batch, beam, 0)] = end_token[0];
+            time--;
+        }
+        output[OUTPUT_GET_INDEX(time, batch, beam, 0)] =
+            step_input[INPUT0_GET_INDEX(time, batch, parent, 0)];
+        parent = (uint)parent_input[INPUT0_GET_INDEX(time, batch, parent, 0)];
+    }
+
+}
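The comment inside the new gather_tree kernel maps b -> time, f -> batch, y -> beam: each work item walks one (batch, beam) pair backwards through time, following parent indices and filling positions past max_seq_len with the end token. A host-side C reference of the same backtracking, assuming a flat [time][batch][beam] layout (the indexing here is a simplification of INPUT0_GET_INDEX / OUTPUT_GET_INDEX):

    /* Reference backtracking for one (batch, beam) pair; layout assumed [time][batch][beam]. */
    void gather_tree_ref(const float *step, const float *parents, const float *max_seq_len,
                         const float *end_token, float *out,
                         int max_time, int batch_size, int beam_width, int batch, int beam)
    {
        int parent = beam;
        for (int time = max_time - 1; time >= 0; time--) {
            int row = (time * batch_size + batch) * beam_width;
            if (time >= (int)max_seq_len[batch]) {
                out[row + beam] = end_token[0];       /* past the end of this sequence */
                continue;
            }
            out[row + beam] = step[row + parent];     /* token chosen on this path at this step */
            parent = (int)parents[row + parent];      /* hop to the parent beam for the previous step */
        }
    }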
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_bwd_data.cl
new file mode 100644 (file)
index 0000000..ed3f3da
--- /dev/null
@@ -0,0 +1,312 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "ocl_types.h"
+
+__attribute__((reqd_work_group_size(16, 1, 1)))
+#    if VER_16MB16C == 1 || VER_8OW16C == 1
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+#    endif
+
+KERNEL(gen9_common_conv_bwd_data_kernel)(
+        const  __global DATA_T *diff_dst,
+        __global DATA_T *diff_src,
+        const __global DATA_T *wei,
+#if WITH_BIAS
+        const __global DATA_T *bias,
+#endif
+        uint split_idx)
+{
+
+#    if VER_16MB16C == 1 || VER_8OW16C == 1
+    const int mb_unroll = 16;
+
+    const int ic = get_group_id(0);
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    int mb = get_group_id(2) * mb_unroll;
+
+#        if IS_DW
+    const int g = ic * IC_BLOCK;
+    const int gic = 0;
+#        else
+    const int g = split_idx;
+    const int gic = ic;
+#        endif
+
+#        if CASE_3D
+    const int id = sp / (IW * IH);
+    const int ihw = sp % (IW * IH);
+#        else
+    const int id = 0;
+    const int ihw = sp;
+#        endif
+    const int ih = ihw / IW;
+    const int iw = ihw % IW;
+
+    diff_dst += mb * OC * G * OD * OH * OW + g * OC * OD * OH * OW * MB_BLOCK;
+
+#if WITH_BIAS
+    DATA8_T blockC00 = bias[ic * IC_BLOCK + local_id];
+    DATA8_T blockC01 = bias[ic * IC_BLOCK + local_id];
+#else
+    DATA8_T blockC00 = 0.0f;
+    DATA8_T blockC01 = 0.0f;
+#endif
+
+
+    wei += gic * KD * KH * KW * OC_BLOCK * IC_BLOCK;
+
+    int ocb = 0;
+    do {
+#        if KH != 1 || KW != 1 || KD != 1
+        for (int kd = 0; kd < KD; ++kd)
+            for (int kh = 0; kh < KH; ++kh)
+                for (int kw = 0; kw < KW; ++kw) {
+
+                    if (iw + PW < kw * (1 + DW) || ih + PH < kh * (1 + DH))
+                        continue;
+#            if CASE_3D
+                    if (id + PD < kd * (1 + DD))
+                        continue;
+                    int od = id - kd * (1 + DD) + PD;
+                    if (od % SD != 0)
+                        continue;
+                    od /= SD;
+                    if (od >= OD)
+                        continue;
+#            endif
+
+                    int ow = iw - kw * (1 + DW) + PW;
+                    int oh = ih - kh * (1 + DH) + PH;
+                    if (ow % SW != 0 || oh % SH != 0)
+                        continue;
+                    ow /= SW;
+                    oh /= SH;
+                    if (oh >= OH || ow >= OW)
+                        continue;
+
+                    const __global DATA_T *diff_dst1 = diff_dst
+                            + ow * OC_BLOCK * MB_BLOCK
+                            + oh * OW * OC_BLOCK * MB_BLOCK;
+#            if CASE_3D
+                    diff_dst1 += od * OH * OW * OC_BLOCK * MB_BLOCK;
+#            endif
+#            if IS_DW
+                    const __global DATA_T *wei1 = wei
+#                if CASE_3D
+                            + kd * KH * KW * OC_BLOCK
+#                endif
+                            + kh * KW * OC_BLOCK + kw * OC_BLOCK;
+#            else
+                    const __global DATA_T *wei1 = wei
+#                if CASE_3D
+                            + kd * KH * KW * OC_BLOCK * IC_BLOCK
+#                endif
+                            + kh * KW * OC_BLOCK * IC_BLOCK
+                            + kw * OC_BLOCK * IC_BLOCK;
+#            endif
+#        else
+        int ow = (iw + PW);
+        int oh = (ih + PH);
+#            if CASE_3D
+        int od = (id + PD);
+#            endif
+        bool do_ker = true;
+#            if SW != 1 || SH != 1 || SD != 1
+        do_ker = ow % SW == 0 && oh % SH == 0;
+        ow /= SW;
+        oh /= SH;
+#                if CASE_3D
+        do_ker = do_ker && od % SD == 0;
+        od /= SD;
+#                endif
+#            endif
+#            if PH != 0 || PW != 0 || PD != 0
+        do_ker = do_ker && (oh < OH && ow < OW);
+#                if CASE_3D
+        do_ker = do_ker && (od < OD);
+#                endif
+#            endif
+#            if SW != 1 || SH != 1 || SD != 1 || PH != 0 || PW != 0 || PD != 0
+        if (do_ker) {
+#            endif
+            const __global DATA_T *diff_dst1 = diff_dst
+                    + ow * OC_BLOCK * MB_BLOCK + oh * OW * OC_BLOCK * MB_BLOCK;
+#            if CASE_3D
+            diff_dst1 += od * OH * OW * OC_BLOCK * MB_BLOCK;
+#            endif
+            const __global DATA_T *wei1 = wei;
+#        endif
+
+#        if MB == MB_LAST
+#            define LOAD_DIFF_DST(_block, _diff_dst, mb_chunk)        \
+                {                                                     \
+                    (_block) = AS_DATA8_T(BLOCK_READ8( \
+                            (const __global BLOCK_DATA_T *)((_diff_dst)       \
+                                    + (mb_chunk)*OC_BLOCK)));         \
+                }
+#        else
+#            define LOAD_DIFF_DST(_block, _diff_dst, mb_chunk)                 \
+                {                                                              \
+                    if (mb == MB_LAST) {                                       \
+                        for (int i = 0; i < min(8, MB - MB_LAST - (mb_chunk)); \
+                                i++)                                           \
+                            (_block)[i] = AS_DATA_T(BLOCK_READ( \
+                                    (const __global BLOCK_DATA_T *)(&(                 \
+                                            _diff_dst)[((mb_chunk) + i) * OC   \
+                                            * G * OD * OH * OW])));            \
+                    } else {                                                   \
+                        for (int i = 0; i < 8; i++)                            \
+                            (_block)[i] = AS_DATA_T(BLOCK_READ( \
+                                    (const __global BLOCK_DATA_T *)(&(                 \
+                                            _diff_dst)[((mb_chunk) + i) * OC   \
+                                            * G * OD * OH * OW])));            \
+                    }                                                          \
+                }
+#        endif
+
+#        if MB == MB_LAST
+#            define SAVE_SRC_DIFF(_block, _diff_src, mb_chunk)        \
+                {                                                     \
+                    BLOCK_WRITE8(                     \
+                            (__global unsigned int *)(&(              \
+                                    _diff_src)[(mb_chunk)*IC_BLOCK]), \
+                            AS_UINT8_T((_block)));                      \
+                }
+#        else
+#            define SAVE_SRC_DIFF(_block, _diff_src, mb_chunk)                 \
+                {                                                              \
+                    if (mb == MB_LAST) {                                       \
+                        for (int i = 0; i < min(8, MB - MB_LAST - (mb_chunk)); \
+                                i++) {                                         \
+                            BLOCK_WRITE(                       \
+                                    (__global unsigned int *)(&(               \
+                                            _diff_src)[((mb_chunk) + i) * IC   \
+                                            * G * ID * IH * IW]),              \
+                                    AS_UINT_T((_block)[i]));                     \
+                        }                                                      \
+                    } else {                                                   \
+                        for (int i = 0; i < 8; i++) {                          \
+                            BLOCK_WRITE(                       \
+                                    (__global unsigned int *)(&(               \
+                                            _diff_src)[((mb_chunk) + i) * IC   \
+                                            * G * ID * IH * IW]),              \
+                                    AS_UINT_T((_block)[i]));                     \
+                        }                                                      \
+                    }                                                          \
+                }
+#        endif
+
+#        if DT_F32
+#        define TRANSPOSE_8(_block, _col) \
+            (DATA8_T)(intel_sub_group_shuffle(_block, _col))
+#        else
+#        define TRANSPOSE_8(_block, _col)                     \
+            (DATA8_T)(intel_sub_group_shuffle(_block[0], _col), \
+                    intel_sub_group_shuffle(_block[1], _col), \
+                    intel_sub_group_shuffle(_block[2], _col), \
+                    intel_sub_group_shuffle(_block[3], _col), \
+                    intel_sub_group_shuffle(_block[4], _col), \
+                    intel_sub_group_shuffle(_block[5], _col), \
+                    intel_sub_group_shuffle(_block[6], _col), \
+                    intel_sub_group_shuffle(_block[7], _col))
+#        endif
+
+#        define FMA8(a, b, c) fma((DATA8_T)(a), (DATA8_T)b, (DATA8_T)c)
+
+#        define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB, _blockB1)       \
+            {                                                                  \
+                _result = FMA8(_blockB.s0, TRANSPOSE_8(_blockA, 0), _result);  \
+                _result = FMA8(_blockB.s1, TRANSPOSE_8(_blockA, 1), _result);  \
+                _result = FMA8(_blockB.s2, TRANSPOSE_8(_blockA, 2), _result);  \
+                _result = FMA8(_blockB.s3, TRANSPOSE_8(_blockA, 3), _result);  \
+                _result = FMA8(_blockB.s4, TRANSPOSE_8(_blockA, 4), _result);  \
+                _result = FMA8(_blockB.s5, TRANSPOSE_8(_blockA, 5), _result);  \
+                _result = FMA8(_blockB.s6, TRANSPOSE_8(_blockA, 6), _result);  \
+                _result = FMA8(_blockB.s7, TRANSPOSE_8(_blockA, 7), _result);  \
+                _result = FMA8(_blockB1.s0, TRANSPOSE_8(_blockA, 8), _result); \
+                _result = FMA8(_blockB1.s1, TRANSPOSE_8(_blockA, 9), _result); \
+                _result = FMA8(                                                \
+                        _blockB1.s2, TRANSPOSE_8(_blockA, 10), _result);       \
+                _result = FMA8(                                                \
+                        _blockB1.s3, TRANSPOSE_8(_blockA, 11), _result);       \
+                _result = FMA8(                                                \
+                        _blockB1.s4, TRANSPOSE_8(_blockA, 12), _result);       \
+                _result = FMA8(                                                \
+                        _blockB1.s5, TRANSPOSE_8(_blockA, 13), _result);       \
+                _result = FMA8(                                                \
+                        _blockB1.s6, TRANSPOSE_8(_blockA, 14), _result);       \
+                _result = FMA8(                                                \
+                        _blockB1.s7, TRANSPOSE_8(_blockA, 15), _result);       \
+            }
+
+#        if IS_DW
+                    DATA_T blockB00 = AS_DATA_T(BLOCK_READ(
+                            (const __global BLOCK_DATA_T *)wei1));
+#        else
+            DATA8_T blockB00 = AS_DATA8_T(
+                    BLOCK_READ8((const __global BLOCK_DATA_T *)wei1));
+            DATA8_T blockB01 = AS_DATA8_T(BLOCK_READ8(
+                    (const __global BLOCK_DATA_T *)(wei1 + 8 * IC_BLOCK)));
+#        endif
+                    DATA8_T blockA;
+
+                    LOAD_DIFF_DST(blockA, diff_dst1, 0);
+#        if IS_DW
+                    blockC00 = fma(blockA, (DATA8_T)blockB00, blockC00);
+#        else
+            MULTIPLY_BLOCKS_8x8(blockC00, blockA, blockB00, blockB01);
+#        endif
+
+                    LOAD_DIFF_DST(blockA, diff_dst1, 8);
+                    if ((mb != MB_LAST) || (MB % 16 > 8)) {
+#        if IS_DW
+                        blockC01 = fma(blockA, (DATA8_T)blockB00, blockC01);
+#        else
+                MULTIPLY_BLOCKS_8x8(blockC01, blockA, blockB00, blockB01);
+#        endif
+                    }
+
+#        undef TRANSPOSE_BLOCK_8
+#        undef MULTIPLY_BLOCKS_8x8
+#        if KH != 1 || KW != 1 || KD != 1
+                }
+#        else
+#            if SW != 1 || SH != 1 || SD != 1 || PH != 0 || PW != 0 || PD != 0
+        }
+#            endif
+#        endif
+        diff_dst += OC_BLOCK * OD * OH * OW * MB_BLOCK;
+        wei += IC * KD * KH * KW * OC_BLOCK;
+        ocb += OC_BLOCK;
+    } while (ocb < OC);
+
+    __global DATA_T *src_write0 = diff_src + mb * IC * G * ID * IH * IW
+            + gic * ID * IH * IW * IC_BLOCK * MB_BLOCK
+            + g * IC * ID * IH * IW * MB_BLOCK
+            + id * IH * IW * IC_BLOCK * MB_BLOCK + ih * IW * IC_BLOCK * MB_BLOCK
+            + iw * IC_BLOCK * MB_BLOCK;
+
+    blockC00 = ACTIVATION(blockC00, ACTIVATION_PARAMS);
+    blockC01 = ACTIVATION(blockC01, ACTIVATION_PARAMS);
+
+    SAVE_SRC_DIFF(blockC00, src_write0, 0);
+    SAVE_SRC_DIFF(blockC01, src_write0, 8);
+
+#    endif
+}
+
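The kd/kh/kw loop in this new backward-data kernel inverts the forward convolution index mapping; the chain of early "continue" checks is exactly the divisibility and bounds test of that inversion. Restated compactly for the x dimension (this mirrors the code above, nothing new is added):

    /* Forward:  iw = ow * SW - PW + kw * (1 + DW)
       Backward: output column ow contributes to input column iw only if the inverse is integral and in range. */
    int t = iw + PW - kw * (1 + DW);
    if (t >= 0 && t % SW == 0 && t / SW < OW) {
        int ow = t / SW;   /* accumulate diff_dst[..., ow] * wei[..., kw] into diff_src[..., iw] */
    }

The same test is applied to oh and od, and in the 1x1 branch it collapses to the stride/padding checks guarded by the SW/SH/SD and PH/PW/PD conditionals.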
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_fwd_data_f16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_fwd_data_f16.cl
new file mode 100644 (file)
index 0000000..dceb239
--- /dev/null
@@ -0,0 +1,1155 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+#define WITH_ELTWISE 1
+
+#if WITH_ELTWISE == 1
+//#include "ocl_post_ops.h"    // Use CLDNN activation
+#endif
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Use CLDNN activation
+#define DO_ELTWISE(blockC, nelems, alpha, beta) \
+    do { \
+        for (uint i = 0; i < nelems; i++) \
+            blockC[i] = ACTIVATION(blockC[i], ACTIVATION_PARAMS); \
+    } while (0)
+
+#define ODHW_SIZE (OD * OH * OW)
+#define IDHW_SIZE (ID * IH * IW)
+#define KDHW_SIZE (KD * KH * KW)
+
+#define HAS_PAD_D (PD != 0 || PD_R != 0)
+#define HAS_PAD_H (PH != 0 || PH_R != 0)
+#define HAS_PAD_W (PW != 0 || PW_R != 0)
+
+__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) // attr:no-format
+#if SUB_GROUP_SIZE != 1
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) // attr:no-format
+#endif
+KERNEL(gen9_common_conv_fwd_f16_kernel)(
+        const __global half *src,
+        __global half *dst,
+#if USE_IMAGE == 1
+        __read_only image2d_t wei,
+#else
+        const __global half *wei,
+#endif
+#if WITH_BIAS
+        const __global half *bias,
+#endif
+#if QUANTIZATION_TERM
+    __global float* quantizations,
+#endif
+#if CALIBRATION_TERM
+    __global float* calibrations,
+#endif
+    uint split_idx) 
+{
+    const half eltwise_alpha = 0;
+    const half eltwise_beta = 0;
+    const half sum_scale_ = 1;
+
+    half relu_negative_slope = eltwise_alpha;
+    half sum_scale = sum_scale_;
+
+#if IC == 3 && OC % 32 == 0
+#if MB % 2 == 0
+    /* First convolution, unrolled by 2 over the minibatch. */
+    const int oc = get_group_id(0) * 2;
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    int mb = get_group_id(2) * 2;
+
+#if CASE_3D
+    const int od = sp / (OWB * OHB);
+    const int ohw = sp % (OWB * OHB);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = (ohw / OWB) * OH_BLOCK;
+    const int ow = (ohw % OWB) * OW_BLOCK;
+
+#if OW_BLOCK == 8
+#if WITH_BIAS
+    half8 C00 = bias[oc * OC_BLOCK + local_id];
+    half8 C01 = C00;
+    half8 C10 = bias[(oc + 1) * OC_BLOCK + local_id];
+    half8 C11 = C10;
+#else
+    half8 C00 = 0.0, C01 = 0.0;
+    half8 C10 = 0.0, C11 = 0.0;
+#endif
+#else
+#if WITH_BIAS
+    half C00[OW_BLOCK];
+    half C01[OW_BLOCK];
+    half C10[OW_BLOCK];
+    half C11[OW_BLOCK];
+    for (int i = 0; i < OW_BLOCK; i++) {
+        C00[i] = bias[oc * OC_BLOCK + local_id];
+        C01[i] = bias[oc * OC_BLOCK + local_id];
+        C10[i] = bias[(oc + 1) * OC_BLOCK + local_id];
+        C11[i] = bias[(oc + 1) * OC_BLOCK + local_id];
+    }
+#else
+    half C00[OW_BLOCK] = {0.0}, C01[OW_BLOCK] = {0.0};
+    half C10[OW_BLOCK] = {0.0}, C11[OW_BLOCK] = {0.0};
+#endif
+#endif
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+#if NHWC == 1
+    src += mb * IC * IDHW_SIZE + iw * IC + ih * IW * IC + id * IH * IW * IC;
+#else
+    src += mb * IC * IDHW_SIZE + iw + ih * IW + id * IH * IW;
+#endif
+
+    wei += oc * OC_BLOCK * IC * KDHW_SIZE;
+
+    for (int kd = 0; kd < KD; ++kd)
+        for (int kh = 0; kh < KH; ++kh) {
+
+#if CASE_3D
+            if (id + kd * (1 + DD) < 0 || id + kd * (1 + DD) >= ID) {
+                continue;
+            }
+#endif
+            if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH) {
+                continue;
+            }
+#if NHWC == 1
+            const __global half *src1 = src + kd * (1 + DD) * IH * IW * IC
+                    + kh * (1 + DH) * IW * IC + local_id;
+#define SP_OFF IC
+#else
+            const __global half *src1 = src + kd * (1 + DD) * IH * IW
+                    + kh * (1 + DH) * IW + local_id * IDHW_SIZE;
+            const __global half *src2 = src + kd * (1 + DD) * IH * IW
+                    + kh * (1 + DH) * IW + local_id * IDHW_SIZE
+                    + IC * IDHW_SIZE;
+#define SP_OFF 1
+#endif
+
+            half tempA1[SW * OW_BLOCK + KW * (1 + DW)];
+            half tempA2[SW * OW_BLOCK + KW * (1 + DW)];
+            int k = iw;
+            if (local_id < 3) {
+                if (k < 0 || k + SW * OW_BLOCK + KW * (1 + DW) >= IW) {
+                    __attribute__((opencl_unroll_hint(
+                            SW * OW_BLOCK + KW * (1 + DW)))) // attr:no-format
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        if (k >= 0 && k < IW) {
+                            tempA1[i] = src1[i * SP_OFF];
+                            tempA2[i] = src2[i * SP_OFF];
+                        } else {
+                            tempA1[i] = 0.0f;
+                            tempA2[i] = 0.0f;
+                        }
+                        k++;
+                    }
+                } else {
+                    __attribute__((opencl_unroll_hint(
+                            SW * OW_BLOCK + KW * (1 + DW)))) // attr:no-format
+                    for (int i = 0; i < SW * OW_BLOCK + KW; i++) {
+                        tempA1[i] = src1[i * SP_OFF];
+                        tempA2[i] = src2[i * SP_OFF];
+                    }
+                }
+            }
+            __attribute__((opencl_unroll_hint(KW))) // attr:no-format
+            for (int kw = 0; kw < KW; ++kw) {
+
+                const __global half *wei1 = wei + kd * KH * KW * OC_BLOCK * IC
+                        + kh * KW * OC_BLOCK * IC + kw * OC_BLOCK * IC;
+
+#define TRANSPOSE_1(_block, _col) (half)(intel_sub_group_shuffle(_block, _col))
+
+#define FMA8(a, b, c) fma((half)(a), (half)b, (half)c)
+
+#define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB0, _blockB1, _blockB2) \
+    { \
+        _result = FMA8(_blockB0, TRANSPOSE_1(_blockA, 0), _result); \
+        _result = FMA8(_blockB1, TRANSPOSE_1(_blockA, 1), _result); \
+        _result = FMA8(_blockB2, TRANSPOSE_1(_blockA, 2), _result); \
+    }
+
+                half blockB00 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)wei1));
+                half blockB01 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1 + OC_BLOCK)));
+                half blockB02 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1 + 2 * OC_BLOCK)));
+
+                half blockA1[OW_BLOCK] = {0.0f};
+                half blockA2[OW_BLOCK] = {0.0f};
+                if (local_id < 3)
+                    for (int i = 0; i < OW_BLOCK; i++) {
+                        blockA1[i] = tempA1[kw * (1 + DW) + i * SW];
+                        blockA2[i] = tempA2[kw * (1 + DW) + i * SW];
+                    }
+                __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                for (int i = 0; i < OW_BLOCK; i++) {
+                    MULTIPLY_BLOCKS_8x8(
+                            C00[i], blockA1[i], blockB00, blockB01, blockB02);
+                    MULTIPLY_BLOCKS_8x8(
+                            C01[i], blockA2[i], blockB00, blockB01, blockB02);
+                }
+
+                blockB00 = as_half(intel_sub_group_block_read_us((const __global
+                                ushort *)&wei1[IC * KDHW_SIZE * OC_BLOCK]));
+                blockB01 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1
+                                + IC * KDHW_SIZE * OC_BLOCK + OC_BLOCK)));
+                blockB02 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1
+                                + IC * KDHW_SIZE * OC_BLOCK + 2 * OC_BLOCK)));
+
+                __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                for (int i = 0; i < OW_BLOCK; i++) {
+                    MULTIPLY_BLOCKS_8x8(
+                            C10[i], blockA1[i], blockB00, blockB01, blockB02);
+                    MULTIPLY_BLOCKS_8x8(
+                            C11[i], blockA2[i], blockB00, blockB01, blockB02);
+                }
+
+#undef TRANSPOSE_1
+#undef MULTIPLY_BLOCKS_8x8
+            }
+        }
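+    /* Destination uses the blocked layout
+     * [N / MB_BLOCK][OC / OC_BLOCK][D][H][W][MB_BLOCK][OC_BLOCK];
+     * dst_write0 / dst_write1 address the tiles for mb and mb + 1. */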
+    __global half *dst_write0 = dst
+            + (mb / MB_BLOCK) * OC * ODHW_SIZE * MB_BLOCK
+            + oc * OC_BLOCK * MB_BLOCK * ODHW_SIZE
+            + od * OH * OW * OC_BLOCK * MB_BLOCK + oh * OW * OC_BLOCK * MB_BLOCK
+            + ow * OC_BLOCK * MB_BLOCK + (mb % MB_BLOCK) * OC_BLOCK;
+    __global half *dst_write1 = dst
+            + ((mb + 1) / MB_BLOCK) * OC * ODHW_SIZE * MB_BLOCK
+            + oc * OC_BLOCK * MB_BLOCK * ODHW_SIZE
+            + od * OH * OW * OC_BLOCK * MB_BLOCK + oh * OW * OC_BLOCK * MB_BLOCK
+            + ow * OC_BLOCK * MB_BLOCK + ((mb + 1) % MB_BLOCK) * OC_BLOCK;
+
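+    /* Optional sum post-op: read the existing destination tiles back and
+     * accumulate them into the results (scaled by sum_scale unless
+     * SUM_SCALE == 1). */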
+#if WITH_SUM == 1
+    half8 blockS00, blockS01, blockS10, blockS11;
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            blockS00[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write0[i * OC_BLOCK * MB_BLOCK]));
+            blockS10[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write0[OC_BLOCK * MB_BLOCK * ODHW_SIZE
+                    + i * OC_BLOCK * MB_BLOCK]));
+            blockS01[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write1[i * OC_BLOCK * MB_BLOCK]));
+            blockS11[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write1[OC_BLOCK * MB_BLOCK * ODHW_SIZE
+                    + i * OC_BLOCK * MB_BLOCK]));
+        }
+    } else {
+        for (int i = 0; i < OW_BLOCK; i++) {
+            blockS00[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write0[i * OC_BLOCK * MB_BLOCK]));
+            blockS10[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write0[OC_BLOCK * MB_BLOCK * ODHW_SIZE
+                    + i * OC_BLOCK * MB_BLOCK]));
+            blockS01[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write1[i * OC_BLOCK * MB_BLOCK]));
+            blockS11[i] = as_half(intel_sub_group_block_read_us((const __global
+                            ushort *)&dst_write1[OC_BLOCK * MB_BLOCK * ODHW_SIZE
+                    + i * OC_BLOCK * MB_BLOCK]));
+        }
+    }
+    for (int i = 0; i < OW_BLOCK; i++) {
+#if SUM_SCALE == 1
+        C00[i] += blockS00[i];
+        C10[i] += blockS10[i];
+        C01[i] += blockS01[i];
+        C11[i] += blockS11[i];
+#else
+        C00[i] = fma(blockS00[i], (half)sum_scale, C00[i]);
+        C10[i] = fma(blockS10[i], (half)sum_scale, C10[i]);
+        C01[i] = fma(blockS01[i], (half)sum_scale, C01[i]);
+        C11[i] = fma(blockS11[i], (half)sum_scale, C11[i]);
+#endif
+    }
+#endif // with_sum
+
+#if WITH_ELTWISE == 1
+    DO_ELTWISE(C00, OW_BLOCK, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C10, OW_BLOCK, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C01, OW_BLOCK, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C11, OW_BLOCK, eltwise_alpha, eltwise_beta);
+#endif
+
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C00[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[OC_BLOCK * MB_BLOCK
+                                    * ODHW_SIZE
+                            + i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C10[i]));
+
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write1[i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C01[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write1[OC_BLOCK * MB_BLOCK
+                                    * ODHW_SIZE
+                            + i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C11[i]));
+        }
+    } else {
+        for (int i = 0; i < OW_BLOCK; i++) {
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C00[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[OC_BLOCK * MB_BLOCK
+                                    * ODHW_SIZE
+                            + i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C10[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write1[i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C01[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write1[OC_BLOCK * MB_BLOCK
+                                    * ODHW_SIZE
+                            + i * OC_BLOCK * MB_BLOCK]),
+                    as_ushort(C11[i]));
+        }
+    }
+
+#else
+    /* First convolution. */
+    const int oc = get_group_id(0) * 2;
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    int mb = get_group_id(2);
+
+#if CASE_3D
+    const int od = sp / (OWB * OHB);
+    const int ohw = sp % (OWB * OHB);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = (ohw / OWB) * OH_BLOCK;
+    const int ow = (ohw % OWB) * OW_BLOCK;
+
+#if OW_BLOCK == 8
+#if WITH_BIAS
+    half8 C00 = bias[oc * OC_BLOCK + local_id];
+    half8 C10 = bias[(oc + 1) * OC_BLOCK + local_id];
+#else
+    half8 C00 = 0.0;
+    half8 C10 = 0.0;
+#endif
+#else
+#if WITH_BIAS
+    half C00[OW_BLOCK];
+    half C10[OW_BLOCK];
+    for (int i = 0; i < OW_BLOCK; i++) {
+        C00[i] = bias[oc * OC_BLOCK + local_id];
+        C10[i] = bias[(oc + 1) * OC_BLOCK + local_id];
+    }
+#else
+    half C00[OW_BLOCK] = {0.0};
+    half C10[OW_BLOCK] = {0.0};
+#endif
+#endif
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+#if NHWC == 1
+    src += mb * IC * IDHW_SIZE + iw * IC + ih * IW * IC + id * IH * IW * IC;
+#else
+    src += mb * IC * IDHW_SIZE + iw + ih * IW + id * IH * IW;
+#endif
+
+    wei += oc * OC_BLOCK * IC * KDHW_SIZE;
+
+    for (int kd = 0; kd < KD; ++kd)
+        for (int kh = 0; kh < KH; ++kh) {
+
+#if CASE_3D
+            if (id + kd * (1 + DD) < 0 || id + kd * (1 + DD) >= ID) {
+                continue;
+            }
+#endif
+            if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH) {
+                continue;
+            }
+#if NHWC == 1
+            const __global half *src1 = src + kd * (1 + DD) * IH * IW * IC
+                    + kh * (1 + DH) * IW * IC + local_id;
+#define SP_OFF IC
+#else
+            const __global half *src1 = src + kd * (1 + DD) * IH * IW
+                    + kh * (1 + DH) * IW + local_id * IDHW_SIZE;
+#define SP_OFF 1
+#endif
+
+            half tempA1[SW * OW_BLOCK + KW * (1 + DW)];
+            int k = iw;
+            if (local_id < 3) {
+                if (k < 0 || k + SW * OW_BLOCK + KW * (1 + DW) >= IW) {
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        if (k >= 0 && k < IW) {
+                            tempA1[i] = src1[i * SP_OFF];
+                        } else {
+                            tempA1[i] = 0.0f;
+                        }
+                        k++;
+                    }
+                } else {
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        tempA1[i] = src1[i * SP_OFF];
+                    }
+                }
+            }
+
+            for (int kw = 0; kw < KW; ++kw) {
+
+                const __global half *wei1 = wei + kd * KH * KW * IC * OC_BLOCK
+                        + kh * KW * OC_BLOCK * IC + kw * OC_BLOCK * IC;
+
+#define TRANSPOSE_1(_block, _col) (half)(intel_sub_group_shuffle(_block, _col))
+
+#define FMA8(a, b, c) fma((half)(a), (half)b, (half)c)
+
+#define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB0, _blockB1, _blockB2) \
+    { \
+        _result = FMA8(_blockB0, TRANSPOSE_1(_blockA, 0), _result); \
+        _result = FMA8(_blockB1, TRANSPOSE_1(_blockA, 1), _result); \
+        _result = FMA8(_blockB2, TRANSPOSE_1(_blockA, 2), _result); \
+    }
+
+                half blockB00 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)wei1));
+                half blockB01 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1 + OC_BLOCK)));
+                half blockB02 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1 + 2 * OC_BLOCK)));
+
+                half8 blockA1 = 0.0f;
+                if (local_id < 3)
+                    for (int i = 0; i < OW_BLOCK; i++) {
+                        blockA1[i] = tempA1[kw * (1 + DW) + i * SW];
+                    }
+                __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                for (int i = 0; i < OW_BLOCK; i++) {
+                    MULTIPLY_BLOCKS_8x8(
+                            C00[i], blockA1[i], blockB00, blockB01, blockB02);
+                }
+
+                blockB00 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1
+                                + KDHW_SIZE * IC * OC_BLOCK)));
+                blockB01 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1
+                                + KDHW_SIZE * IC * OC_BLOCK + OC_BLOCK)));
+                blockB02 = as_half(intel_sub_group_block_read_us(
+                        (const __global ushort *)(wei1
+                                + KDHW_SIZE * IC * OC_BLOCK + 2 * OC_BLOCK)));
+                __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                for (int i = 0; i < OW_BLOCK; i++) {
+                    MULTIPLY_BLOCKS_8x8(
+                            C10[i], blockA1[i], blockB00, blockB01, blockB02);
+                }
+
+#undef TRANSPOSE_1
+#undef MULTIPLY_BLOCKS_8x8
+            }
+        }
+    __global half *dst_write0 = dst + mb * OC * ODHW_SIZE
+            + oc * OC_BLOCK * ODHW_SIZE + od * OH * OW * OC_BLOCK
+            + oh * OW * OC_BLOCK + ow * OC_BLOCK;
+
+#if WITH_SUM == 1
+    half8 blockS00, blockS10;
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            blockS00[i] = as_half(intel_sub_group_block_read_us(
+                    (const __global ushort *)&dst_write0[i * OC_BLOCK]));
+            blockS10[i] = as_half(intel_sub_group_block_read_us(
+                    (const __global ushort *)&dst_write0[ODHW_SIZE * OC_BLOCK
+                            + i * OC_BLOCK]));
+        }
+    } else {
+        for (int i = 0; i < OW_BLOCK; i++) {
+            blockS00[i] = as_half(intel_sub_group_block_read_us(
+                    (const __global ushort *)&dst_write0[i * OC_BLOCK]));
+            blockS10[i] = as_half(intel_sub_group_block_read_us(
+                    (const __global ushort *)&dst_write0[ODHW_SIZE * OC_BLOCK
+                            + i * OC_BLOCK]));
+        }
+    }
+    for (int i = 0; i < OW_BLOCK; i++) {
+#if SUM_SCALE == 1
+        C00[i] += blockS00[i];
+        C10[i] += blockS10[i];
+#else
+        C00[i] = fma(blockS00[i], (half)sum_scale, C00[i]);
+        C10[i] = fma(blockS10[i], (half)sum_scale, C10[i]);
+#endif
+    }
+#endif
+#if WITH_ELTWISE == 1
+    DO_ELTWISE(C00, OW_BLOCK, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C10, OW_BLOCK, eltwise_alpha, eltwise_beta);
+#endif
+
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[i * OC_BLOCK]),
+                    as_ushort(C00[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[ODHW_SIZE * OC_BLOCK
+                            + i * OC_BLOCK]),
+                    as_ushort(C10[i]));
+        }
+    } else {
+        for (int i = 0; i < OW_BLOCK; i++) {
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[i * OC_BLOCK]),
+                    as_ushort(C00[i]));
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[ODHW_SIZE * OC_BLOCK
+                            + i * OC_BLOCK]),
+                    as_ushort(C10[i]));
+        }
+    }
+
+#endif
+#endif
+#if VER_16MB16C == 1 && MB % 32 == 0
+
+    /* For now USE_32OC_UNROLL is always 0.
+     * TODO: Find a proper crossover point for both cases. */
+#if OC % 32 == 0
+#define USE_32OC_UNROLL 0
+#else
+#define USE_32OC_UNROLL 0
+#endif
+
+    /* Regular convolution unrolled by MB32. */
+#if USE_32OC_UNROLL
+    const int oc = get_group_id(0) * 2;
+#else
+    const int oc = get_group_id(0);
+#endif
+    const int sp = get_group_id(1);
+    int mb = get_group_id(2) * MB_BLOCK * 2;
+
+    const int g = split_idx;
+    const int goc = oc;
+
+#if CASE_3D
+    const int od = sp / (OW * OH);
+    const int ohw = sp % (OW * OH);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = ohw / OW;
+    const int ow = ohw % OW;
+
+#if WITH_BIAS
+    const int local_id = get_local_id(0);
+    half8 C00 = bias[oc * OC_BLOCK + local_id];
+    half8 C01 = C00, C02 = C00, C03 = C00;
+#if USE_32OC_UNROLL
+    half8 C10 = bias[(oc + 1) * OC_BLOCK + local_id];
+    half8 C11 = C10, C12 = C10, C13 = C10;
+#endif
+#else
+    half8 C00 = 0.0f, C01 = 0.0f, C02 = 0.0f, C03 = 0.0f;
+#if USE_32OC_UNROLL
+    half8 C10 = 0.0f, C11 = 0.0f, C12 = 0.0f, C13 = 0.0f;
+#endif
+#endif
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+    src += mb * IC * G * IDHW_SIZE + id * IH * IW * IC_BLOCK * MB_BLOCK
+            + ih * IW * IC_BLOCK * MB_BLOCK + iw * IC_BLOCK * MB_BLOCK
+            + g * IC * IDHW_SIZE * MB_BLOCK;
+
+    wei += goc * KDHW_SIZE * OC_BLOCK * IC;
+
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1) || (HAS_PAD_W && KW == 1))
+    if (!(id < 0 || id >= ID || ih < 0 || ih >= IH || iw < 0 || iw >= IW)) {
+#endif
+#if KH != 1 || KW != 1 || KD != 1
+        for (int kd = 0; kd < KD; ++kd)
+            for (int kh = 0; kh < KH; ++kh)
+                for (int kw = 0; kw < KW; ++kw) {
+
+                    if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH
+                            || iw + kw * (1 + DW) < 0
+                            || iw + kw * (1 + DW) >= IW
+#if CASE_3D
+                            || id + kd * (1 + DD) < 0
+                            || id + kd * (1 + DD) >= ID) {
+#else
+                    ) {
+#endif
+                        continue;
+                    }
+
+                    const __global half *src1 = src
+                            + kd * (1 + DD) * IH * IW * IC_BLOCK * MB_BLOCK
+                            + kh * (1 + DH) * IW * IC_BLOCK * MB_BLOCK
+                            + kw * (1 + DW) * IC_BLOCK * MB_BLOCK;
+                    const __global half *wei1 = wei
+                            + kd * KH * KW * OC_BLOCK * IC_BLOCK
+                            + kh * KW * OC_BLOCK * IC_BLOCK
+                            + kw * OC_BLOCK * IC_BLOCK;
+
+#else
+    const __global half *src1 = src;
+    const __global half *wei1 = wei;
+#endif
+                    for (int icb = 0; icb < IC / IC_BLOCK; icb++) {
+
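+// Sub-group GEMM step: each lane holds 8 MB values of one input channel
+// (blockA) and, for the blocked weight layout this kernel expects, the 16
+// input-channel weights of one output channel (blockB). TRANSPOSE_8 broadcasts
+// lane _col's half8 so MULTIPLY_BLOCKS_8x16 accumulates all 16 channels.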
+#define TRANSPOSE_8(_block, _col) \
+    as_half8(intel_sub_group_shuffle(as_ushort8(_block), _col))
+
+#define FMA8(a, b, c) fma((half8)(a), (half8)b, (half8)c)
+
+#define MULTIPLY_BLOCKS_8x16(_result, _blockA, _blockB) \
+    { \
+        _result = FMA8(_blockB[0], TRANSPOSE_8(_blockA, 0), _result); \
+        _result = FMA8(_blockB[1], TRANSPOSE_8(_blockA, 1), _result); \
+        _result = FMA8(_blockB[2], TRANSPOSE_8(_blockA, 2), _result); \
+        _result = FMA8(_blockB[3], TRANSPOSE_8(_blockA, 3), _result); \
+        _result = FMA8(_blockB[4], TRANSPOSE_8(_blockA, 4), _result); \
+        _result = FMA8(_blockB[5], TRANSPOSE_8(_blockA, 5), _result); \
+        _result = FMA8(_blockB[6], TRANSPOSE_8(_blockA, 6), _result); \
+        _result = FMA8(_blockB[7], TRANSPOSE_8(_blockA, 7), _result); \
+        _result = FMA8(_blockB[8], TRANSPOSE_8(_blockA, 8), _result); \
+        _result = FMA8(_blockB[9], TRANSPOSE_8(_blockA, 9), _result); \
+        _result = FMA8(_blockB[10], TRANSPOSE_8(_blockA, 10), _result); \
+        _result = FMA8(_blockB[11], TRANSPOSE_8(_blockA, 11), _result); \
+        _result = FMA8(_blockB[12], TRANSPOSE_8(_blockA, 12), _result); \
+        _result = FMA8(_blockB[13], TRANSPOSE_8(_blockA, 13), _result); \
+        _result = FMA8(_blockB[14], TRANSPOSE_8(_blockA, 14), _result); \
+        _result = FMA8(_blockB[15], TRANSPOSE_8(_blockA, 15), _result); \
+    }
+                        half16 W0 = as_half16(intel_sub_group_block_read8(
+                                (const __global uint *)wei1));
+#if USE_32OC_UNROLL
+                        half16 W1 = as_half16(intel_sub_group_block_read8(
+                                (const __global uint *)&wei1[IC * KDHW_SIZE
+                                        * OC_BLOCK]));
+#endif
+
+                        half8 A0 = as_half8(intel_sub_group_block_read_us8(
+                                (const __global ushort *)src1));
+                        MULTIPLY_BLOCKS_8x16(C00, A0, W0);
+#if USE_32OC_UNROLL
+                        MULTIPLY_BLOCKS_8x16(C10, A0, W1);
+#endif
+
+                        A0 = as_half8(intel_sub_group_block_read_us8(
+                                (const __global ushort *)&src1[8 * IC_BLOCK]));
+                        MULTIPLY_BLOCKS_8x16(C01, A0, W0);
+#if USE_32OC_UNROLL
+                        MULTIPLY_BLOCKS_8x16(C11, A0, W1);
+#endif
+
+                        A0 = as_half8(intel_sub_group_block_read_us8(
+                                (const __global ushort *)&src1[MB_BLOCK * IC * G
+                                        * IDHW_SIZE]));
+                        MULTIPLY_BLOCKS_8x16(C02, A0, W0);
+#if USE_32OC_UNROLL
+                        MULTIPLY_BLOCKS_8x16(C12, A0, W1);
+#endif
+
+                        A0 = as_half8(intel_sub_group_block_read_us8(
+                                (const __global ushort *)&src1[MB_BLOCK * IC * G
+                                                * IDHW_SIZE
+                                        + 8 * IC_BLOCK]));
+                        MULTIPLY_BLOCKS_8x16(C03, A0, W0);
+#if USE_32OC_UNROLL
+                        MULTIPLY_BLOCKS_8x16(C13, A0, W1);
+#endif
+                        src1 += IC_BLOCK * IDHW_SIZE * MB_BLOCK;
+                        wei1 += IC_BLOCK * KDHW_SIZE * OC_BLOCK;
+                    }
+#if KH != 1 || KW != 1 || KD != 1
+                }
+#endif
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1) || (HAS_PAD_W && KW == 1))
+    }
+#endif
+    __global half *dst_write0 = dst + mb * OC * G * ODHW_SIZE
+            + goc * ODHW_SIZE * OC_BLOCK * MB_BLOCK
+            + g * OC * ODHW_SIZE * MB_BLOCK + od * OH * OW * OC_BLOCK * MB_BLOCK
+            + oh * OW * OC_BLOCK * MB_BLOCK + ow * OC_BLOCK * MB_BLOCK;
+#if USE_32OC_UNROLL
+    __global half *dst_write1 = dst_write0 + OC_BLOCK * ODHW_SIZE * MB_BLOCK;
+#endif
+
+#if WITH_SUM == 1
+    half8 blockS00 = as_half8(intel_sub_group_block_read_us8(
+            (const __global ushort *)dst_write0));
+    half8 blockS01 = as_half8(intel_sub_group_block_read_us8(
+            (const __global ushort *)(dst_write0 + 8 * OC_BLOCK)));
+#if USE_32OC_UNROLL
+    half8 blockS10 = as_half8(intel_sub_group_block_read_us8(
+            (const __global ushort *)dst_write1));
+    half8 blockS11 = as_half8(intel_sub_group_block_read_us8(
+            (const __global ushort *)(dst_write1 + 8 * OC_BLOCK)));
+#endif
+#if SUM_SCALE == 1
+    C00 += blockS00;
+    C01 += blockS01;
+#if USE_32OC_UNROLL
+    C10 += blockS10;
+    C11 += blockS11;
+#endif
+#else
+    C00 = fma(blockS00, (half8)sum_scale, C00);
+    C01 = fma(blockS01, (half8)sum_scale, C01);
+#if USE_32OC_UNROLL
+    C10 = fma(blockS10, (half8)sum_scale, C10);
+    C11 = fma(blockS11, (half8)sum_scale, C11);
+#endif
+#endif
+#endif
+
+#if WITH_ELTWISE == 1
+    DO_ELTWISE(C00, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C01, 8, eltwise_alpha, eltwise_beta);
+#if USE_32OC_UNROLL
+    DO_ELTWISE(C10, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C11, 8, eltwise_alpha, eltwise_beta);
+#endif
+#endif
+
+    intel_sub_group_block_write_us8(
+            (__global ushort *)dst_write0, as_ushort8(C00));
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write0[8 * OC_BLOCK], as_ushort8(C01));
+#if USE_32OC_UNROLL
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write1[0], as_ushort8(C10));
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write1[8 * OC_BLOCK], as_ushort8(C11));
+#endif
+
+#if WITH_SUM == 1
+    half8 blockS02 = as_half8(
+            intel_sub_group_block_read_us8((const __global ushort *)(dst_write0
+                    + MB_BLOCK * OC * G * ODHW_SIZE)));
+    half8 blockS03 = as_half8(
+            intel_sub_group_block_read_us8((const __global ushort *)(dst_write0
+                    + MB_BLOCK * OC * G * ODHW_SIZE + 8 * OC_BLOCK)));
+#if USE_32OC_UNROLL
+    half8 blockS12 = as_half8(
+            intel_sub_group_block_read_us8((const __global ushort *)(dst_write1
+                    + MB_BLOCK * OC * G * ODHW_SIZE)));
+    half8 blockS13 = as_half8(
+            intel_sub_group_block_read_us8((const __global ushort *)(dst_write1
+                    + MB_BLOCK * OC * G * ODHW_SIZE + 8 * OC_BLOCK)));
+#endif
+#if SUM_SCALE == 1
+    C02 += blockS02;
+    C03 += blockS03;
+#if USE_32OC_UNROLL
+    C12 += blockS12;
+    C13 += blockS13;
+#endif
+#else
+    C02 = fma(blockS02, (half8)sum_scale, C02);
+    C03 = fma(blockS03, (half8)sum_scale, C03);
+#if USE_32OC_UNROLL
+    C12 = fma(blockS12, (half8)sum_scale, C12);
+    C13 = fma(blockS13, (half8)sum_scale, C13);
+#endif
+#endif
+#endif
+#if WITH_ELTWISE == 1
+    DO_ELTWISE(C02, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C03, 8, eltwise_alpha, eltwise_beta);
+#if USE_32OC_UNROLL
+    DO_ELTWISE(C12, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(C13, 8, eltwise_alpha, eltwise_beta);
+#endif
+#endif
+
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write0[MB_BLOCK * OC * G * ODHW_SIZE],
+            as_ushort8(C02));
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write0[MB_BLOCK * OC * G * ODHW_SIZE
+                    + 8 * OC_BLOCK],
+            as_ushort8(C03));
+#if USE_32OC_UNROLL
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write1[MB_BLOCK * OC * G * ODHW_SIZE],
+            as_ushort8(C12));
+    intel_sub_group_block_write_us8(
+            (__global ushort *)&dst_write1[MB_BLOCK * OC * G * ODHW_SIZE
+                    + 8 * OC_BLOCK],
+            as_ushort8(C13));
+#endif
+#endif
+
+#if VER_8OW16C == 1 && IC % 16 == 0
+    /* Regular convolution. */
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    const int ocb_mb = get_group_id(2);
+    const int ocb = ocb_mb / (MB);
+    const int mb = ocb_mb % (MB);
+    const int oc = (ocb * OCB) / OC_BLOCK + get_group_id(0);
+
+    const int g = split_idx;
+    const int goc = oc;
+
+#if CASE_3D
+    const int od = sp / (OWB * OHB);
+    const int ohw = sp % (OWB * OHB);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = (ohw / OWB) * OH_BLOCK;
+    const int ow = (ohw % OWB) * OW_BLOCK;
+
+#if WITH_BIAS
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+    half blockC00[OW_BLOCK];
+    for (int i = 0; i < OW_BLOCK; i++)
+        blockC00[i] = bias[oc * OC_BLOCK + local_id];
+#else
+    half8 blockC00 = bias[oc * OC_BLOCK + local_id];
+#if OW_BLOCK == 16
+    half8 blockC01 = blockC00;
+#endif
+#endif
+#else
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+    half blockC00[OW_BLOCK] = {0.0f};
+#else
+    half8 blockC00 = 0.0f;
+#if OW_BLOCK == 16
+    half8 blockC01 = 0.0f;
+#endif
+#endif
+#endif
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+
+    /* shift input pointers */
+    src += mb * IC * G * IDHW_SIZE + iw * IC_BLOCK + ih * IW * IC_BLOCK
+            + id * IH * IW * IC_BLOCK + g * IC * IDHW_SIZE;
+    wei += goc * KDHW_SIZE * IC * OC_BLOCK;
+
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1))
+    if (!(id < 0 || id >= ID || ih < 0 || ih >= IH)) {
+#endif
+        int icb = 0;
+        do {
+#if KH != 1 || KW != 1 || KD != 1
+            for (int kd = 0; kd < KD; ++kd)
+                for (int kh = 0; kh < KH; ++kh) {
+
+#if CASE_3D
+                    if (id + kd * (1 + DD) < 0 || id + kd * (1 + DD) >= ID)
+                        continue;
+#endif
+                    if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH)
+                        continue;
+
+                    const __global half *src1 = src
+                            + kd * (1 + DD) * IH * IW * IC_BLOCK
+                            + kh * (1 + DH) * IW * IC_BLOCK;
+
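+                    /* Cache the input row for this output block
+                     * (SW * OW_BLOCK + KW * (1 + DW) pixels), zeroing columns
+                     * that fall outside the image. */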
+                    half tempA[SW * OW_BLOCK + KW * (1 + DW)];
+                    int k = iw;
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+                    if (k < 0 || k + SW * OW_BLOCK + KW * (1 + DW) >= IW) {
+                        __attribute__((opencl_unroll_hint(SW * OW_BLOCK
+                                + KW * (1 + DW)))) // attr:no-format
+                        for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW);
+                                i++) {
+                            if (k >= 0 && k < IW)
+                                tempA[i] = as_half(
+                                        intel_sub_group_block_read_us((
+                                                const __global ushort *)(&src1[i
+                                                * IC_BLOCK])));
+                            else
+                                tempA[i] = 0.0h;
+                            k++;
+                        }
+                    } else {
+#endif
+                        __attribute__((opencl_unroll_hint(SW * OW_BLOCK
+                                + KW * (1 + DW)))) // attr:no-format
+                        for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW);
+                                i++) {
+                            tempA[i] = as_half(intel_sub_group_block_read_us(
+                                    (const __global ushort
+                                                    *)(&src1[i * IC_BLOCK])));
+                        }
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+                    }
+#endif
+
+                    __attribute__((opencl_unroll_hint(KW))) // attr:no-format
+                    for (int kw = 0; kw < KW; ++kw) {
+
+                        const __global half *wei1 = wei
+                                + kd * KH * KW * IC_BLOCK * OC_BLOCK
+                                + kh * KW * IC_BLOCK * OC_BLOCK
+                                + kw * IC_BLOCK * OC_BLOCK;
+
+#else
+        const __global half *src1 = src;
+        const __global half *wei1 = wei;
+#endif
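+// One output pixel per lane: TRANSPOSE_1 broadcasts the input value held by
+// sub-group lane _col, so MULTIPLY_BLOCKS_8x16 accumulates all 16 input
+// channels of the block (weights split across blockB00 / blockB01).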
+#define TRANSPOSE_1(_block, _col) (half)intel_sub_group_shuffle(_block, _col)
+
+#define FMA8(a, b, c) fma((half)(a), (half)b, (half)c)
+
+#define MULTIPLY_BLOCKS_8x16(_result, _blockA, _blockB, _blockB1) \
+    { \
+        _result = FMA8(_blockB.s0, TRANSPOSE_1(_blockA, 0), _result); \
+        _result = FMA8(_blockB.s1, TRANSPOSE_1(_blockA, 1), _result); \
+        _result = FMA8(_blockB.s2, TRANSPOSE_1(_blockA, 2), _result); \
+        _result = FMA8(_blockB.s3, TRANSPOSE_1(_blockA, 3), _result); \
+        _result = FMA8(_blockB.s4, TRANSPOSE_1(_blockA, 4), _result); \
+        _result = FMA8(_blockB.s5, TRANSPOSE_1(_blockA, 5), _result); \
+        _result = FMA8(_blockB.s6, TRANSPOSE_1(_blockA, 6), _result); \
+        _result = FMA8(_blockB.s7, TRANSPOSE_1(_blockA, 7), _result); \
+        _result = FMA8(_blockB1.s0, TRANSPOSE_1(_blockA, 8), _result); \
+        _result = FMA8(_blockB1.s1, TRANSPOSE_1(_blockA, 9), _result); \
+        _result = FMA8(_blockB1.s2, TRANSPOSE_1(_blockA, 10), _result); \
+        _result = FMA8(_blockB1.s3, TRANSPOSE_1(_blockA, 11), _result); \
+        _result = FMA8(_blockB1.s4, TRANSPOSE_1(_blockA, 12), _result); \
+        _result = FMA8(_blockB1.s5, TRANSPOSE_1(_blockA, 13), _result); \
+        _result = FMA8(_blockB1.s6, TRANSPOSE_1(_blockA, 14), _result); \
+        _result = FMA8(_blockB1.s7, TRANSPOSE_1(_blockA, 15), _result); \
+    }
+
+                        half8 blockB00
+                                = as_half8(intel_sub_group_block_read_us8(
+                                        (const __global ushort *)wei1));
+                        half8 blockB01
+                                = as_half8(intel_sub_group_block_read_us8(
+                                        (const __global ushort *)(wei1
+                                                + 8 * OC_BLOCK)));
+
+#if KH != 1 || KW != 1 || KD != 1
+                        half blockA[OW_BLOCK];
+                        __attribute__((
+                                opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                        for (int i = 0; i < OW_BLOCK; i++) {
+                            blockA[i] = tempA[kw * (1 + DW) + SW * i];
+                        }
+#else
+#if OW_BLOCK != 8 || HAS_PAD_W
+        half blockA[OW_BLOCK];
+#else
+        half8 blockA;
+#endif
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+        if (ow == OW_LAST) {
+            for (int i = 0; i < OW - OW_LAST; i++) {
+#if HAS_PAD_W
+                if (iw + i * SW < 0 || iw + i * SW >= IW) {
+                    blockA[i] = 0.0f;
+                } else {
+#endif
+                    blockA[i] = as_half(
+                            intel_sub_group_block_read_us((const __global ushort
+                                            *)(&src1[i * IC_BLOCK * SW])));
+#if HAS_PAD_W
+                }
+#endif
+            }
+            for (int i = OW - OW_LAST; i < OW_BLOCK; i++)
+                blockA[i] = 0.0f;
+        } else {
+#endif
+#if SW != 1 || OW_BLOCK != 8 || HAS_PAD_W
+            __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+            for (int i = 0; i < OW_BLOCK; i++) {
+#if HAS_PAD_W
+                if (iw + i * SW < 0 || iw + i * SW >= IW) {
+                    blockA[i] = 0.0f;
+                } else {
+#endif
+                    blockA[i] = as_half(
+                            intel_sub_group_block_read_us((const __global ushort
+                                            *)(&src1[i * IC_BLOCK * SW])));
+#if HAS_PAD_W
+                }
+#endif
+            }
+#else
+        blockA = as_half8(intel_sub_group_block_read_us8(
+                (const __global ushort *)(&src1[0])));
+#endif
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+        }
+#endif
+#endif
+#if OW_BLOCK != 16
+                        __attribute__((
+                                opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                        for (int i = 0; i < OW_BLOCK; i++) {
+                            MULTIPLY_BLOCKS_8x16(
+                                    blockC00[i], blockA[i], blockB00, blockB01);
+                        }
+#else
+        __attribute__((opencl_unroll_hint(8))) // attr:no-format
+        for (int i = 0; i < 8; i++) {
+            MULTIPLY_BLOCKS_8x16(blockC00[i], blockA[i], blockB00, blockB01);
+            MULTIPLY_BLOCKS_8x16(
+                    blockC01[i], blockA[i + 8], blockB00, blockB01);
+        }
+#endif
+
+#undef TRANSPOSE_1
+#undef MULTIPLY_BLOCKS_8x16
+#if KH != 1 || KW != 1 || KD != 1
+                    }
+                }
+#endif
+            src += IDHW_SIZE * IC_BLOCK;
+            wei += OC_BLOCK * KDHW_SIZE * IC_BLOCK;
+            icb += IC_BLOCK;
+        } while (icb < IC);
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1))
+    }
+#endif
+
+    __global half *dst_write0 = dst + mb * OC * G * ODHW_SIZE
+            + goc * ODHW_SIZE * OC_BLOCK + g * OC * ODHW_SIZE
+            + od * OH * OW * OC_BLOCK + oh * OW * OC_BLOCK + ow * OC_BLOCK;
+
+#if WITH_SUM == 1
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+    half blockS00[OW_BLOCK];
+#else
+    half8 blockS00;
+#if OW_BLOCK == 16
+    half8 blockS01;
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            blockS00[i] = as_half(intel_sub_group_block_read_us(
+                    (const __global ushort *)&dst_write0[i * OC_BLOCK]));
+        }
+    } else {
+#endif
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+        for (int i = 0; i < OW_BLOCK; i++) {
+            blockS00[i] = as_half(intel_sub_group_block_read_us(
+                    (const __global ushort *)&dst_write0[i * OC_BLOCK]));
+        }
+#else
+    blockS00 = as_half8(intel_sub_group_block_read_us8(
+            (const __global ushort *)dst_write0));
+#if OW_BLOCK == 16
+    blockS01 = as_half8(intel_sub_group_block_read_us8(
+            (const __global ushort *)&dst_write0[8 * OC_BLOCK]));
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    }
+#endif
+
+#if OW_BLOCK != 16
+    for (int i = 0; i < OW_BLOCK; i++) {
+#if SUM_SCALE == 1
+        blockC00[i] += blockS00[i];
+#else
+        blockC00[i] = fma(blockS00[i], (half)sum_scale, blockC00[i]);
+#endif
+    }
+#else
+#if SUM_SCALE == 1
+    blockC00 += blockS00;
+    blockC01 += blockS01;
+#else
+    blockC00 = fma(blockS00, (half8)sum_scale, blockC00);
+    blockC01 = fma(blockS01, (half8)sum_scale, blockC01);
+#endif
+#endif
+#endif
+
+#if WITH_ELTWISE == 1
+#if OW_BLOCK != 16
+    DO_ELTWISE(blockC00, OW_BLOCK, eltwise_alpha, eltwise_beta);
+#else
+    DO_ELTWISE(blockC00, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(blockC01, 8, eltwise_alpha, eltwise_beta);
+#endif
+#endif
+
+#if OW % OW_BLOCK != 0
+    if (ow + OW_BLOCK > OW) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[i * OC_BLOCK]),
+                    as_ushort(blockC00[i]));
+        }
+    } else {
+#endif
+
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+        __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+        for (int i = 0; i < OW_BLOCK; i++) {
+            intel_sub_group_block_write_us(
+                    (__global ushort *)(&dst_write0[i * OC_BLOCK]),
+                    as_ushort(blockC00[i]));
+        }
+#else
+    intel_sub_group_block_write_us8(
+            (__global ushort *)(&dst_write0[0]), as_ushort8(blockC00));
+#if OW_BLOCK == 16
+    intel_sub_group_block_write_us8(
+            (__global ushort *)(&dst_write0[8 * OC_BLOCK]),
+            as_ushort8(blockC01));
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    }
+#endif
+#endif
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_fwd_data_f32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gen9_common_conv_fwd_data_f32.cl
new file mode 100644 (file)
index 0000000..cdfafed
--- /dev/null
@@ -0,0 +1,810 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+#define WITH_ELTWISE 1
+
+#if WITH_ELTWISE == 1
+//#include "ocl_post_ops.h"    // Use CLDNN activation
+#endif
+
+#define ODHW_SIZE (OD * OH * OW)
+#define IDHW_SIZE (ID * IH * IW)
+#define KDHW_SIZE (KD * KH * KW)
+
+#define HAS_PAD_D (PD != 0 || PD_R != 0)
+#define HAS_PAD_H (PH != 0 || PH_R != 0)
+#define HAS_PAD_W (PW != 0 || PW_R != 0)
+
+#define SRC_OFF(n, ic, ih, iw) \
+    (((((n * G) * IC + (ic)) * IH + (ih)) * IW + (iw)))
+#define DST_OFF(n, oc, oh, ow) ((((n * G) * OC + (oc)) * OH + (oh)) * OW + (ow))
+
+// Use CLDNN activation
+#define DO_ELTWISE(blockC, nelems, alpha, beta) \
+    do { \
+        for (uint i = 0; i < nelems; i++) \
+            blockC[i] = ACTIVATION(blockC[i], ACTIVATION_PARAMS); \
+    } while (0)
+
+__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) // attr:no-format
+#if SUB_GROUP_SIZE != 1
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) // attr:no-format
+#endif
+KERNEL(gen9_common_conv_fwd_f32_kernel)(
+        const __global float *src,
+        __global float *dst,
+#if USE_IMAGE == 1
+        __read_only image2d_t wei,
+#else
+        const __global float *wei,
+#endif
+#if WITH_BIAS
+        const __global float *bias,
+#endif
+#if QUANTIZATION_TERM
+        __global float *quantizations,
+#endif
+#if CALIBRATION_TERM
+        __global float *calibrations,
+#endif
+        uint split_idx)
+{
+
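+    /* Post-op parameters are placeholders: the eltwise is applied through the
+     * clDNN ACTIVATION macro (see DO_ELTWISE above) and sum_scale is fixed
+     * to 1. */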
+    const float eltwise_alpha = 0.0f;
+    const float eltwise_beta = 0.0f;
+    const float sum_scale = 1.0f;
+
+#ifdef VER_16MB16C
+    const int oc = get_group_id(0);
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    int mb = get_group_id(2) * MB_BLOCK;
+
+    const int g = split_idx;
+    const int goc = oc;
+
+#if CASE_3D
+    const int od = sp / (OW * OH);
+    const int ohw = sp % (OW * OH);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = ohw / OW;
+    const int ow = ohw % OW;
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+
+    __global float *dst_write0 = dst + mb * OC * G * ODHW_SIZE
+            + goc * ODHW_SIZE * OC_BLOCK * MB_BLOCK
+            + g * OC * ODHW_SIZE * MB_BLOCK + oh * OW * OC_BLOCK * MB_BLOCK
+            + ow * OC_BLOCK * MB_BLOCK + od * OH * OW * OC_BLOCK * MB_BLOCK;
+
+    src += mb * IC * G * IDHW_SIZE + iw * IC_BLOCK * MB_BLOCK
+            + ih * IW * IC_BLOCK * MB_BLOCK + g * IDHW_SIZE * IC * MB_BLOCK
+            + id * IH * IW * IC_BLOCK * MB_BLOCK;
+
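+    /* Weights are either bound as an image (coordB0 / coordB1 track the
+     * x = oc block, y = ic position to block-read) or passed as a plain
+     * buffer. */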
+#if USE_IMAGE == 1
+    int2 coordB0 = (int2)((oc * OC_BLOCK) * sizeof(uint), 0);
+    int2 coordB1 = (int2)((oc * OC_BLOCK) * sizeof(uint), 8);
+#else
+    wei += goc * KDHW_SIZE * OC_BLOCK * IC_BLOCK;
+#endif
+
+#if WITH_BIAS
+    float8 blockC00 = bias[oc * OC_BLOCK + local_id];
+    float8 blockC01 = bias[oc * OC_BLOCK + local_id];
+#else
+    float8 blockC00 = 0.0f;
+    float8 blockC01 = 0.0f;
+#endif
+
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1) || (HAS_PAD_W && KW == 1))
+    if (!(id < 0 || id >= ID || ih < 0 || ih >= IH || iw < 0 || iw >= IW)) {
+#endif
+#if KH != 1 || KW != 1 || KD != 1
+        for (int kd = 0; kd < KD; ++kd)
+            for (int kh = 0; kh < KH; ++kh)
+                for (int kw = 0; kw < KW; ++kw) {
+                    if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH
+                            || iw + kw * (1 + DW) < 0
+                            || iw + kw * (1 + DW) >= IW
+#if CASE_3D
+                            || id + kd * (1 + DD) < 0
+                            || id + kd * (1 + DD) >= ID) {
+#else
+                    ) {
+#endif
+#if USE_IMAGE == 1
+                        coordB0.y += IC;
+                        coordB1.y += IC;
+#endif
+                        continue;
+                    }
+
+                    const __global float *src1 = src
+                            + kd * (1 + DD) * IH * IW * IC_BLOCK * MB_BLOCK
+                            + kh * (1 + DH) * IW * IC_BLOCK * MB_BLOCK
+                            + kw * (1 + DW) * IC_BLOCK * MB_BLOCK;
+                    const __global float *wei1 = wei
+                            + kd * KH * KW * OC_BLOCK * IC_BLOCK
+                            + kh * KW * OC_BLOCK * IC_BLOCK
+                            + kw * OC_BLOCK * IC_BLOCK;
+#else
+    const __global float *src1 = src;
+    const __global float *wei1 = wei;
+#endif
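+// Walk the input channels in IC_BLOCK chunks: TRANSPOSE_8 broadcasts the
+// float8 (8 MB values) held by lane _col, and MULTIPLY_BLOCKS_8x8 accumulates
+// the 16 channels of the block against this lane's output-channel weights.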
+                    int icb = 0;
+                    do {
+#define TRANSPOSE_8(_block, _col) \
+    (float8)(intel_sub_group_shuffle(_block, _col))
+
+#define FMA8(a, b, c) fma((float8)(a), (float8)b, (float8)c)
+
+#define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB, _blockB1) \
+    { \
+        _result = FMA8(_blockB.s0, TRANSPOSE_8(_blockA, 0), _result); \
+        _result = FMA8(_blockB.s1, TRANSPOSE_8(_blockA, 1), _result); \
+        _result = FMA8(_blockB.s2, TRANSPOSE_8(_blockA, 2), _result); \
+        _result = FMA8(_blockB.s3, TRANSPOSE_8(_blockA, 3), _result); \
+        _result = FMA8(_blockB.s4, TRANSPOSE_8(_blockA, 4), _result); \
+        _result = FMA8(_blockB.s5, TRANSPOSE_8(_blockA, 5), _result); \
+        _result = FMA8(_blockB.s6, TRANSPOSE_8(_blockA, 6), _result); \
+        _result = FMA8(_blockB.s7, TRANSPOSE_8(_blockA, 7), _result); \
+        _result = FMA8(_blockB1.s0, TRANSPOSE_8(_blockA, 8), _result); \
+        _result = FMA8(_blockB1.s1, TRANSPOSE_8(_blockA, 9), _result); \
+        _result = FMA8(_blockB1.s2, TRANSPOSE_8(_blockA, 10), _result); \
+        _result = FMA8(_blockB1.s3, TRANSPOSE_8(_blockA, 11), _result); \
+        _result = FMA8(_blockB1.s4, TRANSPOSE_8(_blockA, 12), _result); \
+        _result = FMA8(_blockB1.s5, TRANSPOSE_8(_blockA, 13), _result); \
+        _result = FMA8(_blockB1.s6, TRANSPOSE_8(_blockA, 14), _result); \
+        _result = FMA8(_blockB1.s7, TRANSPOSE_8(_blockA, 15), _result); \
+    }
+
+#if USE_IMAGE == 1
+                        float8 blockB00 = as_float8(
+                                intel_sub_group_block_read8(wei, coordB0));
+                        float8 blockB01 = as_float8(
+                                intel_sub_group_block_read8(wei, coordB1));
+#else
+        float8 blockB00 = as_float8(
+                intel_sub_group_block_read8((const __global uint *)wei1));
+        float8 blockB01 = as_float8(intel_sub_group_block_read8(
+                (const __global uint *)(wei1 + 8 * IC_BLOCK)));
+#endif
+                        float8 blockA;
+
+                        blockA = as_float8(intel_sub_group_block_read8(
+                                (const __global uint *)(src1)));
+
+                        MULTIPLY_BLOCKS_8x8(
+                                blockC00, blockA, blockB00, blockB01);
+
+                        blockA = as_float8(intel_sub_group_block_read8(
+                                (const __global uint *)(src1 + 8 * IC_BLOCK)));
+
+                        MULTIPLY_BLOCKS_8x8(
+                                blockC01, blockA, blockB00, blockB01);
+
+#undef TRANSPOSE_8
+#undef MULTIPLY_BLOCKS_8x8
+                        src1 += IC_BLOCK * IDHW_SIZE * MB_BLOCK;
+#if USE_IMAGE == 1
+                        coordB0.y += IC_BLOCK;
+                        coordB1.y += IC_BLOCK;
+#else
+        wei1 += OC * KDHW_SIZE * IC_BLOCK;
+#endif
+                        icb += IC_BLOCK;
+                    } while (icb < IC);
+#if KH != 1 || KW != 1 || KD != 1
+                }
+#endif
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1) || (HAS_PAD_W && KW == 1))
+    }
+#endif
+
+#if WITH_SUM == 1
+    float8 blockS00 = as_float8(
+            intel_sub_group_block_read8((const __global uint *)dst_write0));
+    float8 blockS01 = as_float8(intel_sub_group_block_read8(
+            (const __global uint *)(dst_write0 + 8 * OC_BLOCK)));
+
+#if SUM_SCALE == 1
+    blockC00 += blockS00;
+    blockC01 += blockS01;
+#else
+    blockC00 = fma(blockS00, (float8)sum_scale, blockC00);
+    blockC01 = fma(blockS01, (float8)sum_scale, blockC01);
+#endif
+#endif // with_sum
+#if WITH_ELTWISE == 1
+    DO_ELTWISE(blockC00, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(blockC01, 8, eltwise_alpha, eltwise_beta);
+#endif
+
+    intel_sub_group_block_write8(
+            (__global unsigned int *)(&dst_write0[0]), as_uint8(blockC00));
+    intel_sub_group_block_write8(
+            (__global unsigned int *)(&dst_write0[8 * OC_BLOCK]),
+            as_uint8(blockC01));
+#endif
+
+#ifdef VER_8OW16C
+#if IC == 3
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    const int ocb_mb = get_group_id(2);
+    const int ocb = ocb_mb / (MB);
+    const int mb = ocb_mb % (MB);
+    const int oc = (ocb * OCB) / OC_BLOCK + get_group_id(0);
+
+#if CASE_3D
+    const int od = sp / (OWB * OHB);
+    const int ohw = sp % (OWB * OHB);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = (ohw / OWB) * OH_BLOCK;
+    const int ow = (ohw % OWB) * OW_BLOCK;
+
+#if WITH_BIAS
+    float8 blockC00 = bias[oc * OC_BLOCK + local_id];
+#if OCB == 32
+    float8 blockC01 = bias[oc * OC_BLOCK + local_id + 16];
+#endif
+#else
+#if OW_BLOCK != 8
+    float blockC00[OW_BLOCK] = {0.0f};
+#if OCB == 32
+    float blockC01[OW_BLOCK] = {0.0f};
+#endif
+#else
+    float8 blockC00 = 0.0f;
+#if OCB == 32
+    float8 blockC01 = 0.0f;
+#endif
+#endif
+#endif
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+#if NHWC == 1
+    src += mb * IC * IDHW_SIZE + iw * IC + ih * IW * IC + id * IH * IW * IC;
+#else
+    src += mb * IC * IDHW_SIZE + iw + ih * IW + id * IH * IW;
+#endif
+
+    wei += oc * OC_BLOCK * IC * KDHW_SIZE;
+
+    for (int kd = 0; kd < KD; ++kd)
+        for (int kh = 0; kh < KH; ++kh) {
+
+            if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH
+#if CASE_3D
+                    || id + kd * (1 + DD) < 0 || id + kd * (1 + DD) >= ID) {
+#else
+            ) {
+#endif
+                continue;
+            }
+#if NHWC == 1
+            const __global float *src1 = src + kd * (1 + DD) * IH * IW * IC
+                    + kh * (1 + DH) * IW * IC + local_id;
+#define SP_OFF IC
+#else
+            const __global float *src1 = src + kd * (1 + DD) * IH * IW
+                    + kh * (1 + DH) * IW + local_id * IDHW_SIZE;
+#define SP_OFF 1
+#endif
+
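+            /* With IC == 3, the first three sub-group lanes each cache one
+             * input channel of the row (SW * OW_BLOCK + KW * (1 + DW) pixels),
+             * zeroing out-of-image columns. */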
+            float tempA[SW * OW_BLOCK + KW * (1 + DW)];
+            int k = iw;
+            if (local_id < 3) {
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+                if (k < 0 || k + SW * OW_BLOCK + KW * (1 + DW) >= IW) {
+                    __attribute__((opencl_unroll_hint(
+                            SW * OW_BLOCK + KW * (1 + DW)))) // attr:no-format
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        if (k >= 0 && k < IW)
+                            tempA[i] = src1[i * SP_OFF];
+                        else
+                            tempA[i] = 0.0f;
+                        k++;
+                    }
+                } else {
+#endif
+                    __attribute__((opencl_unroll_hint(
+                            SW * OW_BLOCK + KW * (1 + DW)))) // attr:no-format
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        tempA[i] = src1[i * SP_OFF];
+                    }
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+                }
+#endif
+            }
+            __attribute__((opencl_unroll_hint(KW))) // attr:no-format
+            for (int kw = 0; kw < KW; ++kw) {
+
+                const __global float *wei1 = wei + kd * KH * KW * OC_BLOCK * IC
+                        + kh * KW * OC_BLOCK * IC + kw * OC_BLOCK * IC;
+
+#define TRANSPOSE_1(_block, _col) (float)(intel_sub_group_shuffle(_block, _col))
+
+#define FMA8(a, b, c) fma((float)(a), (float)b, (float)c)
+
+#define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB0, _blockB1, _blockB2) \
+    { \
+        _result = FMA8(_blockB0, TRANSPOSE_1(_blockA, 0), _result); \
+        _result = FMA8(_blockB1, TRANSPOSE_1(_blockA, 1), _result); \
+        _result = FMA8(_blockB2, TRANSPOSE_1(_blockA, 2), _result); \
+    }
+
+                float blockB00 = as_float(intel_sub_group_block_read(
+                        (const __global uint *)wei1));
+                float blockB01 = as_float(intel_sub_group_block_read(
+                        (const __global uint *)(wei1 + OC_BLOCK)));
+                float blockB02 = as_float(intel_sub_group_block_read(
+                        (const __global uint *)(wei1 + 2 * OC_BLOCK)));
+
+                float blockA[OW_BLOCK] = {0.0f};
+                if (local_id < 3) {
+                    __attribute__((
+                            opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                    for (int i = 0; i < OW_BLOCK; i++) {
+                        blockA[i] = tempA[kw * (1 + DW) + i * SW];
+                    }
+                }
+                __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                for (int i = 0; i < OW_BLOCK; i++) {
+                    MULTIPLY_BLOCKS_8x8(blockC00[i], blockA[i], blockB00,
+                            blockB01, blockB02);
+                }
+#if OCB == 32
+                wei1 += KD * KH * KW * IC * OC_BLOCK;
+                blockB00 = as_float(intel_sub_group_block_read(
+                        (const __global uint *)wei1));
+                blockB01 = as_float(intel_sub_group_block_read(
+                        (const __global uint *)(wei1 + OC_BLOCK)));
+                blockB02 = as_float(intel_sub_group_block_read(
+                        (const __global uint *)(wei1 + 2 * OC_BLOCK)));
+
+                __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                for (int i = 0; i < OW_BLOCK; i++) {
+                    MULTIPLY_BLOCKS_8x8(blockC01[i], blockA[i], blockB00,
+                            blockB01, blockB02);
+                }
+#endif
+
+#undef TRANSPOSE_1
+#undef MULTIPLY_BLOCKS_8x8
+            }
+        }
+    __global float *dst_write0 = dst
+            + (mb / MB_BLOCK) * OC * ODHW_SIZE * MB_BLOCK
+            + oc * OC_BLOCK * MB_BLOCK * ODHW_SIZE
+            + od * OH * OW * OC_BLOCK * MB_BLOCK + oh * OW * OC_BLOCK * MB_BLOCK
+            + ow * OC_BLOCK * MB_BLOCK + (mb % MB_BLOCK) * OC_BLOCK;
+#if OCB == 32
+    __global float *dst_write1 = dst_write0 + OC_BLOCK * MB_BLOCK * ODHW_SIZE;
+#endif
+#if WITH_SUM == 1
+    float8 blockS00, blockS01;
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            blockS00[i] = as_float(intel_sub_group_block_read((const __global
+                            uint *)&dst_write0[i * OC_BLOCK * MB_BLOCK]));
+#if OCB == 32
+            blockS01[i] = as_float(intel_sub_group_block_read((const __global
+                            uint *)&dst_write1[i * OC_BLOCK * MB_BLOCK]));
+#endif
+        }
+    } else {
+        for (int i = 0; i < OW_BLOCK; i++) {
+            blockS00[i] = as_float(intel_sub_group_block_read((const __global
+                            uint *)&dst_write0[i * OC_BLOCK * MB_BLOCK]));
+#if OCB == 32
+            blockS01[i] = as_float(intel_sub_group_block_read((const __global
+                            uint *)&dst_write1[i * OC_BLOCK * MB_BLOCK]));
+#endif
+        }
+    }
+    for (int i = 0; i < OW_BLOCK; i++) {
+#if SUM_SCALE == 1
+        blockC00[i] += blockS00[i];
+#if OCB == 32
+        blockC01[i] += blockS01[i];
+#endif
+#else
+        blockC00[i] = fma(blockS00[i], (float)sum_scale, blockC00[i]);
+#if OCB == 32
+        blockC01[i] = fma(blockS01[i], (float)sum_scale, blockC01[i]);
+#endif
+#endif
+    }
+#endif
+#if WITH_ELTWISE == 1
+    DO_ELTWISE(blockC00, OW_BLOCK, eltwise_alpha, eltwise_beta);
+#if OCB == 32
+    DO_ELTWISE(blockC01, OW_BLOCK, eltwise_alpha, eltwise_beta);
+#endif
+#endif
+
+#if OW % OW_BLOCK != 0
+    if (ow + OW_BLOCK > OW) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            intel_sub_group_block_write((__global unsigned int *)(&dst_write0[i
+                                                * OC_BLOCK * MB_BLOCK]),
+                    as_uint(blockC00[i]));
+#if OCB == 32
+            intel_sub_group_block_write(
+                    (__global unsigned int
+                                    *)(&dst_write0[i * OC_BLOCK * MB_BLOCK
+                            + OC_BLOCK * MB_BLOCK * ODHW_SIZE]),
+                    as_uint(blockC01[i]));
+#endif
+        }
+    } else {
+#endif
+#if OW_BLOCK != 8 || MB_BLOCK != 1
+        __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+        for (int i = 0; i < OW_BLOCK; i++) {
+            intel_sub_group_block_write((__global unsigned int *)(&dst_write0[i
+                                                * OC_BLOCK * MB_BLOCK]),
+                    as_uint(blockC00[i]));
+#if OCB == 32
+            intel_sub_group_block_write(
+                    (__global unsigned int
+                                    *)(&dst_write0[i * OC_BLOCK * MB_BLOCK
+                            + OC_BLOCK * MB_BLOCK * ODHW_SIZE]),
+                    as_uint(blockC01[i]));
+#endif
+        }
+#else
+    intel_sub_group_block_write8(
+            (__global unsigned int *)(&dst_write0[0]), as_uint8(blockC00));
+#if OCB == 32
+    intel_sub_group_block_write8((__global unsigned int *)(&dst_write0[OC_BLOCK
+                                         * MB_BLOCK * ODHW_SIZE]),
+            as_uint8(blockC01));
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    }
+#endif
+
+#else
+    const int sp = get_group_id(1);
+    const int local_id = get_local_id(0);
+    const int ocb_mb = get_group_id(2);
+    const int ocb = ocb_mb / (MB);
+    const int mb = ocb_mb % (MB);
+    const int oc = (ocb * OCB) / OC_BLOCK + get_group_id(0);
+    const int g = split_idx;
+    const int goc = oc;
+
+#if CASE_3D
+    const int od = sp / (OWB * OHB);
+    const int ohw = sp % (OWB * OHB);
+    const int id = od * SD - PD;
+#else
+    const int od = 0;
+    const int id = 0;
+    const int ohw = sp;
+#endif
+    const int oh = (ohw / OWB) * OH_BLOCK;
+    const int ow = (ohw % OWB) * OW_BLOCK;
+
+#if WITH_BIAS
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+    float blockC00[OW_BLOCK];
+    for (int i = 0; i < OW_BLOCK; i++)
+        blockC00[i] = bias[oc * OC_BLOCK + local_id];
+#else
+    float8 blockC00 = bias[oc * OC_BLOCK + local_id];
+#if OW_BLOCK == 16
+    float8 blockC01 = blockC00;
+#endif
+#endif
+#else
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+    float blockC00[OW_BLOCK] = {0.0f};
+#else
+    float8 blockC00 = 0.0f;
+#if OW_BLOCK == 16
+    float8 blockC01 = 0.0f;
+#endif
+#endif
+#endif
+
+    int ih = oh * SH - PH;
+    int iw = ow * SW - PW;
+    src += mb * IC * G * IDHW_SIZE + iw * IC_BLOCK + ih * IW * IC_BLOCK
+            + id * IH * IW * IC_BLOCK + g * IDHW_SIZE * IC;
+    wei += goc * KDHW_SIZE * OC_BLOCK * IC;
+
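+    /* True when the input window of this output row block may cross the
+     * left/right image border and per-pixel bounds checks are needed. */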
+    const bool do_if = iw < 0 || iw + SW * OW_BLOCK + KW * (1 + DW) >= IW;
+
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1))
+    if (!(id < 0 || id >= ID || ih < 0 || ih >= IH)) {
+#endif
+        int icb = 0;
+        do {
+#if KH != 1 || KW != 1 || KD != 1
+            __attribute__((opencl_unroll_hint(1))) // attr:no-format
+            for (int kd = 0; kd < KD; ++kd)
+                    __attribute__((opencl_unroll_hint(1))) // attr:no-format
+                    for (int kh = 0; kh < KH; ++kh) {
+
+                if (ih + kh * (1 + DH) < 0 || ih + kh * (1 + DH) >= IH
+#if CASE_3D
+                        || id + kd * (1 + DD) < 0 || id + kd * (1 + DD) >= ID) {
+#else
+                ) {
+#endif
+                    continue;
+                }
+                const __global float *src1 = src
+                        + kd * (1 + DD) * IH * IW * IC_BLOCK
+                        + kh * (1 + DH) * IW * IC_BLOCK;
+
+                float tempA[SW * OW_BLOCK + KW * (1 + DW)];
+                int k = iw;
+                if (do_if) {
+                    __attribute__((opencl_unroll_hint(
+                            SW * OW_BLOCK + KW * (1 + DW)))) // attr:no-format
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        if (k >= 0 && k < IW)
+                            tempA[i] = as_float(intel_sub_group_block_read(
+                                    (const __global uint
+                                                    *)(&src1[i * IC_BLOCK])));
+                        else
+                            tempA[i] = 0.0f;
+                        k++;
+                    }
+                } else {
+                    __attribute__((opencl_unroll_hint(
+                            SW * OW_BLOCK + KW * (1 + DW)))) // attr:no-format
+                    for (int i = 0; i < SW * OW_BLOCK + KW * (1 + DW); i++) {
+                        tempA[i] = as_float(intel_sub_group_block_read(
+                                (const __global uint *)(&src1[i * IC_BLOCK])));
+                    }
+                }
+                __attribute__((opencl_unroll_hint(KW))) // attr:no-format
+                for (int kw = 0; kw < KW; ++kw) {
+
+                    const __global float *wei1 = wei
+                            + kd * KH * KW * OC_BLOCK * IC_BLOCK
+                            + kh * KW * OC_BLOCK * IC_BLOCK
+                            + kw * OC_BLOCK * IC_BLOCK;
+
+#else
+        const __global float *src1 = src;
+        const __global float *wei1 = wei;
+#endif
+#define TRANSPOSE_1(_block, _col) (float)(intel_sub_group_shuffle(_block, _col))
+
+#define FMA8(a, b, c) fma((float)(a), (float)b, (float)c)
+
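+// Accumulates one 16-deep input-channel block into _result: element c of the two 8-wide
+// weight blocks (_blockB, _blockB1) is multiplied by input channel c of _blockA, fetched
+// from the corresponding sub-group lane via shuffle.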
+#define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB, _blockB1) \
+    { \
+        _result = FMA8(_blockB.s0, TRANSPOSE_1(_blockA, 0), _result); \
+        _result = FMA8(_blockB.s1, TRANSPOSE_1(_blockA, 1), _result); \
+        _result = FMA8(_blockB.s2, TRANSPOSE_1(_blockA, 2), _result); \
+        _result = FMA8(_blockB.s3, TRANSPOSE_1(_blockA, 3), _result); \
+        _result = FMA8(_blockB.s4, TRANSPOSE_1(_blockA, 4), _result); \
+        _result = FMA8(_blockB.s5, TRANSPOSE_1(_blockA, 5), _result); \
+        _result = FMA8(_blockB.s6, TRANSPOSE_1(_blockA, 6), _result); \
+        _result = FMA8(_blockB.s7, TRANSPOSE_1(_blockA, 7), _result); \
+        _result = FMA8(_blockB1.s0, TRANSPOSE_1(_blockA, 8), _result); \
+        _result = FMA8(_blockB1.s1, TRANSPOSE_1(_blockA, 9), _result); \
+        _result = FMA8(_blockB1.s2, TRANSPOSE_1(_blockA, 10), _result); \
+        _result = FMA8(_blockB1.s3, TRANSPOSE_1(_blockA, 11), _result); \
+        _result = FMA8(_blockB1.s4, TRANSPOSE_1(_blockA, 12), _result); \
+        _result = FMA8(_blockB1.s5, TRANSPOSE_1(_blockA, 13), _result); \
+        _result = FMA8(_blockB1.s6, TRANSPOSE_1(_blockA, 14), _result); \
+        _result = FMA8(_blockB1.s7, TRANSPOSE_1(_blockA, 15), _result); \
+    }
+
+                    float8 blockB00 = as_float8(intel_sub_group_block_read8(
+                            (const __global uint *)wei1));
+                    float8 blockB01 = as_float8(intel_sub_group_block_read8(
+                            (const __global uint *)(wei1 + 8 * IC_BLOCK)));
+
+#if KH != 1 || KW != 1 || KD != 1
+                    float blockA[OW_BLOCK];
+                    __attribute__((
+                            opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                    for (int i = 0; i < OW_BLOCK; i++) {
+                        blockA[i] = tempA[kw * (1 + DW) + SW * i];
+                    }
+#else
+#if OW_BLOCK != 8 || HAS_PAD_W
+        float blockA[OW_BLOCK];
+#else
+        float8 blockA;
+#endif
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+        if (ow == OW_LAST) {
+            for (int i = 0; i < OW - OW_LAST; i++) {
+#if HAS_PAD_W
+                if (iw + i * SW < 0 || iw + i * SW >= IW) {
+                    blockA[i] = 0.0f;
+                } else {
+#endif
+                    blockA[i] = as_float(intel_sub_group_block_read(
+                            (const __global uint *)(&src1[i * IC_BLOCK * SW])));
+#if HAS_PAD_W
+                }
+#endif
+            }
+            for (int i = OW - OW_LAST; i < OW_BLOCK; i++)
+                blockA[i] = 0.0f;
+        } else {
+#endif
+#if SW != 1 || OW_BLOCK != 8 || HAS_PAD_W
+            __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+            for (int i = 0; i < OW_BLOCK; i++) {
+#if HAS_PAD_W
+                if (iw + i * SW < 0 || iw + i * SW >= IW) {
+                    blockA[i] = 0.0f;
+                } else {
+#endif
+                    blockA[i] = as_float(intel_sub_group_block_read(
+                            (const __global uint *)(&src1[i * IC_BLOCK * SW])));
+#if HAS_PAD_W
+                }
+#endif
+            }
+#else
+        blockA = as_float8(
+                intel_sub_group_block_read8((const __global uint *)(&src1[0])));
+#endif
+#if OW % OW_BLOCK != 0 || HAS_PAD_W
+        }
+#endif
+#endif
+#if OW_BLOCK != 16
+                    __attribute__((
+                            opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+                    for (int i = 0; i < OW_BLOCK; i++) {
+                        MULTIPLY_BLOCKS_8x8(
+                                blockC00[i], blockA[i], blockB00, blockB01);
+                    }
+#else
+        __attribute__((opencl_unroll_hint(8))) // attr:no-format
+        for (int i = 0; i < 8; i++) {
+            MULTIPLY_BLOCKS_8x8(blockC00[i], blockA[i], blockB00, blockB01);
+            MULTIPLY_BLOCKS_8x8(blockC01[i], blockA[i + 8], blockB00, blockB01);
+        }
+#endif
+
+#undef TRANSPOSE_1
+#undef MULTIPLY_BLOCKS_8x8
+#if KH != 1 || KW != 1 || KD != 1
+                }
+            }
+#endif
+            src += IC_BLOCK * IDHW_SIZE;
+            wei += OC_BLOCK * KDHW_SIZE * IC_BLOCK;
+            icb += IC_BLOCK;
+        } while (icb < IC);
+#if ((HAS_PAD_D && KD == 1) || (HAS_PAD_H && KH == 1))
+    }
+#endif
+    __global float *dst_write0 = dst + mb * OC * G * ODHW_SIZE
+            + goc * ODHW_SIZE * OC_BLOCK + g * OC * ODHW_SIZE
+            + od * OH * OW * OC_BLOCK + oh * OW * OC_BLOCK + ow * OC_BLOCK;
+
+#if WITH_SUM == 1
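+// Sum post-op: read the current dst values back and add them into the accumulated
+// results, scaled by sum_scale unless SUM_SCALE == 1.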
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+    float blockS00[OW_BLOCK];
+#else
+    float8 blockS00;
+#if OW_BLOCK == 16
+    float8 blockS01;
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    if (ow == OW_LAST) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            blockS00[i] = as_float(intel_sub_group_block_read(
+                    (const __global uint *)&dst_write0[i * OC_BLOCK]));
+        }
+    } else {
+#endif
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+        for (int i = 0; i < OW_BLOCK; i++) {
+            blockS00[i] = as_float(intel_sub_group_block_read(
+                    (const __global uint *)&dst_write0[i * OC_BLOCK]));
+        }
+#else
+    blockS00 = as_float8(
+            intel_sub_group_block_read8((const __global uint *)dst_write0));
+#if OW_BLOCK == 16
+    blockS01 = as_float8(intel_sub_group_block_read8(
+            (const __global uint *)&dst_write0[8 * OC_BLOCK]));
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    }
+#endif
+
+#if OW_BLOCK != 16
+    for (int i = 0; i < OW_BLOCK; i++) {
+#if SUM_SCALE == 1
+        blockC00[i] += blockS00[i];
+#else
+        blockC00[i] = fma(blockS00[i], (float)sum_scale, blockC00[i]);
+#endif
+    }
+#else
+#if SUM_SCALE == 1
+    blockC00 += blockS00;
+    blockC01 += blockS01;
+#else
+    blockC00 = fma(blockS00, (float8)sum_scale, blockC00);
+    blockC01 = fma(blockS01, (float8)sum_scale, blockC01);
+#endif
+#endif
+#endif // with_sum
+#if WITH_ELTWISE == 1
+#if OW_BLOCK != 16
+    DO_ELTWISE(blockC00, OW_BLOCK, eltwise_alpha, eltwise_beta);
+#else
+    DO_ELTWISE(blockC00, 8, eltwise_alpha, eltwise_beta);
+    DO_ELTWISE(blockC01, 8, eltwise_alpha, eltwise_beta);
+#endif
+#endif
+
+#if OW % OW_BLOCK != 0
+    if (ow + OW_BLOCK > OW) {
+        for (int i = 0; i < OW - OW_LAST; i++) {
+            intel_sub_group_block_write(
+                    (__global unsigned int *)(&dst_write0[i * OC_BLOCK]),
+                    as_uint(blockC00[i]));
+        }
+    } else {
+#endif
+#if OW_BLOCK != 8 && OW_BLOCK != 16
+        __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
+        for (int i = 0; i < OW_BLOCK; i++) {
+            intel_sub_group_block_write(
+                    (__global unsigned int *)(&dst_write0[i * OC_BLOCK]),
+                    as_uint(blockC00[i]));
+        }
+#else
+    intel_sub_group_block_write8(
+            (__global unsigned int *)(&dst_write0[0]), as_uint8(blockC00));
+#if OW_BLOCK == 16
+    intel_sub_group_block_write8(
+            (__global unsigned int *)(&dst_write0[8 * OC_BLOCK]),
+            as_uint8(blockC01));
+#endif
+#endif
+#if OW % OW_BLOCK != 0
+    }
+#endif
+
+#endif
+#endif
+    return;
+}
index 383be31..a34d5f9 100644 (file)
 
 #include "include/include_all.cl"
 
-#if defined(INPUT_STRIDED) && OUTPUT_DIMS == 5
-    #define GET_INDEX(prefix, num, idx_order) \
-                CAT(CAT(prefix, num), _OFFSET) + \
-                ((d1 * CAT(CAT(prefix, num), _STRIDE_X)) % CAT(CAT(prefix, num), _SIZE_X))*CAT(CAT(prefix, num), _X_PITCH) +\
-                ((d2 * CAT(CAT(prefix, num), _STRIDE_Y)) % CAT(CAT(prefix, num), _SIZE_Y))*CAT(CAT(prefix, num), _Y_PITCH) +\
-                ((d3 * CAT(CAT(prefix, num), _STRIDE_Z)) % CAT(CAT(prefix, num), _SIZE_Z))*CAT(CAT(prefix, num), _Z_PITCH) +\
-                (d4 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) + \
-                (d5 % CAT(CAT(prefix, num), _BATCH_NUM  ))*CAT(CAT(prefix, num), _BATCH_PITCH)
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
+    #define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX_SAFE)(idx_order)
+#elif ELTWISE_NO_PITCH_SAME_DIMS
+    #define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _OFFSET) + idx_order
 #else
-    #if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
-        #define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX_SAFE)(idx_order)
-    #elif ELTWISE_NO_PITCH_SAME_DIMS
-        #define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _OFFSET) + idx_order
-    #else
-        #define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX)(idx_order)
-    #endif
+    #define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX)(idx_order)
 #endif
 
 KERNEL(eltwise)(
index 1d0a0da..df7f37a 100644 (file)
@@ -113,20 +113,20 @@ inline uint FUNC(get_bf8_xy16_index)(uint b, uint f, uint y, uint x, uint x_size
     return idx;
 }
 
-inline uint FUNC(get_bfyx_f16_index)(uint b, uint f, uint y, uint x,
-                                     uint x_size, uint y_size, uint f_size,
-                                     uint f_pad_before, uint f_pad_after,
-                                     uint y_pad_before, uint y_pad_after,
-                                     uint x_pad_before, uint x_pad_after) {
-    const uint fs = f / 16;
-    const uint fsv = f % 16;
-    const uint x_pitch = 16;
+inline uint FUNC(get_b_fs_yx_fsv_index)(uint b, uint f, uint y, uint x,
+                                        uint x_size, uint y_size, uint f_size,
+                                        uint f_pad_before, uint f_pad_after,
+                                        uint y_pad_before, uint y_pad_after,
+                                        uint x_pad_before, uint x_pad_after, uint alignment) {
+    const uint fs = f / alignment;
+    const uint fsv = f % alignment;
+    const uint x_pitch = alignment;
     const uint y_pitch = x_pitch * (x_pad_before +  x_size + x_pad_after);
     const uint total_f_size = f_pad_before + f_size + f_pad_after;
     const uint fs_pitch = y_pitch * (y_pad_before +  y_size + y_pad_after);
-    const uint b_pitch = fs_pitch * ((total_f_size + 16 - 1) / 16);
+    const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
 
-    const uint fs_pad_before = f_pad_before / 16;
+    const uint fs_pad_before = f_pad_before / alignment;
 
     const uint output_offset =  b * b_pitch +
                                 (fs + fs_pad_before) * fs_pitch +
@@ -137,20 +137,20 @@ inline uint FUNC(get_bfyx_f16_index)(uint b, uint f, uint y, uint x,
     return output_offset;
 }
 
-inline uint FUNC(get_bfyx_f16_index_safe)(uint b, uint f, uint y, uint x,
-                                          uint x_size, uint y_size, uint f_size,
-                                          uint f_pad_before, uint f_pad_after,
-                                          uint y_pad_before, uint y_pad_after,
-                                          uint x_pad_before, uint x_pad_after) {
-    const uint fs = f / 16;
-    const uint fsv = f % 16;
-    const uint x_pitch = 16;
+inline uint FUNC(get_b_fs_yx_fsv_index_safe)(uint b, uint f, uint y, uint x,
+                                             uint x_size, uint y_size, uint f_size,
+                                             uint f_pad_before, uint f_pad_after,
+                                             uint y_pad_before, uint y_pad_after,
+                                             uint x_pad_before, uint x_pad_after, uint alignment) {
+    const uint fs = f / alignment;
+    const uint fsv = f % alignment;
+    const uint x_pitch = alignment;
     const uint y_pitch = x_pitch * (x_pad_before +  x_size + x_pad_after);
     const uint total_f_size = f_pad_before + f_size + f_pad_after;
     const uint fs_pitch = y_pitch * (y_pad_before +  y_size + y_pad_after);
-    const uint b_pitch = fs_pitch * ((total_f_size + 16 - 1) / 16);
+    const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
 
-    const uint fs_pad_before = f_pad_before / 16;
+    const uint fs_pad_before = f_pad_before / alignment;
 
     const uint output_offset =  b * b_pitch +
                                 ((fs + fs_pad_before) % f_size) * fs_pitch +
@@ -162,7 +162,7 @@ inline uint FUNC(get_bfyx_f16_index_safe)(uint b, uint f, uint y, uint x,
 }
 
 #define GET_DATA_BFYX_F16_INDEX(prefix, b, f, y, x)     \
-    FUNC_CALL(get_bfyx_f16_index)(                      \
+    FUNC_CALL(get_b_fs_yx_fsv_index)(                   \
         b, f, y, x,                                     \
         CAT(prefix, _SIZE_X ),                          \
         CAT(prefix, _SIZE_Y),                           \
@@ -172,10 +172,10 @@ inline uint FUNC(get_bfyx_f16_index_safe)(uint b, uint f, uint y, uint x,
         CAT(prefix, _PAD_BEFORE_SIZE_Y),                \
         CAT(prefix, _PAD_AFTER_SIZE_Y),                 \
         CAT(prefix, _PAD_BEFORE_SIZE_X),                \
-        CAT(prefix, _PAD_AFTER_SIZE_X))
+        CAT(prefix, _PAD_AFTER_SIZE_X), 16)
 
 #define GET_DATA_BFYX_F16_INDEX_SAFE(prefix, b, f, y, x) \
-    FUNC_CALL(get_bfyx_f16_index_safe)(                  \
+    FUNC_CALL(get_b_fs_yx_fsv_index_safe)(               \
         b, f, y, x,                                      \
         CAT(prefix, _SIZE_X ),                           \
         CAT(prefix, _SIZE_Y),                            \
@@ -185,7 +185,7 @@ inline uint FUNC(get_bfyx_f16_index_safe)(uint b, uint f, uint y, uint x,
         CAT(prefix, _PAD_BEFORE_SIZE_Y),                 \
         CAT(prefix, _PAD_AFTER_SIZE_Y),                  \
         CAT(prefix, _PAD_BEFORE_SIZE_X),                 \
-        CAT(prefix, _PAD_AFTER_SIZE_X))
+        CAT(prefix, _PAD_AFTER_SIZE_X), 16)
 
 #define GET_FILTER_O_I_YX_I16_O16_INDEX(prefix, o, i, y, x, sub_group_size)  \
     CAT(prefix, _OFFSET) +                                                   \
@@ -198,6 +198,30 @@ inline uint FUNC(get_bfyx_f16_index_safe)(uint b, uint f, uint y, uint x,
         ((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH)                     \
     )
 
+#define GET_FILTER_O_I_ZYX_I16_O16_INDEX(prefix, o, i, z, y, x, sub_group_size) \
+    CAT(prefix, _OFFSET) +                                                   \
+    ((o) % (sub_group_size)) +                                               \
+    (sub_group_size)*(                                                       \
+        (x)*(sub_group_size)*CAT(prefix, _X_PITCH) +                         \
+        (y)*(sub_group_size)*CAT(prefix, _Y_PITCH) +                         \
+        (z)*(sub_group_size)*CAT(prefix, _Z_PITCH) +                         \
+        ((i) % (sub_group_size)) +                                           \
+        ((i) / (sub_group_size))*(sub_group_size)*CAT(prefix, _IFM_PITCH) +  \
+        ((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH)                     \
+    )
+
+#define GET_FILTER_I_O_ZYX_O16_I16_INDEX(prefix, o, i, z, y, x, sub_group_size) \
+    CAT(prefix, _OFFSET) +                                                   \
+    ((o) % (sub_group_size)) +                                               \
+    (sub_group_size)*(                                                       \
+        (x)*(sub_group_size)*CAT(prefix, _X_PITCH) +                         \
+        (y)*(sub_group_size)*CAT(prefix, _Y_PITCH) +                         \
+        (z)*(sub_group_size)*CAT(prefix, _Z_PITCH) +                         \
+        ((i) % (sub_group_size)) +                                           \
+        ((o) / (sub_group_size))*(sub_group_size)*CAT(prefix, _OFM_PITCH) +  \
+        ((i) / (sub_group_size))*CAT(prefix, _IFM_PITCH)                     \
+    )
+
 inline uint FUNC(get_oiyx_o16_index)(uint o, uint i, uint y, uint x, uint i_size, uint o_size, uint x_size, uint y_size)
 {
     const uint s_off = (x + y*x_size)*16;
@@ -311,6 +335,22 @@ inline uint FUNC(get_fs_bs_yx_bsv4_fsv32_index)(uint b, uint f, uint y, uint x,
     (i % CAT(prefix, _IFM_NUM))*CAT(prefix, _IFM_PITCH) +   \
     (o % CAT(prefix, _OFM_NUM))*CAT(prefix, _OFM_PITCH)
 
+#define GET_FILTER_INDEX_5D(prefix, o, i, z, y, x) \
+    CAT(prefix, _OFFSET) +                      \
+    (x)*CAT(prefix, _X_PITCH) +                 \
+    (y)*CAT(prefix, _Y_PITCH) +                 \
+    (z)*CAT(prefix, _Z_PITCH) +                 \
+    (i)*CAT(prefix, _IFM_PITCH) +               \
+    (o)*CAT(prefix, _OFM_PITCH)
+
+#define GET_FILTER_INDEX_5D_SAFE(prefix, o, i, z, y, x)     \
+    CAT(prefix, _OFFSET) +                                  \
+    (x % CAT(prefix, _SIZE_X ))*CAT(prefix, _X_PITCH) +     \
+    (y % CAT(prefix, _SIZE_Y ))*CAT(prefix, _Y_PITCH) +     \
+    (z % CAT(prefix, _SIZE_Z ))*CAT(prefix, _Z_PITCH) +     \
+    (i % CAT(prefix, _IFM_NUM))*CAT(prefix, _IFM_PITCH) +   \
+    (o % CAT(prefix, _OFM_NUM))*CAT(prefix, _OFM_PITCH)
+
 #define GET_FILTER_OS_IYX_OSV8_INDEX(prefix, o, i, y, x, sub_group_size)    \
     CAT(prefix, _OFFSET) +                                                  \
     ((o) % (sub_group_size)) +                                              \
@@ -641,6 +681,34 @@ inline uint FUNC(get_fs_b_yx_fsv32_index)(uint b, uint f, uint y, uint x,
     return index;
 }
 
+#define GET_DATA_BFZYX_F16_INDEX(prefix, b, f, z, y, x) \
+    FUNC_CALL(get_bfzyx_f16_index)(                     \
+        b, f, z, y, x, CAT(prefix, _SIZE_X ),           \
+        CAT(prefix, _SIZE_Y),                           \
+        CAT(prefix, _SIZE_Z),                           \
+        CAT(prefix, _FEATURE_NUM),                      \
+        CAT(prefix, _OFFSET),                           \
+        CAT(prefix, _PAD_BEFORE_SIZE_Z),                \
+        CAT(prefix, _PAD_AFTER_SIZE_Z),                 \
+        CAT(prefix, _PAD_BEFORE_SIZE_Y),                \
+        CAT(prefix, _PAD_AFTER_SIZE_Y),                 \
+        CAT(prefix, _PAD_BEFORE_SIZE_X),                \
+        CAT(prefix, _PAD_AFTER_SIZE_X))
+
+inline uint FUNC(get_bfzyx_f16_index)(uint b, uint f, uint z, uint y, uint x, uint x_size, uint y_size, uint z_size, uint f_size, uint offset, uint z_pad_before, uint z_pad_after, uint y_pad_before, uint y_pad_after, uint x_pad_before, uint x_pad_after)
+{
+    const uint full_width = x_size + x_pad_before + x_pad_after;
+    const uint full_height = y_size + y_pad_before + y_pad_after;
+    const uint full_depth  = z_size + z_pad_before + z_pad_after;
+
+    const uint xyz_offset = (x + y * full_width + z * full_width * full_height)*16; // w*16 + h*W*16 + d*W*H*16
+    const uint f_offset = (f / 16) * full_width*full_height*full_depth*16 + (f % 16);  //(c / 16) * HWD*16  + (c % 16)
+    const uint b_offset = b * f_size * full_width * full_height * full_depth;    //n * CHWD
+
+    const size_t idx = offset + xyz_offset + f_offset + b_offset;
+
+    return idx;
+}
 
 #define DECLARE_SAMPLER const sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_input_bfyx_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_input_bfyx_opt.cl
new file mode 100644 (file)
index 0000000..6e36d64
--- /dev/null
@@ -0,0 +1,121 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+#include "include/unit_type.cl"
+#include "include/sub_group.cl"
+
+#if FP16_UNIT_USED
+    #define MAD_1X8(_result_block, _input_value, _weights_block) \
+    { \
+        _result_block.s0 = fma(_input_value, _weights_block.s0, _result_block.s0); \
+        _result_block.s1 = fma(_input_value, _weights_block.s1, _result_block.s1); \
+        _result_block.s2 = fma(_input_value, _weights_block.s2, _result_block.s2); \
+        _result_block.s3 = fma(_input_value, _weights_block.s3, _result_block.s3); \
+        _result_block.s4 = fma(_input_value, _weights_block.s4, _result_block.s4); \
+        _result_block.s5 = fma(_input_value, _weights_block.s5, _result_block.s5); \
+        _result_block.s6 = fma(_input_value, _weights_block.s6, _result_block.s6); \
+        _result_block.s7 = fma(_input_value, _weights_block.s7, _result_block.s7); \
+    }
+#else
+    #define MAD_1X8(_result_block, _input_value, _weights_block) \
+    { \
+        _result_block.s0 = mad(_input_value, _weights_block.s0, _result_block.s0); \
+        _result_block.s1 = mad(_input_value, _weights_block.s1, _result_block.s1); \
+        _result_block.s2 = mad(_input_value, _weights_block.s2, _result_block.s2); \
+        _result_block.s3 = mad(_input_value, _weights_block.s3, _result_block.s3); \
+        _result_block.s4 = mad(_input_value, _weights_block.s4, _result_block.s4); \
+        _result_block.s5 = mad(_input_value, _weights_block.s5, _result_block.s5); \
+        _result_block.s6 = mad(_input_value, _weights_block.s6, _result_block.s6); \
+        _result_block.s7 = mad(_input_value, _weights_block.s7, _result_block.s7); \
+    }
+#endif
+
+#define INC_OFFSET(_offset, _value) _offset += _value
+#define SIMD_SIZE 8
+
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+KERNEL(lstm_dynamic_input_bfyx_opt)(
+    const __global INPUT0_TYPE* input,
+    const __global DYN_LENGTH_TYPE* dyn_lengths,
+    __global OUTPUT_TYPE* output,
+    const __global WEIGHTS_TYPE* weights
+#if BIAS_TERM
+    , const __global BIAS_TYPE* biases
+#endif
+    )
+{
+    const uint batch    = get_global_id(1) % INPUT0_BATCH_NUM;
+    const uint dir      = get_global_id(1) / INPUT0_BATCH_NUM;
+    const uint timestep = get_global_id(2);
+    if(timestep > (uint)dyn_lengths[batch])
+        return;
+    // index of this local work item within the work group
+    const uint local_work_item_id = get_local_id(0);
+    // which id in SUBGROUP we have (0..7)
+    const uint sub_group_local_id = get_sub_group_local_id();
+    // which SUBGROUP we have
+    const uint sub_group_id     = local_work_item_id / SIMD_SIZE;//get_sub_group_id();
+    const uint dir_sub_group_id = sub_group_id % SIMD_SIZE;
+    //which workgroup we have <0,1>
+    const uint wg_id     = get_group_id(0);
+    const uint wg_offset = wg_id * get_local_size(0) * SIMD_SIZE;
+    //Each subgroup owns a region of calculation (ROC); within it, each local work item computes simd_size values across the y spatial dimension.
+    //e.g. sub_group_id = 1 has a ROC that starts at y position 64.
+    const uint sub_group_offset        = SIMD_SIZE * 8;
+    const uint weights_single_dir_size = WEIGHTS_SIZE_X * WEIGHTS_SIZE_Y;
+    const uint dir_offset_for_weights  = dir * weights_single_dir_size;
+    uint calcuation_offset      = dir_offset_for_weights + wg_offset + dir_sub_group_id * sub_group_offset;
+    uint input_offset           = GET_DATA_INDEX(INPUT0, batch, timestep, dir, sub_group_local_id);
+    const uint output_offset    = GET_DATA_INDEX(OUTPUT, batch, timestep, dir, wg_offset + dir_sub_group_id * sub_group_offset);
+
+#if BIAS_TERM
+    //preload output with biases
+    const uint bias_calcuation_offset  = dir * BIAS_SIZE_X + wg_offset + dir_sub_group_id * sub_group_offset;
+    UNIT_TYPE8 dot_prod = UNIT_BLOCK_READ8(biases, bias_calcuation_offset);
+#else
+    UNIT_TYPE8 dot_prod = UNIT_VAL_ZERO;
+#endif
+
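+    // Each loop iteration consumes SIMD_SIZE input features: the scalar read by every
+    // sub-group lane is broadcast with shuffles and multiplied into eight 8-wide weight blocks.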
+    for(uint x = 0; x < INPUT0_SIZE_X / SIMD_SIZE; ++x)
+    {
+        UNIT_TYPE8 BLOCK_W0 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W1 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W2 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W3 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W4 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W5 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W6 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+        UNIT_TYPE8 BLOCK_W7 = UNIT_BLOCK_READ8(weights, calcuation_offset); INC_OFFSET(calcuation_offset, WEIGHTS_SIZE_Y);
+            
+        UNIT_TYPE input_value = input[input_offset];
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 0), BLOCK_W0);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 1), BLOCK_W1);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 2), BLOCK_W2);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 3), BLOCK_W3);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 4), BLOCK_W4);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 5), BLOCK_W5);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 6), BLOCK_W6);
+        MAD_1X8(dot_prod, intel_sub_group_shuffle(input_value, 7), BLOCK_W7);
+        
+        input_offset += SIMD_SIZE;
+    }
+
+    UNIT_BLOCK_WRITE8(output, output_offset, dot_prod);
+}
+
+#undef SIMD_SIZE
+#undef INC_OFFSET
+#undef MAD_1X8
+#undef OPT
index b79b7b6..6e01f39 100644 (file)
@@ -37,7 +37,7 @@ KERNEL(lstm_dynamic_input_ref)(
     for(uint x = 0; x < INPUT0_SIZE_X; ++x )
     {
         const uint input_idx   = GET_DATA_INDEX(INPUT0, batch, timestep, dir, x);
-        const uint weights_idx = GET_DATA_INDEX(WEIGHTS, 0, dir, y, x);
+        const uint weights_idx = GET_FILTER_INDEX(WEIGHTS, 0, dir, y, x);
         dot_prod += (ACCUMULATOR_TYPE)(input[input_idx] * weights[weights_idx]);
     }
 
index f827229..6c83fc8 100644 (file)
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "include/common.cl"
+#include "include/fetch.cl"
 #include "include/data_types.cl"
 
 
@@ -40,6 +40,13 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
             {
                 for (uint x = 0; x < INPUT0_SIZE_X; x++)
                 {
+#if INPUT0_LAYOUT_BFZYX_F16
+                    input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                    mean += (float)input[input_idx];
+                }
+            }
+        }
+#else
                     mean += (float)input[input_idx];
                     input_idx += INPUT0_X_PITCH;
                 }
@@ -48,8 +55,13 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
             input_idx += INPUT0_Z_PITCH - INPUT0_Y_PITCH*INPUT0_SIZE_Y;
         }
         input_idx += INPUT0_FEATURE_PITCH - INPUT0_Z_PITCH*INPUT0_SIZE_Z;
+#endif
     }
+#if INPUT0_LAYOUT_BFZYX_F16
+    uint output_idx;
+#else
     uint output_idx = OUTPUT_OFFSET + b * OUTPUT_BATCH_PITCH;
+#endif
     mean /= INPUT0_FEATURE_NUM * INPUT0_SIZE_Z * INPUT0_SIZE_Y * INPUT0_SIZE_X;
 
 #if NORMALIZE_VARIANCE == 0
@@ -63,6 +75,14 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
             {
                 for (uint x = 0; x < INPUT0_SIZE_X; x++)
                 {
+#if INPUT0_LAYOUT_BFZYX_F16
+                    input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                    output_idx = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
+                    output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), ACTIVATION_PARAMS);
+                }
+            }
+        }
+#else
                     output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), ACTIVATION_PARAMS);
                     input_idx += INPUT0_X_PITCH;
                     output_idx += OUTPUT_X_PITCH;
@@ -75,7 +95,7 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
         }
         input_idx += INPUT0_FEATURE_PITCH - INPUT0_Z_PITCH*INPUT0_SIZE_Z;
         output_idx += OUTPUT_FEATURE_PITCH - INPUT0_SIZE_Z*OUTPUT_Z_PITCH;
-
+#endif
     }
 
 #else //NORMALIZE_VARIANCE
@@ -91,6 +111,14 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
             {
                 for (uint x = 0; x < INPUT0_SIZE_X; x++)
                 {
+#if INPUT0_LAYOUT_BFZYX_F16
+                    input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                    float res = (float)input[input_idx] - mean;
+                    variance = fma(res, res, variance);
+                }
+            }
+        }
+#else
                     float res = (float)input[input_idx] - mean;
                     variance = fma(res, res, variance);
                     input_idx += INPUT0_X_PITCH;
@@ -100,6 +128,7 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
             input_idx += INPUT0_Z_PITCH - INPUT0_Y_PITCH*INPUT0_SIZE_Y;
         }
         input_idx += INPUT0_FEATURE_PITCH - INPUT0_Z_PITCH*INPUT0_SIZE_Z;
+#endif
     }
 
     //normalize variance
@@ -115,6 +144,14 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
             {
                 for (uint x = 0; x < INPUT0_SIZE_X; x++)
                 {
+#if INPUT0_LAYOUT_BFZYX_F16
+                    input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                    output_idx = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
+                    output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), ACTIVATION_PARAMS);
+                }
+            }
+        }
+#else
                     output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), ACTIVATION_PARAMS);
                     input_idx += INPUT0_X_PITCH;
                     output_idx += OUTPUT_X_PITCH;
@@ -127,6 +164,7 @@ KERNEL (mvn_gpu_ref_accross_channels)(const __global UNIT_TYPE* input, __global
         }
         input_idx += INPUT0_FEATURE_PITCH - INPUT0_Z_PITCH*INPUT0_SIZE_Z;
         output_idx += OUTPUT_FEATURE_PITCH - INPUT0_SIZE_Z*OUTPUT_Z_PITCH;
+#endif
     }
 #endif
 }
index 21daad6..9801105 100644 (file)
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "include/common.cl"
+#include "include/fetch.cl"
 #include "include/data_types.cl"
 
 
@@ -39,17 +39,27 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
         {
             for (uint x = 0; x < INPUT0_SIZE_X; x++)
             {
+#if INPUT0_LAYOUT_BFZYX_F16
+                input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                mean += (float)input[input_idx];
+             }
+        }
+#else
                 mean += (float)input[input_idx];
                 input_idx += INPUT0_X_PITCH;
             }
             input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
         }
         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
+#endif
     }
     mean /= INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z;
 
+#if INPUT0_LAYOUT_BFZYX_F16
+    uint output_idx;
+#else
     uint output_idx = OUTPUT_OFFSET + b * OUTPUT_BATCH_PITCH + f * OUTPUT_FEATURE_PITCH;
-
+#endif
 #if NORMALIZE_VARIANCE == 0
     //subtract mean
     input_idx = input_first;
@@ -59,6 +69,13 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
         {
             for (uint x = 0; x < INPUT0_SIZE_X; x++)
             {
+#if INPUT0_LAYOUT_BFZYX_F16
+                input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                output_idx = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
+                output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), ACTIVATION_PARAMS);
+            }
+        }
+#else
                 output[output_idx] = ACTIVATION(input[input_idx] - UNIT_CVT_FUNC(mean), ACTIVATION_PARAMS);
                 input_idx += INPUT0_X_PITCH;
                 output_idx += OUTPUT_X_PITCH;
@@ -68,7 +85,7 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
         }
         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
         output_idx += OUTPUT_Z_PITCH - INPUT0_SIZE_Y*OUTPUT_Y_PITCH;
-
+#endif
     }
 #else //NORMALIZE_VARIANCE
     float variance = 0.f;
@@ -81,6 +98,13 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
         {
             for (uint x = 0; x < INPUT0_SIZE_X; x++)
             {
+#if INPUT0_LAYOUT_BFZYX_F16
+                input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                float res = (float)input[input_idx] - mean;
+                variance = fma(res, res, variance);
+            }
+        }
+#else
                 float res = (float)input[input_idx] - mean;
                 variance = fma(res, res, variance);
                 input_idx += INPUT0_X_PITCH;
@@ -88,6 +112,7 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
             input_idx += INPUT0_Y_PITCH - INPUT0_SIZE_X*INPUT0_X_PITCH;
         }
         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
+#endif
     }
 
     //normalize variance
@@ -101,6 +126,13 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
         {
             for (uint x = 0; x < INPUT0_SIZE_X; x++)
             {
+#if INPUT0_LAYOUT_BFZYX_F16
+                input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
+                output_idx = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
+                output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), ACTIVATION_PARAMS);
+            }
+        }
+#else
                 output[output_idx] = ACTIVATION((input[input_idx] - UNIT_CVT_FUNC(mean)) * UNIT_CVT_FUNC(variance), ACTIVATION_PARAMS);
                 input_idx += INPUT0_X_PITCH;
                 output_idx += OUTPUT_X_PITCH;
@@ -110,6 +142,7 @@ KERNEL (mvn_gpu_ref_within_channels)(const __global UNIT_TYPE* input, __global U
         }
         input_idx += INPUT0_Z_PITCH - INPUT0_SIZE_Y*INPUT0_Y_PITCH;
         output_idx += OUTPUT_Z_PITCH - INPUT0_SIZE_Y*OUTPUT_Y_PITCH;
+#endif
     }
 #endif
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/ocl_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/ocl_types.h
new file mode 100644 (file)
index 0000000..332e955
--- /dev/null
@@ -0,0 +1,444 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+// #include "ocl_math_utils.h"
+
+#define for_ for
+
+#define CONCAt2(a, b) a##b
+#define CONCAT2(a, b) CONCAt2(a, b)
+
+#if DT_F32 == 1
+#define DATA_T float
+#define DATA8_T float8
+#define DATA_MAX FLT_MAX
+#define DATA_MIN -DATA_MAX
+#define DATA_ZERO 0.0f
+#define DATA_ONE 1.0f
+#define DEF_ACC_DATA_T float
+#define DEF_ACC_DATA8_T float8
+#define POST_OP_DATA_T float
+#define TO_DATA_T(v) (float)(v)
+#define TO_DEF_ACC_DATA_T(v) (float)(v)
+#define DATA_TO_REF convert_float
+#define CONVERT_DATA_T convert_float
+#define CONVERT_DATA8_T convert_float8
+#define CONVERT_FLOAT_T convert_float
+#define CONVERT_FLOAT8_T convert_float8
+#define ROUND
+
+#define BLOCK_READ intel_sub_group_block_read
+#define BLOCK_WRITE intel_sub_group_block_write
+#define BLOCK_READ8 intel_sub_group_block_read8
+#define BLOCK_WRITE8 intel_sub_group_block_write8
+
+#define AS_DATA_T as_float
+#define AS_DATA8_T as_float8
+
+#define AS_UINT_T as_uint
+#define AS_UINT8_T as_uint8
+
+#define BLOCK_DATA_T uint
+#define BLOCK_DATA8_T uint8
+#define AS_BLOCK_DATA_T as_uint
+#define AS_BLOCK_DATA8_T as_uint8
+#elif DT_F16 == 1
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define DATA_T half
+#define DATA8_T half8
+#define DATA_MAX HALF_MAX
+#define DATA_MIN -DATA_MAX
+#define DATA_ZERO 0.0h
+#define DATA_ONE 1.0h
+#define DEF_ACC_DATA_T half
+#define DEF_ACC_DATA8_T half8
+#define POST_OP_DATA_T half
+#define TO_DATA_T(v) (half)(v)
+#define TO_DEF_ACC_DATA_T(v) (half)(v)
+#define DATA_TO_REF convert_half
+#define CONVERT_DATA_T convert_half
+#define CONVERT_DATA8_T convert_half8
+#define CONVERT_FLOAT_T convert_float
+#define CONVERT_FLOAT8_T convert_float8
+#define ROUND
+
+#define BLOCK_READ intel_sub_group_block_read_us
+#define BLOCK_WRITE intel_sub_group_block_write_us
+#define BLOCK_READ8 intel_sub_group_block_read_us8
+#define BLOCK_WRITE8 intel_sub_group_block_write_us8
+#define AS_DATA_T as_half
+#define AS_DATA8_T as_half8
+
+#define AS_UINT_T as_ushort
+#define AS_UINT8_T as_ushort8
+
+#define BLOCK_DATA_T ushort
+#define BLOCK_DATA8_T ushort8
+#define AS_BLOCK_DATA_T as_ushort
+#define AS_BLOCK_DATA8_T as_ushort8
+#elif DT_BF16 == 1
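+// bf16 values are carried as raw ushort bit patterns; arithmetic uses f32 accumulators
+// and the convert_bf16_to_f32 / convert_f32_to_bf16 helpers.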
+#define DATA_T ushort
+#define POST_OP_DATA_T float
+#define DATA8_T ushort8
+#define DATA_MAX 3.38953138925153547590470800371487866880e+38F
+#define DATA_MIN (-DATA_MAX)
+#define DATA_ZERO 0.0f
+#define DATA_ONE 1.0f
+#define DEF_ACC_DATA_T float
+#define DEF_ACC_DATA8_T float8
+#define TO_DATA_T(v) convert_f32_to_bf16(v)
+#define TO_DEF_ACC_DATA_T(v) convert_bf16_to_f32(v)
+#define DATA_TO_REF convert_bf16_to_f32
+#define CONVERT_DATA_T convert_f32_to_bf16
+#define CONVERT_DATA8_T convert_f32_to_bf16_vec8
+#define CONVERT_FLOAT_T convert_bf16_to_f32
+#define CONVERT_FLOAT8_T convert_bf16_to_f32_vec8
+#define ROUND
+
+#define BLOCK_READ intel_sub_group_block_read_us
+#define BLOCK_WRITE intel_sub_group_block_write_us
+#define BLOCK_READ8 intel_sub_group_block_read_us8
+#define BLOCK_WRITE8 intel_sub_group_block_write_us8
+#define AS_DATA_T as_ushort
+#define AS_DATA8_T as_ushort8
+
+#define AS_UINT_T as_ushort
+#define AS_UINT8_T as_ushort8
+
+#define BLOCK_DATA_T ushort
+#define BLOCK_DATA8_T ushort8
+#define AS_BLOCK_DATA_T as_ushort
+#define AS_BLOCK_DATA8_T as_ushort8
+#elif DT_S8 == 1
+#define DATA_T char
+#define DATA8_T char8
+#define DATA_MAX CHAR_MAX
+#define DATA_MIN CHAR_MIN
+#define DATA_ZERO 0
+#define DATA_ONE 1
+#define DEF_ACC_DATA_T int
+#define DEF_ACC_DATA8_T int8
+#define POST_OP_DATA_T float
+#define TO_DATA_T(v) (char)(v)
+#define DATA_TO_REF convert_char
+#define CONVERT_DATA_T convert_char
+#define CONVERT_DATA8_T convert_char8
+#define ROUND rint
+
+#define BLOCK_READ intel_sub_group_block_read_uc
+#define BLOCK_WRITE intel_sub_group_block_write_uc
+#define BLOCK_READ8 intel_sub_group_block_read_uc8
+#define BLOCK_WRITE8 intel_sub_group_block_write_uc8
+#define AS_DATA_T as_char
+#define AS_DATA8_T as_char8
+
+#define AS_UINT_T as_uchar
+#define AS_UINT8_T as_uchar8
+
+#define BLOCK_DATA_T uchar
+#define BLOCK_DATA8_T uchar8
+#define AS_BLOCK_DATA_T as_uchar
+#define AS_BLOCK_DATA8_T as_uchar8
+#elif DT_U8 == 1
+#define DATA_T uchar
+#define DATA8_T uchar8
+#define DATA_MAX UCHAR_MAX
+#define DATA_MIN 0
+#define DATA_ZERO 0
+#define DATA_ONE 1
+#define DEF_ACC_DATA_T int
+#define DEF_ACC_DATA8_T int8
+#define POST_OP_DATA_T float
+#define TO_DATA_T(v) (uchar)(v)
+#define DATA_TO_REF convert_uchar
+#define CONVERT_DATA_T convert_uchar
+#define CONVERT_DATA8_T convert_uchar8
+#define ROUND rint
+
+#define BLOCK_READ intel_sub_group_block_read_uc
+#define BLOCK_WRITE intel_sub_group_block_write_uc
+#define BLOCK_READ8 intel_sub_group_block_read_uc8
+#define BLOCK_WRITE8 intel_sub_group_block_write_uc8
+#define AS_DATA_T as_uchar
+#define AS_DATA8_T as_uchar8
+
+#define AS_UINT_T as_uchar
+#define AS_UINT8_T as_uchar8
+
+#define BLOCK_DATA_T uchar
+#define BLOCK_DATA8_T uchar8
+#define AS_BLOCK_DATA_T as_uchar
+#define AS_BLOCK_DATA8_T as_uchar8
+#elif DT_S32 == 1
+#define DATA_T int
+#define CONVERT_DATA_T convert_int_sat_rte
+#define POST_OP_DATA_T float
+#elif !defined(DT_UNDEF)
+#error "Unexpected data type"
+#endif
+
+#if VECT_DT_N == 1
+#define VECT_DATA_T DATA_T
+#define VECT_DEF_ACC_DATA_T DEF_ACC_DATA_T
+#define AS_VECT_DATA_T AS_DATA_T
+#define VECT_BLOCK_READ BLOCK_READ
+#define VECT_BLOCK_WRITE BLOCK_WRITE
+#define VECT_UINT_READ intel_sub_group_block_read
+#define VECT_UINT_WRITE intel_sub_group_block_write
+#define VECT_BLOCK_DATA_T BLOCK_DATA_T
+#define AS_VECT_BLOCK_DATA_T AS_BLOCK_DATA_T
+#define CONVERT_VECT_FLOAT_T CONVERT_FLOAT_T
+#define CONVERT_VECTOR_DATA_T CONVERT_DATA_T
+#define VECT_INT_T int
+#define VECT_UINT_T uint
+#define VECT_FLOAT_T float
+#define AS_VECT_INT_T as_int
+#define AS_VECT_UINT_T as_uint
+#elif VECT_DT_N == 8
+#define VECT_DATA_T DATA8_T
+#define VECT_DEF_ACC_DATA_T DEF_ACC_DATA8_T
+#define AS_VECT_DATA_T AS_DATA8_T
+#define VECT_BLOCK_READ BLOCK_READ8
+#define VECT_BLOCK_WRITE BLOCK_WRITE8
+#define VECT_UINT_READ intel_sub_group_block_read8
+#define VECT_UINT_WRITE intel_sub_group_block_write8
+#define VECT_BLOCK_DATA_T BLOCK_DATA8_T
+#define AS_VECT_BLOCK_DATA_T AS_BLOCK_DATA8_T
+#define CONVERT_VECT_FLOAT_T CONVERT_FLOAT8_T
+#define CONVERT_VECTOR_DATA_T CONVERT_DATA8_T
+#define VECT_INT_T int8
+#define VECT_UINT_T uint8
+#define VECT_FLOAT_T float8
+#define AS_VECT_INT_T as_int8
+#define AS_VECT_UINT_T as_uint8
+#endif
+
+#ifdef SRC_DATA_T
+#define SRC_DATA8_T CONCAT2(SRC_DATA_T, 8)
+#if SRC_DT_BF16
+#define SRC_TO_REF(x) convert_bf16_to_f32(x)
+#define SRC_TO_REF8(x) convert_bf16_to_f32_vec8(x)
+#else
+#define SRC_TO_REF(x) (x)
+#define SRC_TO_REF8(x) (x)
+#endif
+#if SRC_DT_BF16
+#define TO_SRC(x) convert_f32_to_bf16(x)
+#elif SRC_DT_U8
+#define TO_SRC(x) convert_uchar_sat_rte(x)
+#elif SRC_DT_S8
+#define TO_SRC(x) convert_char_sat_rte(x)
+#elif SRC_DT_S32
+#define TO_SRC(x) convert_int_sat_rte(x)
+#else
+#define TO_SRC(x) (x)
+#endif
+#endif
+
+#ifdef WEI_DATA_T
+#if WEI_DT_BF16
+#define WEI_TO_REF(x) convert_bf16_to_f32(x)
+#define REF_TO_WEI(x) convert_f32_to_bf16(x)
+#else
+#define WEI_TO_REF(x) (x)
+#define REF_TO_WEI(x) (x)
+#endif
+#if WEI_DT_BF16
+#define TO_WEI(x) convert_f32_to_bf16(x)
+#elif WEI_DT_U8
+#define TO_WEI(x) convert_uchar_sat_rte(x)
+#elif WEI_DT_S8
+#define TO_WEI(x) convert_char_sat_rte(x)
+#elif WEI_DT_S32
+#define TO_WEI(x) convert_int_sat_rte(x)
+#else
+#define TO_WEI(x) (x)
+#endif
+#endif
+
+#ifdef BIA_DATA_T
+#if BIA_DT_BF16
+#define BIA_TO_REF(x) convert_bf16_to_f32(x)
+#define REF_TO_BIA(x) convert_f32_to_bf16(x)
+#else
+#define BIA_TO_REF(x) (x)
+#define REF_TO_BIA(x) (x)
+#endif
+#if BIA_DT_BF16
+#define TO_BIA(x) convert_f32_to_bf16(x)
+#elif BIA_DT_U8
+#define TO_BIA(x) convert_uchar_sat_rte(x)
+#elif BIA_DT_S8
+#define TO_BIA(x) convert_char_sat_rte(x)
+#elif BIA_DT_S32
+#define TO_BIA(x) convert_int_sat_rte(x)
+#else
+#define TO_BIA(x) (x)
+#endif
+#endif
+
+#ifdef DST_DATA_T
+#define DST_DATA8_T CONCAT2(DST_DATA_T, 8)
+#if DST_DT_BF16
+#define DST_TO_REF(x) convert_bf16_to_f32(x)
+#define DST_TO_REF8(x) convert_bf16_to_f32_vec8(x)
+#define REF_TO_DST(x) convert_f32_to_bf16(x)
+#define REF_TO_DST8(x) convert_f32_to_bf16_vec8(convert_float8(x))
+#else
+#define DST_TO_REF(x) (x)
+#define DST_TO_REF8(x) (x)
+#define REF_TO_DST(x) (x)
+#define REF_TO_DST8(x) (x)
+#endif
+#if DST_DT_BF16
+#define TO_DST(x) convert_f32_to_bf16(x)
+#define TO_DST8(x) convert_f32_to_bf16_vec8(convert_float8(x))
+#elif DST_DT_F16
+#define TO_DST(x) convert_half(x)
+#define TO_DST8(x) convert_half8(x)
+#elif DST_DT_U8
+#define TO_DST(x) convert_uchar_sat_rte(x)
+#define TO_DST8(x) convert_uchar8_sat_rte(x)
+#elif DST_DT_S8
+#define TO_DST(x) convert_char_sat_rte(x)
+#define TO_DST8(x) convert_char8_sat_rte(x)
+#elif DST_DT_S32
+#define TO_DST(x) convert_int_sat_rte(x)
+#define TO_DST8(x) convert_int8_sat_rte(x)
+#elif DST_DT_F32
+#define TO_DST(x) convert_float(x)
+#define TO_DST8(x) convert_float8(x)
+#else
+#error "Not expected"
+#endif
+#endif
+
+#ifdef ACC_DATA_T
+#if ACC_DT_F16
+#define TO_ACC(x) convert_half(x)
+#elif ACC_DT_F32
+#define TO_ACC(x) convert_float(x)
+#elif ACC_DT_S32
+#define TO_ACC(x) convert_int(x)
+#else
+#error "Unexpected accumulation data type"
+#endif
+#endif
+
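+// OFF_MD: element offset for a blocked memory descriptor with up to six dimensions; each
+// coordinate is split into three levels with the _Bn_1 / _Bn_2 block sizes and combined
+// with the matching _Sn_0.._Sn_2 strides.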
+#define OFF_MD(prefix, x0, x1, x2, x3, x4, x5) \
+    ((x0 / prefix##_B0_2) / prefix##_B0_1 * prefix##_S0_0) \
+            + ((x0 / prefix##_B0_2) % prefix##_B0_1 * prefix##_S0_1) \
+            + ((x0 % prefix##_B0_2) * prefix##_S0_2) \
+            + ((x1 / prefix##_B1_2) / prefix##_B1_1 * prefix##_S1_0) \
+            + ((x1 / prefix##_B1_2) % prefix##_B1_1 * prefix##_S1_1) \
+            + ((x1 % prefix##_B1_2) * prefix##_S1_2) \
+            + ((x2 / prefix##_B2_2) / prefix##_B2_1 * prefix##_S2_0) \
+            + ((x2 / prefix##_B2_2) % prefix##_B2_1 * prefix##_S2_1) \
+            + ((x2 % prefix##_B2_2) * prefix##_S2_2) \
+            + ((x3 / prefix##_B3_2) / prefix##_B3_1 * prefix##_S3_0) \
+            + ((x3 / prefix##_B3_2) % prefix##_B3_1 * prefix##_S3_1) \
+            + ((x3 % prefix##_B3_2) * prefix##_S3_2) \
+            + ((x4 / prefix##_B4_2) / prefix##_B4_1 * prefix##_S4_0) \
+            + ((x4 / prefix##_B4_2) % prefix##_B4_1 * prefix##_S4_1) \
+            + ((x4 % prefix##_B4_2) * prefix##_S4_2) \
+            + ((x5 / prefix##_B5_2) / prefix##_B5_1 * prefix##_S5_0) \
+            + ((x5 / prefix##_B5_2) % prefix##_B5_1 * prefix##_S5_1) \
+            + ((x5 % prefix##_B5_2) * prefix##_S5_2)
+
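+// Plain per-tensor offset helpers; the unused d/h placeholder arguments keep the call
+// sites uniform across NDIMS == 3, 4 and 5.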
+#if NDIMS == 3
+#define SRC_OFF(x0, x1, d, h, x2) \
+    (((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \
+            + ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \
+            + ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2)
+
+#if WITH_GROUPS == 1
+#define WHT_OFF(x0, x1, x2, d, h, x3) \
+    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
+            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
+            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2 \
+            + ((x3) % WHT_B3) * WHT_SB3 + ((x3) / WHT_B3) * WHT_S3)
+#else
+#define WHT_OFF(g, x0, x1, d, h, x2) \
+    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
+            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
+            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2)
+#endif
+
+#define DST_OFF(x0, x1, d, h, x2) \
+    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
+            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \
+            + ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2)
+#elif NDIMS == 4
+#define SRC_OFF(x0, x1, d, x2, x3) \
+    (((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \
+            + ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \
+            + ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2 \
+            + ((x3) % SRC_B3) * SRC_SB3 + ((x3) / SRC_B3) * SRC_S3)
+
+#if WITH_GROUPS == 1
+#define WHT_OFF(x0, x1, x2, d, x3, x4) \
+    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
+            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
+            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2 \
+            + ((x3) % WHT_B3) * WHT_SB3 + ((x3) / WHT_B3) * WHT_S3 \
+            + ((x4) % WHT_B4) * WHT_SB4 + ((x4) / WHT_B4) * WHT_S4)
+#else
+#define WHT_OFF(g, x1, x2, d, x3, x4) \
+    (((x1) % WHT_B0) * WHT_SB0 + ((x1) / WHT_B0) * WHT_S0 \
+            + ((x2) % WHT_B1) * WHT_SB1 + ((x2) / WHT_B1) * WHT_S1 \
+            + ((x3) % WHT_B2) * WHT_SB2 + ((x3) / WHT_B2) * WHT_S2 \
+            + ((x4) % WHT_B3) * WHT_SB3 + ((x4) / WHT_B3) * WHT_S3)
+#endif
+
+#define DST_OFF(x0, x1, d, x2, x3) \
+    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
+            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \
+            + ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2 \
+            + ((x3) % DST_B3) * DST_SB3 + ((x3) / DST_B3) * DST_S3)
+#elif NDIMS == 5
+#define SRC_OFF(x0, x1, x2, x3, x4) \
+    (((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \
+            + ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \
+            + ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2 \
+            + ((x3) % SRC_B3) * SRC_SB3 + ((x3) / SRC_B3) * SRC_S3 \
+            + ((x4) % SRC_B4) * SRC_SB4 + ((x4) / SRC_B4) * SRC_S4)
+
+#if WITH_GROUPS == 1
+#define WHT_OFF(x0, x1, x2, x3, x4, x5) \
+    (((x0) % WHT_B0) * WHT_SB0 + ((x0) / WHT_B0) * WHT_S0 \
+            + ((x1) % WHT_B1) * WHT_SB1 + ((x1) / WHT_B1) * WHT_S1 \
+            + ((x2) % WHT_B2) * WHT_SB2 + ((x2) / WHT_B2) * WHT_S2 \
+            + ((x3) % WHT_B3) * WHT_SB3 + ((x3) / WHT_B3) * WHT_S3 \
+            + ((x4) % WHT_B4) * WHT_SB4 + ((x4) / WHT_B4) * WHT_S4 \
+            + ((x5) % WHT_B5) * WHT_SB5 + ((x5) / WHT_B5) * WHT_S5)
+#else
+#define WHT_OFF(g, x1, x2, x3, x4, x5) \
+    (((x1) % WHT_B0) * WHT_SB0 + ((x1) / WHT_B0) * WHT_S0 \
+            + ((x2) % WHT_B1) * WHT_SB1 + ((x2) / WHT_B1) * WHT_S1 \
+            + ((x3) % WHT_B2) * WHT_SB2 + ((x3) / WHT_B2) * WHT_S2 \
+            + ((x4) % WHT_B3) * WHT_SB3 + ((x4) / WHT_B3) * WHT_S3 \
+            + ((x5) % WHT_B4) * WHT_SB4 + ((x5) / WHT_B4) * WHT_S4)
+#endif
+
+#define DST_OFF(x0, x1, x2, x3, x4) \
+    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
+            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \
+            + ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2 \
+            + ((x3) % DST_B3) * DST_SB3 + ((x3) / DST_B3) * DST_S3 \
+            + ((x4) % DST_B4) * DST_SB4 + ((x4) / DST_B4) * DST_S4)
+#endif
+
index dfa6a7e..8ff8ba8 100644 (file)
 ///////////////////////// Input Index /////////////////////////
 inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
-#if   INPUT0_SIMPLE && INPUT0_DIMS < 6
+#if   INPUT0_SIMPLE && INPUT0_DIMS < 5
     return GET_DATA_INDEX(INPUT0, b, f, y, x);
+#elif INPUT0_SIMPLE && INPUT0_DIMS == 5
+    return GET_DATA_INDEX_5D(INPUT0, b, f, z, y, x);
 #elif INPUT0_SIMPLE && INPUT0_DIMS == 6
     return GET_DATA_INDEX_6D(INPUT0, b, f, w, z, y, x);
 #elif defined INPUT0_LAYOUT_BS_F_BSV8__AF8  || \
@@ -38,6 +40,8 @@ inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x
     return GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, y, x);
 #elif defined INPUT0_LAYOUT_FS_B_YX_FSV32
     return GET_DATA_FS_B_YX_FSV32_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
 #else
 #error permute_ref.cl: input format - not supported
 #endif
@@ -51,8 +55,10 @@ inline uint FUNC(get_input3d_index)(uint b, uint f, uint z, uint y, uint x)
 ///////////////////////// Output Index /////////////////////////
 inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
-#if   OUTPUT_SIMPLE && OUTPUT_DIMS < 6
+#if   OUTPUT_SIMPLE && OUTPUT_DIMS < 5
     return GET_DATA_INDEX(OUTPUT, b, f, y, x);
+#elif OUTPUT_SIMPLE && OUTPUT_DIMS == 5
+    return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
 #elif OUTPUT_SIMPLE && OUTPUT_DIMS == 6
     return GET_DATA_INDEX_6D(OUTPUT, b, f, w, z, y, x);
 #elif defined OUTPUT_LAYOUT_BS_F_BSV8__AF8  || \
@@ -72,16 +78,13 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint
     return GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, b, f, y, x);
 #elif defined OUTPUT_LAYOUT_FS_B_YX_FSV32
     return GET_DATA_FS_B_YX_FSV32_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
 #else
 #error permute_ref.cl: output format - not supported
 #endif
 }
 
-inline uint FUNC(get_output3d_index)(uint b, uint f, uint z, uint y, uint x)
-{
-    return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
-}
-
 KERNEL (permute_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
 {
     uint8 input_indices, output_indices;
@@ -107,12 +110,7 @@ KERNEL (permute_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
     uint input_offset;
     uint output_offset;
 
-#if   INPUT0_DIMS == 5
-     input_offset =  FUNC_CALL(get_input3d_index)(input_indices[0], input_indices[1], input_indices[4], input_indices[3], input_indices[2]);
-     output_offset = FUNC_CALL(get_output3d_index)(output_indices[0], output_indices[1], output_indices[4], output_indices[3], output_indices[2]);
-#else
     input_offset =  FUNC_CALL(get_input_index)(input_indices[0], input_indices[1], input_indices[5], input_indices[4], input_indices[3], input_indices[2]);
     output_offset = FUNC_CALL(get_output_index)(output_indices[0], output_indices[1], output_indices[5], output_indices[4], output_indices[3], output_indices[2]);
-#endif
     output[output_offset] = ACTIVATION(input[input_offset], ACTIVATION_PARAMS);
 }
index 97c3a20..2815a34 100644 (file)
@@ -39,9 +39,9 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
 #endif
 )
 {
-#if OUTPUT_LAYOUT_BFYX  || OUTPUT_LAYOUT_BYXF || OUTPUT_LAYOUT_BFZYX
+#if OUTPUT_LAYOUT_BFYX  || OUTPUT_LAYOUT_BYXF || OUTPUT_LAYOUT_BFZYX || OUTPUT_LAYOUT_BFZYX_F16
     const uint x    = (uint)get_global_id(0);
-#if  INPUT0_SIZE_Z == 1
+#if  OUTPUT_DIMS < 5
     const uint y    = (uint)get_global_id(1);
     const uint z = 0;
 #else
@@ -91,7 +91,7 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
 #endif
 
     const uint batch_and_feature_offset = GET_DATA_INDEX(INPUT0, b, f, 0, 0);
-#if  INPUT0_SIZE_Z != 1  // 3D
+#if  OUTPUT_DIMS == 5  // 3D
     for(uint k = 0; k < POOL_SIZE_Z; k++)
     {
         int input_offset_z = offset_z + k;
@@ -111,20 +111,24 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
                 bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
                 if(!zero)
                 {
-#if  INPUT0_SIZE_Z == 1
+#if  OUTPUT_DIMS < 5
                     const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH;
 #else
+  #if OUTPUT_LAYOUT_BFZYX_F16
+                    const uint input_idx = GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, input_offset_z, input_offset_y, input_offset_x);
+  #else
                     const uint input_idx = batch_and_feature_offset + input_offset_z*INPUT0_Z_PITCH + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH;
+  #endif
 #endif
 
 #if MAX_WITH_ARGMAX_POOLING
                     if(input[input_idx] > result)
                     {
-#if  INPUT0_SIZE_Z == 1
+#if  OUTPUT_DIMS < 5
                         const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
 #else
                         const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y *
-                                                               (input_offset_z + INPUT0_SIZE_Z * (f + INPUT0_FEATURE_NUM * b));
+                                                               (input_offset_z + INPUT0_SIZE_Z * (f + INPUT0_FEATURE_NUM * b)));
 #endif
                         arg_max_idx = input_idx_bfyx_no_padding;
                     }
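
The hunk above also closes a parenthesis that was previously unbalanced in the argmax path's padding-free index. The intended formula nests the dimensions as x + X*(y + Y*(z + Z*(f + F*b))), which is the same value as an explicit pitch sum; a minimal check, with placeholder sizes:

    #include <cassert>
    #include <cstdint>

    // Padding-free flat index for a bfzyx tensor, written both as the nested
    // expression used in the kernel and as an explicit pitch sum; they must agree.
    int main() {
        const uint32_t SIZE_X = 7, SIZE_Y = 5, SIZE_Z = 3, FEATURE_NUM = 4;
        const uint32_t b = 1, f = 2, z = 2, y = 4, x = 6;

        const uint32_t nested = x + SIZE_X * (y + SIZE_Y * (z + SIZE_Z * (f + FEATURE_NUM * b)));
        const uint32_t pitches = x
                               + y * SIZE_X
                               + z * SIZE_X * SIZE_Y
                               + f * SIZE_X * SIZE_Y * SIZE_Z
                               + b * SIZE_X * SIZE_Y * SIZE_Z * FEATURE_NUM;
        assert(nested == pitches);
        return 0;
    }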
@@ -138,7 +142,7 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
             }
         }
     }
-#if  INPUT0_SIZE_Z != 1 // 3D
+#if  OUTPUT_DIMS == 5 // 3D
         }
     }
 #endif
@@ -155,21 +159,21 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
 #endif
 #endif
 #else
-#if  INPUT0_SIZE_Z != 1  // 3D
+#if  OUTPUT_DIMS == 5  // 3D
     uint input_idx = GET_DATA_INDEX_5D(INPUT0, b, f, offset_z, offset_y, offset_x);
 #else
     uint input_idx = GET_DATA_INDEX(INPUT0, b, f, offset_y, offset_x);
 #endif
 
 #if MAX_WITH_ARGMAX_POOLING
-#if  INPUT0_SIZE_Z == 1
+#if  OUTPUT_DIMS < 5
     uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
 #else
     uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (offset_z + INPUT0_SIZE_Z *(f + INPUT0_FEATURE_NUM * b)));
 #endif
 #endif
 
-#if  INPUT0_SIZE_Z != 1  // 3D
+#if  OUTPUT_DIMS == 5  // 3D
     for(uint k = 0; k < POOL_SIZE_Z; k++)
     {
 #endif
@@ -194,7 +198,7 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
         input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
 #endif
     }
-#if  INPUT0_SIZE_Z != 1  // 3D
+#if  OUTPUT_DIMS == 5  // 3D
         input_idx += (INPUT0_Z_PITCH - POOL_SIZE_Y*INPUT0_Y_PITCH);
 #if MAX_WITH_ARGMAX_POOLING
         input_idx_bfyx_no_padding += (INPUT0_SIZE_Y - POOL_SIZE_Y);
@@ -215,7 +219,11 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output
     #endif
 #endif
 
+#if OUTPUT_LAYOUT_BFZYX_F16
+    const uint output_pos = GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
+#else
     const uint output_pos = GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
+#endif
     output[output_pos] = ACTIVATION(TO_UNIT_TYPE(result), ACTIVATION_PARAMS);
 
 #if MAX_WITH_ARGMAX_POOLING
 
 #include "include/common.cl"
 #include "include/data_types.cl"
-
-#if FP16_UNIT_USED
-#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_half(intel_sub_group_block_read_us((const __global uint*)(ptr) + (byte_offset)))
-#else
-#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_float(intel_sub_group_block_read((const __global uint*)(ptr) + (byte_offset)))
-#endif
+#include "include/fetch.cl"
 
 __attribute__((intel_reqd_sub_group_size(16)))
-KERNEL(quantize_ref)(const __global UNIT_TYPE* input,
-                     const __global UNIT_TYPE* input_low,
-                     const __global UNIT_TYPE* input_high,
-                     const __global UNIT_TYPE* output_low,
-                     const __global UNIT_TYPE* output_high,
+KERNEL(quantize_ref)(const __global INPUT0_TYPE* input,
+                     const __global INPUT1_TYPE* input_low,
+                     const __global INPUT2_TYPE* input_high,
+                     const __global INPUT3_TYPE* output_low,
+                     const __global INPUT4_TYPE* output_high,
                            __global OUTPUT_TYPE* output)
 {
     const int b = get_global_id(0);
@@ -36,11 +31,6 @@ KERNEL(quantize_ref)(const __global UNIT_TYPE* input,
     const int y = get_global_id(2) / OUTPUT_SIZE_X;
 
 #if PACKED_BINARY_OUTPUT
-    const int input_offset = INPUT0_OFFSET
-                           + b*INPUT0_BATCH_PITCH
-                           + of*OC_BLOCK_SIZE*INPUT0_FEATURE_PITCH
-                           + y*INPUT0_Y_PITCH
-                           + x*INPUT0_X_PITCH;
     const int output_offset = OUTPUT_OFFSET
                             + b*OUTPUT_FEATURE_NUM_PACKED*OUTPUT_FEATURE_PITCH
                             + of*OUTPUT_FEATURE_PITCH
@@ -60,7 +50,7 @@ KERNEL(quantize_ref)(const __global UNIT_TYPE* input,
     int limit = min((int)OC_BLOCK_SIZE, (int)INPUT0_FEATURE_NUM);
     for (int f = 0; f < limit; f++)
     {
-        UNIT_TYPE val = input[input_offset + f*INPUT0_FEATURE_PITCH];
+        UNIT_TYPE val = input[INPUT0_GET_INDEX(b, of*OC_BLOCK_SIZE + f, y, x)];
         UNIT_TYPE threshold  = input_low[threshold_offset + ((of*OC_BLOCK_SIZE + f) % INPUT1_FEATURE_NUM)*INPUT1_FEATURE_PITCH];
 #if PER_CHANNEL_OUT_VAL
         int high_bit = output_high[of*OC_BLOCK_SIZE + f] == UNIT_VAL_ONE ? 1 : 0;
@@ -106,26 +96,29 @@ KERNEL(quantize_ref)(const __global UNIT_TYPE* input,
                                  + (y % INPUT4_SIZE_Y)*INPUT4_Y_PITCH
                                  + (x % INPUT4_SIZE_X)*INPUT4_X_PITCH;
 
-    UNIT_TYPE val = ALIGNED_BLOCK_READ(input, input_offset);
+    INPUT0_TYPE val = input[input_offset];
     if (x >= OUTPUT_SIZE_X || y >= OUTPUT_SIZE_Y)
         return;
 
-    UNIT_TYPE input_low_val  = input_low[input_low_offset];
-    UNIT_TYPE input_high_val  = input_high[input_high_offset];
-    UNIT_TYPE output_low_val  = output_low[output_low_offset];
-    UNIT_TYPE output_high_val  = output_high[output_high_offset];
+    INPUT0_TYPE input_low_val  = input_low[input_low_offset];
+    INPUT0_TYPE input_high_val  = input_high[input_high_offset];
+    INPUT0_TYPE output_low_val  = output_low[output_low_offset];
+    INPUT0_TYPE output_high_val  = output_high[output_high_offset];
+
+
     if (val <= input_low_val)
     {
-        output[output_offset] = output_low_val;
+        output[output_offset] = TO_OUTPUT_TYPE(output_low_val);
     }
     else if (val > input_high_val)
     {
-        output[output_offset] = output_high_val;
+        output[output_offset] = TO_OUTPUT_TYPE(output_high_val);
     }
     else
     {
-       output[output_offset] = round((val - input_low_val) / (input_high_val - input_low_val) * (LEVELS-1)) /
-                               (LEVELS-1) * (output_high_val - output_low_val) + output_low_val;
+       output[output_offset] = TO_OUTPUT_TYPE(round((val - input_low_val) / (input_high_val - input_low_val) * (LEVELS-1)) /
+                                              (LEVELS-1) * (output_high_val - output_low_val) + output_low_val);
     }
+
 #endif
 }
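
The non-packed branch of quantize_ref.cl now reads each threshold in the input's own type and converts the result through TO_OUTPUT_TYPE, but the quantization math is unchanged: clamp below input_low, clamp above input_high, otherwise snap to one of LEVELS evenly spaced points and rescale into the output range. A scalar sketch of that formula in plain float arithmetic (the kernel performs the same steps element-wise in the tensor types):

    #include <cmath>

    // Scalar sketch of the per-element quantization above.
    float fake_quantize(float val,
                        float input_low, float input_high,
                        float output_low, float output_high,
                        int levels) {
        if (val <= input_low)
            return output_low;
        if (val > input_high)
            return output_high;
        // Snap to one of `levels` evenly spaced points, then rescale to the output range.
        const float steps = static_cast<float>(levels - 1);
        const float q = std::round((val - input_low) / (input_high - input_low) * steps) / steps;
        return q * (output_high - output_low) + output_low;
    }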
index 29d8333..d33065b 100644 (file)
 ///////////////////////// Input Index /////////////////////////
 inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
-#if   INPUT0_SIMPLE && INPUT0_DIMS < 6
+#if   INPUT0_SIMPLE && INPUT0_DIMS < 5
     return GET_DATA_INDEX(INPUT0, b, f, y, x);
+#elif INPUT0_SIMPLE && INPUT0_DIMS == 5
+    return GET_DATA_INDEX_5D(INPUT0, b, f, z, y, x);
 #elif INPUT0_SIMPLE && INPUT0_DIMS == 6
     return GET_DATA_INDEX_6D(INPUT0, b, f, w, z, y, x);
 #elif defined INPUT0_LAYOUT_BS_F_BSV8__AF8  || \
@@ -42,6 +44,8 @@ inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x
     return GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, y, x);
 #elif defined INPUT0_LAYOUT_FS_B_YX_FSV32
     return GET_DATA_FS_B_YX_FSV32_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
 #else
 #error reorder_data.cl: input format - not supported
 #endif
@@ -51,13 +55,14 @@ inline uint FUNC(get_input3d_index)(uint b, uint f, uint z, uint y, uint x)
 {
     return GET_DATA_INDEX_5D(INPUT0, b, f, z, y, x);
 }
-
 ///////////////////////// Output Index /////////////////////////
 
 inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
-#if   OUTPUT_SIMPLE && OUTPUT_DIMS < 6
+#if   OUTPUT_SIMPLE && OUTPUT_DIMS < 5
     return GET_DATA_INDEX(OUTPUT, b, f, y, x);
+#elif OUTPUT_SIMPLE && OUTPUT_DIMS == 5
+    return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
 #elif OUTPUT_SIMPLE && OUTPUT_DIMS == 6
     return GET_DATA_INDEX_6D(OUTPUT, b, f, w, z, y, x);
 #elif defined OUTPUT_LAYOUT_BS_F_BSV8__AF8  || \
@@ -77,6 +82,8 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint
     return GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, b, f, y, x);
 #elif defined OUTPUT_LAYOUT_FS_B_YX_FSV32
     return GET_DATA_FS_B_YX_FSV32_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
 #else
 #error reorder_data.cl: output format - not supported
 #endif
@@ -86,7 +93,6 @@ inline uint FUNC(get_output3d_index)(uint b, uint f, uint z, uint y, uint x)
 {
     return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
 }
-
 KERNEL (reorder_data)(
     const __global INPUT_REORDER_TYPE* input, 
     __global OUTPUT_REORDER_TYPE* output
@@ -137,7 +143,6 @@ KERNEL (reorder_data)(
     const uint input_idx  = FUNC_CALL(get_input_index)(b, f, w, z, y, x);
     const uint output_idx = FUNC_CALL(get_output_index)(ov[0],ov[1],ov[2],ov[3], ov[4], ov[5]);
 #endif
-
 #if defined MEAN_SUBTRACT_INSIDE_PARAMS
     float res = TO_MEAN_TYPE(input[input_idx]);
     res = MEAN_OP(res, VALUE_TO_SUBTRACT[f % VALUE_TO_SUBTRACT_SIZE]);
@@ -155,5 +160,5 @@ KERNEL (reorder_data)(
     CALC_TYPE res = TO_CALC_TYPE(input[input_idx]);
 #endif
 
-    output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), NL_M, NL_N);
+    output[output_idx] = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), ACTIVATION_PARAMS_TYPED);
 }
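
The reorder kernels now call ACTIVATION_TYPED with a single ACTIVATION_PARAMS_TYPED token instead of spelling out NL_M, NL_N at every call site. This relies on the preprocessor expanding macro arguments before substitution, so one "params" argument can carry several comma-separated values into the inner macro. A stripped-down illustration, where NL_M, NL_N, ACTIVATION_PARAMS, ACTIVATION_FUNC, and ACTIVATION are simplified stand-ins for the JIT-generated definitions:

    #include <cstdio>

    // Simplified stand-ins for the JIT-generated constants.
    #define NL_M 0.5f
    #define NL_N 0.25f
    #define ACTIVATION_PARAMS NL_M, NL_N
    #define ACTIVATION_FUNC(input, m, n) ((m) * (input) + (n))
    // The single `params` argument expands to "NL_M, NL_N" before substitution,
    // so ACTIVATION_FUNC receives the three arguments it expects.
    #define ACTIVATION(input, params) ACTIVATION_FUNC(input, params)

    int main() {
        std::printf("%f\n", ACTIVATION(2.0f, ACTIVATION_PARAMS));  // prints 1.250000
        return 0;
    }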
index 40e6a17..ba2fdc2 100644 (file)
@@ -22,7 +22,7 @@
 #error "Data binary reorder: unsupported input layout"
 #endif
 
-#if !OUTPUT_LAYOUT_B_FS_YX_32FP
+#if !OUTPUT_LAYOUT_BFYX && !OUTPUT_LAYOUT_B_FS_YX_32FP
 #error "Data binary reorder: unsupported output layout"
 #endif
 
@@ -39,21 +39,27 @@ KERNEL (reorder_data_binary)(const __global INPUT_REORDER_TYPE* input,
     const uint y = ((uint)(get_global_id(2))) / INPUT0_SIZE_X;
     const uint x = ((uint)(get_global_id(2))) % INPUT0_SIZE_X;
 
-    int output_index = OUTPUT_OFFSET
-                     + b * OUTPUT_PACKED_FEATURES_NUM * OUTPUT_FEATURE_PITCH
-                     + f * OUTPUT_FEATURE_PITCH
-                     + y * OUTPUT_Y_PITCH
-                     + x * OUTPUT_X_PITCH;
 
-#if BINARY_INPUT
+#if BINARY_INPUT && BINARY_OUTPUT
     int input_index = INPUT0_OFFSET
                     + b * INPUT_PACKED_FEATURES_NUM * INPUT0_FEATURE_PITCH
                     + f * INPUT0_FEATURE_PITCH
                     + y * INPUT0_Y_PITCH
                     + x * INPUT0_X_PITCH;
+    int output_index = OUTPUT_OFFSET
+                     + b * OUTPUT_PACKED_FEATURES_NUM * OUTPUT_FEATURE_PITCH
+                     + f * OUTPUT_FEATURE_PITCH
+                     + y * OUTPUT_Y_PITCH
+                     + x * OUTPUT_X_PITCH;
 
     output[output_index] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(input[input_index]), NL_M, NL_N);
-#else
+#elif BINARY_OUTPUT
+    int output_index = OUTPUT_OFFSET
+                     + b * OUTPUT_PACKED_FEATURES_NUM * OUTPUT_FEATURE_PITCH
+                     + f * OUTPUT_FEATURE_PITCH
+                     + y * OUTPUT_Y_PITCH
+                     + x * OUTPUT_X_PITCH;
+
     OUTPUT_TYPE res = 0x00000000;
     int limit = min((int)IFM_PACK_SIZE, (int)(INPUT0_FEATURE_NUM - f*IFM_PACK_SIZE));
     for (int c = 0; c < limit; c++)
@@ -65,11 +71,31 @@ KERNEL (reorder_data_binary)(const __global INPUT_REORDER_TYPE* input,
                         + y * INPUT0_Y_PITCH
                         + x * INPUT0_X_PITCH;
 
-        // TODO: make sure this is valid in all cases
         int bit = input[input_index] > UNIT_VAL_ZERO ? 1 : 0;
         res |= (bit << c);
     }
     output[output_index] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(res), NL_M, NL_N);
+#elif BINARY_INPUT
+    int input_index = INPUT0_OFFSET
+                    + b * INPUT_PACKED_FEATURES_NUM * INPUT0_FEATURE_PITCH
+                    + f * INPUT0_FEATURE_PITCH
+                    + y * INPUT0_Y_PITCH
+                    + x * INPUT0_X_PITCH;
+    int res = input[input_index];
+    int limit = min((int)IFM_PACK_SIZE, (int)(INPUT0_FEATURE_NUM - f*IFM_PACK_SIZE));
+    for (int c = 0; c < limit; c++)
+    {
+        int output_index = OUTPUT_OFFSET
+                         + b * OUTPUT_BATCH_PITCH
+                         + (f*IFM_PACK_SIZE + c) * OUTPUT_FEATURE_PITCH
+                         + y * OUTPUT_Y_PITCH
+                         + x * OUTPUT_X_PITCH;
+
+        int bit = ((res >> c) & 0x00000001) > 0 ? 1 : -1;
+        output[output_index] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(bit), NL_M, NL_N);
+    }
+#else
+#error "Binary reorder is used without binary tensors"
 #endif
 
 }
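
With the hunks above, the binary reorder distinguishes three cases: a binary-to-binary copy, packing a dense tensor into bit-packed features, and the new unpacking path that expands each stored bit back to +1/-1. A host-side sketch of the pack/unpack pair; treating IFM_PACK_SIZE as 32 features per 32-bit word is an assumption of the sketch:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Pack one value per feature into bits of a 32-bit word (1 if > 0, else 0),
    // and unpack back to +1 / -1, mirroring the two new branches above.
    constexpr int kPackSize = 32;  // assumed IFM_PACK_SIZE

    uint32_t pack_features(const std::vector<float>& vals) {
        uint32_t packed = 0;
        for (int c = 0; c < static_cast<int>(vals.size()) && c < kPackSize; ++c) {
            const uint32_t bit = vals[c] > 0.0f ? 1u : 0u;
            packed |= bit << c;
        }
        return packed;
    }

    std::vector<int> unpack_features(uint32_t packed, int count) {
        std::vector<int> out(count);
        for (int c = 0; c < count; ++c)
            out[c] = ((packed >> c) & 1u) ? 1 : -1;  // a stored 0 becomes -1
        return out;
    }

    int main() {
        const std::vector<float> vals = {0.3f, -1.0f, 2.0f, 0.0f};
        const auto unpacked = unpack_features(pack_features(vals), 4);
        assert(unpacked[0] == 1 && unpacked[1] == -1 && unpacked[2] == 1 && unpacked[3] == -1);
        return 0;
    }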
index 1c5838f..b062a22 100644 (file)
@@ -120,9 +120,9 @@ KERNEL (reorder_data_byxf_f32_to_byx8_f4_i8)(
 #endif
 
     char4 out_vals;
-    out_vals.s0 = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res.s0), NL_M, NL_N);
-    out_vals.s1 = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res.s1), NL_M, NL_N);
-    out_vals.s2 = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res.s2), NL_M, NL_N);
+    out_vals.s0 = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res.s0), ACTIVATION_PARAMS_TYPED);
+    out_vals.s1 = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res.s1), ACTIVATION_PARAMS_TYPED);
+    out_vals.s2 = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res.s2), ACTIVATION_PARAMS_TYPED);
     out_vals.s3 = 0;
 
     __global uint* dst = (__global uint*)output;
index 055c040..2317180 100644 (file)
 ///////////////////////// Input Index /////////////////////////
 inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
-#if   INPUT0_SIMPLE && INPUT0_DIMS < 6
+#if   INPUT0_SIMPLE && INPUT0_DIMS < 5
     return GET_DATA_INDEX(INPUT0, b, f, y, x);
+#elif INPUT0_SIMPLE && INPUT0_DIMS == 5
+    return GET_DATA_INDEX_5D(INPUT0, b, f, z, y, x);
 #elif INPUT0_SIMPLE && INPUT0_DIMS == 6
     return GET_DATA_INDEX_6D(INPUT0, b, f, w, z, y, x);
 #elif defined INPUT0_LAYOUT_BS_F_BSV8__AF8  || \
@@ -32,22 +34,21 @@ inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x
     return GET_DATA_BF8_XY16_INDEX(INPUT0, b, f, y, x);
 #elif defined INPUT0_LAYOUT_BFYX_F16
     return GET_DATA_BFYX_F16_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(INPUT0, b, f, z, y, x);
 #else
 #error reorder_data_fast_b1.cl: input format - not supported
 #endif
 }
 
-inline uint FUNC(get_input3d_index)(uint b, uint f, uint z, uint y, uint x)
-{
-    return GET_DATA_INDEX_5D(INPUT0, b, f, z, y, x);
-}
-
 ///////////////////////// Output Index /////////////////////////
 
 inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
-#if   OUTPUT_SIMPLE && OUTPUT_DIMS < 6
+#if   OUTPUT_SIMPLE && OUTPUT_DIMS < 5
     return GET_DATA_INDEX(OUTPUT, b, f, y, x);
+#elif OUTPUT_SIMPLE && OUTPUT_DIMS == 5
+    return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
 #elif OUTPUT_SIMPLE && OUTPUT_DIMS == 6
     return GET_DATA_INDEX_6D(OUTPUT, b, f, w, z, y, x);
 #elif defined OUTPUT_LAYOUT_BS_F_BSV8__AF8  || \
@@ -57,16 +58,13 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint
     return GET_DATA_BF8_XY16_INDEX(OUTPUT, b, f, y, x);
 #elif defined OUTPUT_LAYOUT_BFYX_F16
     return GET_DATA_BFYX_F16_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BFZYX_F16
+    return GET_DATA_BFZYX_F16_INDEX(OUTPUT, b, f, z, y, x);
 #else
 #error reorder_data_fast_b1.cl: output format - not supported
 #endif
 }
 
-inline uint FUNC(get_output3d_index)(uint b, uint f, uint z, uint y, uint x)
-{
-    return GET_DATA_INDEX_5D(OUTPUT, b, f, z, y, x);
-}
-
 KERNEL (reorder_data_fast_b1)(
     const __global INPUT_REORDER_TYPE* input, 
     __global OUTPUT_REORDER_TYPE* output
@@ -178,6 +176,26 @@ KERNEL (reorder_data_fast_b1)(
     tmp_data_idx  = data_idx / OUTPUT_SIZE_Z;
     const uint z = data_idx - tmp_data_idx * OUTPUT_SIZE_Z;
     const uint w = 0;
+#elif defined OUTPUT_LAYOUT_BFZYX_F16
+    uint tmp_data_idx = data_idx / OUTPUT_BATCH_NUM;
+    const uint b = data_idx - tmp_data_idx * OUTPUT_BATCH_NUM;
+    data_idx = tmp_data_idx;
+
+    tmp_data_idx = data_idx / OUTPUT_FEATURE_NUM;
+    const uint f = data_idx - tmp_data_idx * OUTPUT_FEATURE_NUM;
+    data_idx = tmp_data_idx;
+
+    tmp_data_idx = data_idx / OUTPUT_SIZE_X;
+    const uint x = data_idx - tmp_data_idx * OUTPUT_SIZE_X;
+    data_idx = tmp_data_idx;
+
+    tmp_data_idx  = data_idx / OUTPUT_SIZE_Y;
+    const uint y = data_idx - tmp_data_idx * OUTPUT_SIZE_Y;
+    data_idx = tmp_data_idx;
+
+    tmp_data_idx  = data_idx / OUTPUT_SIZE_Z;
+    const uint z = data_idx - tmp_data_idx * OUTPUT_SIZE_Z;
+    const uint w = 0;
 #elif defined OUTPUT_LAYOUT_BFWZYX
     uint tmp_data_idx = data_idx / OUTPUT_BATCH_NUM;
     const uint b = data_idx - tmp_data_idx * OUTPUT_BATCH_NUM;
@@ -225,21 +243,10 @@ KERNEL (reorder_data_fast_b1)(
     const uint input_idx  = data_idx;
     const uint output_idx = data_idx;
 #else
-#if defined OUTPUT_LAYOUT_BFZYX
-    uint8 ov = FUNC_CALL(reshape_dims3d)(b,f,z,y,x, OUTPUT_SIZE_Z, OUTPUT_SIZE_Y, OUTPUT_SIZE_X, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X, INPUT0_DIMS, OUTPUT_DIMS);
-    const uint input_idx  = FUNC_CALL(get_input3d_index)(b, f, z, y, x);
-    const uint output_idx = FUNC_CALL(get_output3d_index)(ov[0],ov[1],ov[2],ov[3],ov[4]);
-#elif INPUT0_DIMS == 5
-    uint8 ov = RESHAPE_DIMS(OUTPUT, INPUT0, b, f, w, z, y, x);
-    const uint input_idx  = FUNC_CALL(get_input3d_index)(ov[0],ov[1], ov[3], ov[4],ov[5]);
-    const uint output_idx  = FUNC_CALL(get_output_index)(b, f, w, z, y, x);
-#else
     uint8 ov = RESHAPE_DIMS(OUTPUT, INPUT0, b, f, w, z, y, x);
     const uint input_idx = FUNC_CALL(get_input_index)(ov[0],ov[1], ov[2], ov[3], ov[4],ov[5]);
     const uint output_idx  = FUNC_CALL(get_output_index)(b, f, w, z, y, x);
 #endif
-
-#endif
     
 #if   defined MEAN_SUBTRACT_INSIDE_PARAMS
     float res = TO_MEAN_TYPE(input[input_idx]);
@@ -252,5 +259,5 @@ KERNEL (reorder_data_fast_b1)(
     CALC_TYPE res = TO_CALC_TYPE(input[input_idx]);
 #endif
 
-    output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), NL_M, NL_N);
+    output[output_idx] = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), ACTIVATION_PARAMS_TYPED);
 }
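
The new OUTPUT_LAYOUT_BFZYX_F16 branch in reorder_data_fast_b1.cl decodes the flat work-item id with the same div/mod chain as the existing bfzyx branch: batch fastest, then feature, then x, y, z. A small round-trip check of that decode; the encode below is only the inverse used for the test, not something the kernel defines:

    #include <cassert>
    #include <cstdint>

    struct Coord { uint32_t b, f, x, y, z; };

    // Decode a flat id with batch as the fastest-varying dimension, then feature,
    // then x, y, z, matching the div/mod chain in the branch above.
    Coord decode(uint32_t idx, uint32_t B, uint32_t F, uint32_t X, uint32_t Y, uint32_t Z) {
        Coord c{};
        c.b = idx % B; idx /= B;
        c.f = idx % F; idx /= F;
        c.x = idx % X; idx /= X;
        c.y = idx % Y; idx /= Y;
        c.z = idx % Z;
        return c;
    }

    int main() {
        const uint32_t B = 2, F = 3, X = 4, Y = 5, Z = 6;
        const Coord in{1, 2, 3, 4, 5};
        // Inverse mapping, used here only to verify the decode round-trips.
        const uint32_t idx = in.b + B * (in.f + F * (in.x + X * (in.y + Y * in.z)));
        const Coord out = decode(idx, B, F, X, Y, Z);
        assert(out.b == in.b && out.f == in.f && out.x == in.x && out.y == in.y && out.z == in.z);
        return 0;
    }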
index 870ff65..72ef21d 100644 (file)
@@ -94,6 +94,6 @@ KERNEL (reorder_data_to_yxfb_batched)(
         CALC_TYPE res = TO_CALC_TYPE(input[input_idx]);
     #endif
 
-        output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), NL_M, NL_N);
+        output[output_idx] = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), ACTIVATION_PARAMS_TYPED);
     }
 }
index d7efcaa..a3e3e6b 100644 (file)
 
 
 ///////////////////////// Input Index /////////////////////////
-inline uint FUNC(get_input_index)(uint o, uint i, uint y, uint x)
+inline uint FUNC(get_input_index)(uint o, uint i, uint z, uint y, uint x)
 {
-#if   INPUT0_SIMPLE
+#if   INPUT0_SIMPLE && INPUT0_DIMS <= 4
     return GET_FILTER_INDEX(INPUT0, o, i, y, x);
+#elif INPUT0_SIMPLE && INPUT0_DIMS == 5
+    return GET_FILTER_INDEX_5D(INPUT0, o, i, z, y, x);
 #elif defined INPUT0_LAYOUT_OS_IYX_OSV16    || \
       defined INPUT0_LAYOUT_OS_I_OSV16      || \
       defined INPUT0_LAYOUT_OS_I_OSV8__AI8  || \
@@ -54,6 +56,10 @@ inline uint FUNC(get_input_index)(uint o, uint i, uint y, uint x)
     return GET_FILTER_O_I_YX_I16_O16_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
 #elif defined INPUT0_LAYOUT_OIYX_O16
     return GET_FILTER_OIYX_O16(INPUT0, o, i, y, x);
+#elif defined INPUT0_LAYOUT_O_I_ZYX_I16_O16
+    return GET_FILTER_O_I_ZYX_I16_O16_INDEX(INPUT0, o, i, z, y, x, SUB_GROUP_SIZE);
+#elif defined INPUT0_LAYOUT_I_O_ZYX_O16_I16
+    return GET_FILTER_I_O_ZYX_O16_I16_INDEX(INPUT0, o, i, z, y, x, SUB_GROUP_SIZE);
 #else
 #error reorder_weights.cl: input format - not supported
 #endif
@@ -61,10 +67,12 @@ inline uint FUNC(get_input_index)(uint o, uint i, uint y, uint x)
 
 ///////////////////////// Output Index /////////////////////////
 
-inline uint FUNC(get_output_index)(uint o, uint i, uint y, uint x)
+inline uint FUNC(get_output_index)(uint o, uint i, uint z, uint y, uint x)
 { 
-#if   OUTPUT_SIMPLE
+#if   OUTPUT_SIMPLE && OUTPUT_DIMS <= 4
     return GET_FILTER_INDEX(OUTPUT, o, i, y, x);
+#elif OUTPUT_SIMPLE && OUTPUT_DIMS == 5
+    return GET_FILTER_INDEX_5D(OUTPUT, o, i, z, y, x);
 #elif defined OUTPUT_LAYOUT_OS_IYX_OSV16    || \
       defined OUTPUT_LAYOUT_OS_I_OSV16      || \
       defined OUTPUT_LAYOUT_OS_I_OSV8__AI8  || \
@@ -100,6 +108,10 @@ inline uint FUNC(get_output_index)(uint o, uint i, uint y, uint x)
     return GET_FILTER_O_I_YX_I16_O16_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
 #elif defined OUTPUT_LAYOUT_OIYX_O16
     return GET_FILTER_OIYX_O16(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_O_I_ZYX_I16_O16
+    return GET_FILTER_O_I_ZYX_I16_O16_INDEX(OUTPUT, o, i, z, y, x, SUB_GROUP_SIZE);
+#elif defined OUTPUT_LAYOUT_I_O_ZYX_O16_I16
+    return GET_FILTER_I_O_ZYX_O16_I16_INDEX(OUTPUT, o, i, z, y, x, SUB_GROUP_SIZE);
 #else
 #error reorder_weights.cl: output format - not supported
 #endif
@@ -117,7 +129,7 @@ KERNEL (reorder_weights)(const __global INPUT0_TYPE* input, write_only image2d_t
     MAKE_VECTOR_TYPE(UNIT_TYPE, 4) input_val = (MAKE_VECTOR_TYPE(UNIT_TYPE, 4))(UNIT_VAL_ZERO, UNIT_VAL_ZERO, UNIT_VAL_ZERO, UNIT_VAL_ZERO);
     const int2 coord = (int2)(o, iyx);
     uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, 0, y, x);
-    input_val.s0 = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[4],ir[5])]);
+    input_val.s0 = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[3],ir[4],ir[5])]);
     IMAGE_WRITE(output, coord, input_val);
 }
 #else
@@ -126,13 +138,20 @@ KERNEL (reorder_weights)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE
     const unsigned o = get_global_id(0);
     const unsigned i = get_global_id(1);
 #if   OUTPUT_DIMS == 2
+    const unsigned z = 0;
     const unsigned y = 0;
     const unsigned x = 0;
 #elif OUTPUT_DIMS == 4
+    const unsigned z = 0;
     const unsigned y = get_global_id(2) / INPUT0_SIZE_X;
     const unsigned x = get_global_id(2) % INPUT0_SIZE_X;
+#elif OUTPUT_DIMS == 5
+    const unsigned zyx = get_global_id(2);
+    const unsigned x = zyx % INPUT0_SIZE_X;
+    const unsigned y = (zyx / INPUT0_SIZE_X) % INPUT0_SIZE_Y;
+    const unsigned z = (zyx / INPUT0_SIZE_X) / INPUT0_SIZE_Y;
 #endif
-    uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, 0, y, x);
-    output[FUNC_CALL(get_output_index)(o, i, y, x)] = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[4],ir[5])]);
+    uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, z, y, x);
+    output[FUNC_CALL(get_output_index)(o, i, z, y, x)] = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[3],ir[4],ir[5])]);
 }
 #endif
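
For 5D weights, reorder_weights.cl now derives x, y, z from the single get_global_id(2) value, with x as the fastest-varying coordinate (unlike the data reorder above, which decodes batch first). A trivial sketch of that split, with placeholder sizes:

    #include <cstdint>

    // Split a flattened zyx work-item id into coordinates, x fastest, as in the
    // OUTPUT_DIMS == 5 branch above. SIZE_X and SIZE_Y are placeholders.
    void split_zyx(uint32_t zyx, uint32_t SIZE_X, uint32_t SIZE_Y,
                   uint32_t& x, uint32_t& y, uint32_t& z) {
        x = zyx % SIZE_X;
        y = (zyx / SIZE_X) % SIZE_Y;
        z = (zyx / SIZE_X) / SIZE_Y;
    }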
index 51b5c9e..8b5820f 100644 (file)
@@ -350,9 +350,22 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
         };
         bool is_common_5d_layout = is_common_nd_layout(base_5d_channels, layout);
         if (is_common_5d_layout) {
-            definitions.push_back({ _name + "_GET_INDEX(b, f, z, y, x)",  "GET_DATA_INDEX_5D("+_name+", b, f, z, y, x)" });
-            definitions.push_back({ _name + "_GET_INDEX_SAFE(b, f, z, y, x)",  "GET_DATA_INDEX_5D_SAFE("+_name+", b, f, z, y, x)" });
-            definitions.push_back({ _name + "_GET_INDEX_RAW(b, f, z, y, x)",  "GET_DATA_INDEX_5D_RAW("+_name+", b, f, z, y, x)" });
+            auto index_func_name = _name + "_GET_INDEX(b, f, z, y, x)";
+            auto safe_index_func_name = _name + "_GET_INDEX_SAFE(b, f, z, y, x)";
+            auto raw_index_func_name = _name + "_GET_INDEX_RAW(b, f, z, y, x)";
+            if (_tensor.SimpleLayout()) {
+                definitions.push_back({ index_func_name,  "GET_DATA_INDEX_5D("+_name+", b, f, z, y, x)" });
+                definitions.push_back({ safe_index_func_name,  "GET_DATA_INDEX_5D_SAFE("+_name+", b, f, z, y, x)" });
+                definitions.push_back({ raw_index_func_name,  "GET_DATA_INDEX_5D_RAW("+_name+", b, f, z, y, x)" });
+            } else if (layout == DataLayout::bfzyx_f16) {
+                definitions.push_back({ index_func_name, "GET_DATA_BFZYX_F16_INDEX(" + _name + ", b, f, z, y, x)" });
+                definitions.push_back({ raw_index_func_name, "GET_DATA_BFZYX_F16_INDEX(" + _name + ", b, f, z, y, x)" });
+                definitions.push_back({ safe_index_func_name, "GET_DATA_BFZYX_F16_INDEX(" + _name + ", b, f, z, y, x)" });
+            } else {
+                definitions.push_back({ index_func_name,  "GET_DATA_INDEX_5D_RAW(" + _name + ", b, f, z, y, x)" });
+                definitions.push_back({ safe_index_func_name,  "GET_DATA_INDEX_5D_RAW(" + _name + ", b, f, z, y, x)" });
+                definitions.push_back({ raw_index_func_name,  "GET_DATA_INDEX_5D_RAW(" + _name + ", b, f, z, y, x)" });
+            }
         } else {
             // TODO: implement support of non-default layouts with 5 channels
             assert(0);
@@ -428,7 +441,8 @@ std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const Weig
 
 JitConstants MakeActivationJitConstants(ActivationFunction activation_function,
                                         const std::string& suffix,
-                                        bool use_type_parameter) {
+                                        bool use_type_parameter,
+                                        bool disable_type_conversion) {
     std::string name = "ACTIVATION_FUNC" + suffix;
     JitConstants jitConstants = {};
 
@@ -480,267 +494,193 @@ JitConstants MakeActivationJitConstants(ActivationFunction activation_function,
     std::string macro_def = name + (use_type_parameter ? "(jit_type, input, m, n)" : "(input, m, n)");
     std::string macro_def_grad = name + (use_type_parameter ? "(jit_type, input_grad, input, m, n)"
                                                             : "(input_grad, input, m, n)");
+    std::string macro_def_params = use_type_parameter ? "(jit_type, input, params)" : "(input, params)";
+
+    jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
+
     // TODO: use native_exp and use cast for APL
     switch (activation_function) {
         case ActivationFunction::LOGISTIC:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, (one / (one + exp(neg(input)))).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::HYPERBOLIC_TAN:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(tanh(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::RELU:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, max_func(zero, input).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::RELU_NEGATIVE_SLOPE: {
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            const JitTerm slope = to_type("m"_jit);
+            const JitTerm slope = disable_type_conversion ? "m"_jit : to_type("m"_jit);
             jitConstants.AddConstant(MakeJitConstant(
                 macro_def,
                 ternary(isinf(slope),
                         ternary(input.ge(zero), input, neg(slope)),
                         max_func(input, zero) + (slope * min_func(input, zero)))
                     .str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         }
         case ActivationFunction::ELU: {
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            auto alpha = "m"_jit;
+            auto alpha = disable_type_conversion ? "m"_jit : to_type("m"_jit);
             jitConstants.AddConstant(MakeJitConstant(
                 macro_def,
-                (max_func(input, zero) + (to_type(alpha) * (exp(min_func(input, zero)) - one)))
+                (max_func(input, zero) + (alpha * (exp(min_func(input, zero)) - one)))
                     .str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         }
-        case ActivationFunction::CLAMP:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
+        case ActivationFunction::CLAMP: {
+            const JitTerm m = disable_type_conversion ? "m"_jit : to_type("m"_jit);
+            const JitTerm n = disable_type_conversion ? "n"_jit : to_type("n"_jit);
             jitConstants.AddConstant(MakeJitConstant(
-                macro_def,
-                max_func(to_type("m"_jit), min_func(to_type("n"_jit), input)).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
+                 macro_def,
+                 max_func(m, min_func(n, input)).str()));
             break;
+        }
         case ActivationFunction::SOFTRELU:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, log(one + exp(input)).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ABS:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(fabs(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
-        case ActivationFunction::LINEAR:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            jitConstants.AddConstant(MakeJitConstant(macro_def, (to_type("m"_jit) * input + to_type("n"_jit)).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
+        case ActivationFunction::LINEAR: {
+            const JitTerm m = disable_type_conversion ? "m"_jit : to_type("m"_jit);
+            const JitTerm n = disable_type_conversion ? "n"_jit : to_type("n"_jit);
+            jitConstants.AddConstant(MakeJitConstant(macro_def, (m * input + n).str()));
             break;
+        }
         case ActivationFunction::SQUARE:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(input*input)"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::SQRT:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(sqrt(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::SIN:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(sin(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ASIN:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(asin(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::SINH:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(sinh(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ASINH:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(asinh(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::COS:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(cos(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ACOS:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(acos(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::COSH:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(cosh(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ACOSH:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(acosh(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::LOG:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(log(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::LOG2:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(log2(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::EXP:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(exp(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
-        case ActivationFunction::POW:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            jitConstants.AddConstant(MakeJitConstant(macro_def, "(pow(input," + (to_type("m"_jit).str()) + "))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
+        case ActivationFunction::POW: {
+            const JitTerm m = disable_type_conversion ? "m"_jit : to_type("m"_jit);
+            jitConstants.AddConstant(MakeJitConstant(macro_def, "(pow(input," + m.str() + "))"));
             break;
+        }
         case ActivationFunction::RELU_GRAD:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(
                 macro_def_grad,
                 ("input_grad"_jit * ternary(input.gt(zero), one, zero)).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input_grad, input, params)", name + "(input_grad, input, params)"));
+            macro_def_params = use_type_parameter ? "(jit_type, input_grad, input, params)" : "(input_grad, input, params)";
             break;
         case ActivationFunction::RELU_NEGATIVE_SLOPE_GRAD: {
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            auto slope = "m"_jit;
+            const JitTerm slope = disable_type_conversion ? "m"_jit : to_type("m"_jit);
             jitConstants.AddConstant(MakeJitConstant(
                 macro_def_grad,
                 ("input_grad"_jit * (ternary(input.gt(zero), one, zero) + (to_type(slope) * ternary(input.le(zero), one, zero))))
                     .str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input_grad, input, params)", name + "(input_grad, input, params)"));
+            macro_def_params = use_type_parameter ? "(jit_type, input_grad, input, params)" : "(input_grad, input, params)";
             break;
         }
         case ActivationFunction::NONE_GRAD:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def_grad, "input_grad"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input_grad, input, params)", name + "(input_grad, input, params)"));
+            macro_def_params = use_type_parameter ? "(jit_type, input_grad, input, params)" : "(input_grad, input, params)";
             break;
         case ActivationFunction::TAN:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(tan(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ATAN:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(atan(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ATANH:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(atanh(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::FLOOR:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(floor(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::CEIL:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(ceil(input))"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::NEGATIVE:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "(-input)"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::ERF:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "erf(input)"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::HARD_SIGMOID: {
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            auto alpha = "m"_jit;
-            auto beta = "n"_jit;
+            auto alpha = disable_type_conversion ? "m"_jit : to_type("m"_jit);
+            auto beta =  disable_type_conversion ? "n"_jit : to_type("n"_jit);
             jitConstants.AddConstant(MakeJitConstant(
                     macro_def,
                     max_func(zero, min_func(one, (JitTerm)((alpha * input + beta).str()))).str()));
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         }
         case ActivationFunction::SIGN:
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(
                     macro_def,
                     ternary(input.gt(zero), one, ternary(input.eq(zero), zero, neg(one))).str()));
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::RECIPROCAL:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, (one / input).str()));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::SELU: {
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
-            auto alpha = "m"_jit;
-            auto gamma = "n"_jit;
+            auto alpha = disable_type_conversion ? "m"_jit : to_type("m"_jit);
+            auto gamma = disable_type_conversion ? "n"_jit : to_type("n"_jit);
             jitConstants.AddConstant(MakeJitConstant(
                     macro_def,
                     ternary(input.le(zero), gamma * (alpha * exp(input) - alpha), gamma * input).str()));
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         }
         case ActivationFunction::SOFTPLUS: {
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(
                     macro_def,
                     log(exp(input) + one).str()));
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         }
         case ActivationFunction::SOFTSIGN: {
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(
                     macro_def,
                     (input / (one + abs_func(input))).str()));
-            jitConstants.AddConstant(
-                    MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         }
         case ActivationFunction::NOT:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(
                 macro_def,
                 ternary(input.eq(zero), one, zero)
                     .str()));  // the workaround for OpenCL's vector type result (!input)
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
         case ActivationFunction::NONE:
         default:
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "NL_M" + suffix + ", NL_N" + suffix));
             jitConstants.AddConstant(MakeJitConstant(macro_def, "input"));
-            jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + "(input, params)", name + "(input, params)"));
             break;
     }
+
+    jitConstants.AddConstant(MakeJitConstant("ACTIVATION" + suffix + macro_def_params, name + macro_def_params));
+
     return jitConstants;
 }
 
@@ -924,14 +864,56 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma
 
 JitConstants MakeActivationJitConstants(const base_activation_params& params,
                                         const std::string& suffix,
-                                        bool use_type_parameter) {
+                                        bool use_type_parameter,
+                                        bool disable_type_conversion) {
     auto jitConstants = JitConstants{MakeJitConstant("NL_M" + suffix, params.m),
                                      MakeJitConstant("NL_N" + suffix, params.n)};
     jitConstants.Merge(MakeActivationJitConstants(
-        params.function, suffix, use_type_parameter));
+        params.function, suffix, use_type_parameter, disable_type_conversion));
     return jitConstants;
 }
 
+JitConstants MakeActivationJitConstants(std::vector<kernel_selector::base_activation_params> params,
+                                        const std::string& suffix,
+                                        bool use_type_parameter,
+                                        bool disable_type_conversion) {
+    JitConstants res = {};
+    if (params.empty()) {
+        return MakeActivationJitConstants({ActivationFunction::NONE, 0.f, 0.f}, suffix, use_type_parameter, disable_type_conversion);
+    }
+    std::string res_activation = "";
+    std::string activation_params = "";
+    for (size_t i = 0; i < params.size(); i++) {
+        std::string activation_suffix = suffix + "_" + std::to_string(i);
+        auto jitConstants = JitConstants{MakeJitConstant("NL_M" + activation_suffix, params[i].m),
+                                         MakeJitConstant("NL_N" + activation_suffix, params[i].n)};
+        jitConstants.Merge(MakeActivationJitConstants(
+                params[i].function, activation_suffix, use_type_parameter, disable_type_conversion));
+        res.Merge(jitConstants);
+
+        if (i == 0) {
+            if (params[i].gradient) {
+                activation_params = use_type_parameter ? "(jit_type, input_grad, input, params)" : "(input_grad, input, params)";
+            } else {
+                activation_params = use_type_parameter ? "(jit_type, input, params)" : "(input, params)";
+            }
+            res_activation = "ACTIVATION_FUNC" + activation_suffix + activation_params;
+        } else {
+            res_activation = "ACTIVATION" + activation_suffix + "(" + (use_type_parameter ? "jit_type, " : "") +
+                             (params[i].gradient ? "input_grad, " : "") +
+                             res_activation + ", ACTIVATION_PARAMS" + activation_suffix + ")";
+        }
+    }
+    if (params[params.size() - 1].gradient) {
+        activation_params = use_type_parameter ? "(jit_type, input_grad, input, params)" : "(input_grad, input, params)";
+    } else {
+        activation_params = use_type_parameter ? "(jit_type, input, params)" : "(input, params)";
+    }
+    res.AddConstant(MakeJitConstant("ACTIVATION_PARAMS" + suffix, "ACTIVATION_PARAMS" + suffix + "_0"));
+    res.AddConstant(MakeJitConstant("ACTIVATION" + suffix + activation_params, res_activation));
+    return res;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // MakeLoopUnrollParamsJitConstants
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
index 7d66bcd..c6890f7 100644 (file)
@@ -275,7 +275,16 @@ public:
 // the logic.
 JitConstants MakeActivationJitConstants(const base_activation_params& params,
                                         const std::string& suffix = "",
-                                        bool use_type_parameter = false);
+                                        bool use_type_parameter = false,
+                                        bool disable_type_conversion = false);
+JitConstants MakeActivationJitConstants(ActivationFunction activation_function,
+                                        const std::string& suffix,
+                                        bool use_type_parameter,
+                                        bool disable_type_conversion = false);
+JitConstants MakeActivationJitConstants(std::vector<kernel_selector::base_activation_params> params,
+                                        const std::string& suffix = "",
+                                        bool use_type_parameter = false,
+                                        bool disable_type_conversion = false);
 JitConstants MakeBaseParamsJitConstants(const base_params& params);
 JitConstants MakeLoopUnrollParamsJitConstants(uint32_t loopCount);
 JitConstants MakeTypeJitConstants(Datatype dataType, const std::string& macroName);
index 72eefb4..fda7821 100644 (file)
@@ -27,8 +27,8 @@ struct training_params : public weight_bias_params {
     explicit training_params(KernelType kt) : weight_bias_params(kt) {}
 
     bool use_momentum = false;
-    float weights_decay;
-    float momentum_factor;
+    float weights_decay = 0.0;
+    float momentum_factor = 0.0;
 
     ParamsKey GetParamsKey() const override;
 };
index 34d0420..971e17f 100644 (file)
@@ -58,7 +58,7 @@ JitConstants KernelBase::MakeBaseParamsJitConstants(const base_params& params) c
 
     // for activation function
     jit.Merge(MakeUnitTypeJitConstants(unitType));
-    jit.Merge(MakeActivationJitConstants(params.activation));
+    jit.Merge(MakeActivationJitConstants(params.activations));
 
     for (size_t i = 0; i < params.inputs.size(); i++) {
         jit.AddConstant(MakeJitConstant("INPUT" + toCodeString(i), params.inputs[i]));
@@ -69,4 +69,72 @@ JitConstants KernelBase::MakeBaseParamsJitConstants(const base_params& params) c
     return jit;
 }
 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MakeFusedOpsJitConstants
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_params &params,
+                                                  const std::vector<FusedOpsConfiguration> &conf) const {
+    JitConstants jit = {};
+
+    if (conf.empty())
+        return jit;
+
+    std::string input_decls = "";
+    std::vector<std::string> fused_ops;
+    std::vector<std::string> names;
+    for (const auto &c : conf) {
+        fused_ops.emplace_back("");
+        names.push_back(c.input_var_name);
+    }
+
+    for (size_t i = 0; i < params.fused_ops.size(); i++) {
+        auto& fused_dep = params.fused_ops[i];
+        for (size_t j = 0; j < conf.size(); j++) {
+            std::string out_var = "";
+            jit.Merge(fused_dep.MakeLoadJitConstants(conf[j]));
+            jit.Merge(fused_dep.MakeOpJitConstants(conf[j], names[j], out_var));
+            names[j] = out_var;
+
+            fused_ops[j] += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + conf[j].suffix;
+            fused_ops[j] += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + conf[j].suffix;
+        }
+    }
+
+    jit.Merge(MakeFusedOpsDeclsJitConstants(params, conf));
+
+    for (size_t j = 0; j < conf.size(); j++) {
+        jit.AddConstant(MakeJitConstant("FUSED_OPS" + conf[j].suffix, fused_ops[j]));
+        jit.AddConstant(MakeJitConstant("FINAL_NAME" + conf[j].suffix, names[j]));
+    }
+
+    return jit;
+}
+
+JitConstants KernelBase::MakeFusedOpsDeclsJitConstants(const kernel_selector::base_params &params,
+                                                       const std::vector<FusedOpsConfiguration> &conf) const {
+    JitConstants jit = {};
+
+    if (conf.empty())
+        return jit;
+
+    std::string input_decls = "";
+    for (size_t i = 0; i < params.fused_ops.size(); i++) {
+        auto& fused_dep = params.fused_ops[i];
+        std::string op_type = fused_dep.GetTypeStr();
+
+        jit.Merge(fused_dep.MakeFusedTensorJitConstants(conf[0]));
+        jit.Merge(fused_dep.MakeInputDeclsJitConstants(conf[0]));
+        if (!params.fused_ops[i].tensors.empty()) {
+            std::string optional_comma = (!input_decls.empty() ? "," : "");
+            input_decls += optional_comma + "\\\n\tFUSED_OP" + std::to_string(i) + "_DECLS";
+        }
+    }
+
+    jit.AddConstant(MakeJitConstant("FUSED_OPS_DECLS", input_decls));
+    jit.AddConstant(MakeJitConstant("HAS_FUSED_OPS", true));
+    jit.AddConstant(MakeJitConstant("HAS_FUSED_OPS_DECLS", !input_decls.empty()));
+
+    return jit;
+}
+
 }  // namespace kernel_selector
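
The new MakeFusedOpsJitConstants stitches each fused op's LOAD and ACTION macros into one FUSED_OPS<suffix> definition, using "\\\n\t" so the generated OpenCL header keeps backslash line continuations, while MakeFusedOpsDeclsJitConstants assembles the extra kernel-argument declarations the same way. A reduced sketch of the string being produced; the op count and suffix are placeholders:

    #include <iostream>
    #include <string>

    // Reduced sketch of how the FUSED_OPS macro body is assembled: each fused op
    // contributes a LOAD and an ACTION reference, each on its own continued line.
    std::string build_fused_ops(size_t op_count, const std::string& suffix = "") {
        std::string body;
        for (size_t i = 0; i < op_count; ++i) {
            body += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + suffix;
            body += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + suffix;
        }
        return body;
    }

    int main() {
        // Prints a multi-line #define whose lines end in backslash continuations,
        // one FUSED_OPi_LOAD / FUSED_OPi_ACTION reference per line.
        std::cout << "#define FUSED_OPS" << build_fused_ops(2) << "\n";
        return 0;
    }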
index 30a36f6..f33306f 100644 (file)
@@ -21,6 +21,7 @@
 #include "jitter.h"
 #include "primitive_db.h"
 #include <string>
+#include <vector>
 
 namespace kernel_selector {
 using primitive_db = kernel_selector::gpu::cache::primitive_db;
@@ -52,6 +53,8 @@ protected:
     static size_t UniqeID() { return counter++; }  // TODO: use interlocked
     virtual Datatype GetUnitType(const base_params& params) const;
     JitConstants MakeBaseParamsJitConstants(const base_params& params) const;
+    virtual JitConstants MakeFusedOpsJitConstants(const base_params &params, const std::vector<FusedOpsConfiguration> &conf) const;
+    virtual JitConstants MakeFusedOpsDeclsJitConstants(const base_params &params, const std::vector<FusedOpsConfiguration> &conf) const;
 
 private:
     static size_t counter;
index f1747e4..694d59a 100644 (file)
 
 #pragma once
 #include <vector>
+#include <chrono>
 
 namespace kernel_selector {
 class KernelRunnerInterface {
 public:
     // Gets a list of kernels, executes them, and returns the run time of each kernel (in nanoseconds).
-    virtual std::vector<uint64_t> run_kernels(const kernel_selector::KernelsData& kernelsData) = 0;
+    virtual std::vector<std::chrono::nanoseconds> run_kernels(const kernel_selector::KernelsData& kernelsData) = 0;
 
     virtual ~KernelRunnerInterface() = default;
 };
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
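
With run_kernels now returning typed durations, a runner implementation only changes its return type; callers convert back to raw numbers with .count(), as the auto-tuner hunk below shows. A minimal sketch of such a runner, assuming a hypothetical DummyKernelRunner that reports a fixed 1 ms per kernel instead of real profiling data:

    #include <chrono>
    #include <vector>

    namespace kernel_selector {
    class DummyKernelRunner : public KernelRunnerInterface {
    public:
        std::vector<std::chrono::nanoseconds> run_kernels(const kernel_selector::KernelsData& kernelsData) override {
            // A real runner would enqueue every kernel and read back its profiling events;
            // here each kernel is simply reported as taking one millisecond.
            return std::vector<std::chrono::nanoseconds>(kernelsData.size(), std::chrono::milliseconds(1));
        }
    };
    }  // namespace kernel_selector
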
index 829e18d..07bbf96 100644 (file)
@@ -188,10 +188,10 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params,
             if (implKey.Support(requireKey) && implKey.TuningSupport()) {
                 try {
                     KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
-                    std::vector<uint64_t> runTimes = options.tuningParams.runner->run_kernels(kds);
+                    auto runTimes = options.tuningParams.runner->run_kernels(kds);
 
                     for (size_t i = 0; i < kds.size(); i++) {
-                        kds[i].runTime = runTimes[i];
+                        kds[i].runTime = runTimes[i].count();
                         if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime) {
                             kernelsData = {kds[i]};
                             kernelName = implementation->GetName();
@@ -211,10 +211,10 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params,
                 if (implKey.Support(requireKey) && !implKey.TuningSupport()) {
                     try {
                         KernelsData kds = implementation->GetKernelsDataForAutoTune(params, options);
-                        std::vector<uint64_t> runTimes = options.tuningParams.runner->run_kernels(kds);
+                        auto runTimes = options.tuningParams.runner->run_kernels(kds);
 
                         for (size_t i = 0; i < kds.size(); i++) {
-                            kds[i].runTime = runTimes[i];
+                            kds[i].runTime = runTimes[i].count();
                             if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime) {
                                 kernelsData = {kds[i]};
                                 kernelName = implementation->GetName();
index a108d53..568da16 100644 (file)
@@ -110,6 +110,7 @@ std::string toString(DataLayout l) {
         case kernel_selector::DataLayout::bfzyx:                return "BFZYX";
         case kernel_selector::DataLayout::fs_b_yx_fsv32:        return "FS_B_YX_FSV32";
         case kernel_selector::DataLayout::bfwzyx:               return "BFWZYX";
+        case kernel_selector::DataLayout::bfzyx_f16:            return "BFZYX_F16";
         default:
             return "";
     }
@@ -311,6 +312,7 @@ std::string toString(WeightsLayout layout) {
         case WeightsLayout::winograd_6x3_s1_fused_weights:          return "WINOGRAD_6x3_S1_FUSED_WEIGHTS";
         case WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb: return "IMAGE_2D_WEIGHTS_WINOGRAD_6x3_S1_FBXYB";
         case WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb: return "IMAGE_2D_WEIGHTS_WINOGRAD_6x3_S1_XFBYB";
+        case WeightsLayout::dlstm_dir_io:                           return "DLSTM_DIR_IO";
         case WeightsLayout::os_is_yx_isa8_osv8_isv4:                return "OS_IS_YX_ISA8_OSV8_ISV4";
         case WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4:  return "OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4";
         case WeightsLayout::is_o_yx_isv32:                          return "IS_O_YX_ISV32";
@@ -321,6 +323,8 @@ std::string toString(WeightsLayout layout) {
         case WeightsLayout::os_is_yx_osv32_isv32p:                  return "OS_IS_YX_OSV32_ISV32P";
         case WeightsLayout::oizyx:                                  return "OIZYX";
         case WeightsLayout::bf_lyx_yx:                              return "BF_LYX_YX";
+        case WeightsLayout::o_i_zyx_i16_o16:                        return "O_I_ZYX_I16_O16";
+        case WeightsLayout::i_o_zyx_o16_i16:                        return "I_O_ZYX_O16_I16";
         default: throw std::invalid_argument("Failed to convert WeightsLayout " + std::to_string(layout) + " to string");
     }
 }
index c693449..112a0c1 100644 (file)
@@ -17,6 +17,7 @@
 #include "kernel_selector_common.h"
 #include <sstream>
 #include <string>
+#include "jitter.h"
 
 namespace kernel_selector {
 
@@ -526,7 +527,13 @@ std::string base_activation_params::to_string() const {
 std::string base_params::to_string() const {
     std::stringstream s;
     s << Params::to_string() << "_";
-    s << activation.to_string() << "_";
+    // TODO: this should loop through all fused activations, but that would affect the resulting hash used by the autoTune
+    // option, which is why only the first activation is used for now
+    if (activations.size() >= 1)
+        s << activations[0].to_string() << "_";
+    if (activations.empty()) {
+        s << "m" << 0.f << "_n" << 0.f << "_" << toString(ActivationFunction::NONE) << "_";
+    }
 
     for (auto input : inputs) {
         s << toString(input) << "_";
@@ -535,4 +542,243 @@ std::string base_params::to_string() const {
 
     return s.str();
 }
+
+
+std::string base_params::fused_operation_desc::GetTypeStr() const {
+    switch (type) {
+        case Type::ELTWISE: return "eltwise";
+        case Type::SCALE: return "scale";
+        case Type::QUANTIZE: return "quantize";
+        case Type::ACTIVATION: return "activation";
+        case Type::UNDEFINED: return "";
+        default: return "";
+    }
+}
+
+JitConstants base_params::fused_operation_desc::MakeFusedTensorJitConstants(const FusedOpsConfiguration& /*conf*/) const {
+    JitConstants jit{};
+    for (size_t op_input_id = 0; op_input_id < tensors.size(); op_input_id++) {
+        std::string name = GetInputTensorName(op_input_id);
+        jit.AddConstant(MakeJitConstant(name, tensors[op_input_id]));
+    }
+    jit.AddConstant(MakeJitConstant(GetOutputTensorName(), output_tensor));
+    return jit;
+}
+
+JitConstants base_params::fused_operation_desc::MakeInputDeclsJitConstants(const FusedOpsConfiguration& /*conf*/) const {
+    JitConstants jit = {};
+
+    std::string input_decls = "";
+    for (size_t op_input_id = 0; op_input_id < tensors.size(); op_input_id++) {
+        std::string ptr_name = GetInputPtrName(op_input_id);
+        input_decls += "\\\n\tconst __global " + toCLType(tensors[op_input_id].GetDType()) +
+                       "* " + ptr_name + (op_input_id == tensors.size() - 1 ? "" : ",");
+    }
+
+    jit.AddConstant(MakeJitConstant("FUSED_OP"+std::to_string(op_id)+"_DECLS", input_decls));
+    return jit;
+}
+
+JitConstants base_params::fused_operation_desc::MakeLoadJitConstants(const FusedOpsConfiguration& conf) const {
+    JitConstants jit = {};
+
+    auto vec_size = conf.vec_size;
+    auto idx = conf.bfyx_idx_order;
+
+    std::string load_decls = "";
+    static int i = 0;
+    bool reuse_index = type == Type::QUANTIZE;
+    std::string reused_idx = "reused_idx_" + std::to_string(i++);
+    if (reuse_index) {
+        load_decls += "\\\n\tint " + reused_idx + " = " +  GetIdx(0, idx_desc{idx}, conf.safe_load) + ";";
+    }
+
+    for (size_t op_input_id = 0; op_input_id < tensors.size(); op_input_id++) {
+        if (type == Type::QUANTIZE && tensors.size() > 4 && (op_input_id == 2 || op_input_id == 3))
+            continue;
+        load_decls += "\\\n\t" + GetInputTypeName(op_input_id, vec_size) + " " + GetInputVarName(op_input_id) + " = " +
+                      GetJitLoad(conf, op_input_id, reuse_index, reused_idx) + ";";
+    }
+    jit.AddConstant(MakeJitConstant("FUSED_OP"+std::to_string(op_id)+"_LOAD" + conf.suffix, load_decls));
+
+    return jit;
+}
+
+JitConstants base_params::fused_operation_desc::MakeOpJitConstants(const FusedOpsConfiguration& conf,
+                                                                   std::string in_var, std::string& out_var) const {
+    JitConstants jit = {};
+
+    std::string op_decls = "";
+    auto typed_activation = conf.typed_activation;
+    auto vec_size = conf.vec_size;
+    auto idx = conf.bfyx_idx_order;
+
+    out_var = GetOutputVarName(in_var);
+    switch (type) {
+        case Type::SCALE:
+            op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + GetInputVarName(0) + " * " + in_var + ";";
+            if (tensors.size() > 1)
+                op_decls += "\\\n\t" + out_var + " += " + GetInputVarName(1) + ";";
+            break;
+        case Type::ELTWISE:
+            op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(GetInputVarName(0), vec_size) +
+                        " + " + ConvertToOutputType(in_var, vec_size) + ";";
+            break;
+        case Type::QUANTIZE:
+            op_decls += "\\\n\t" + in_var + " = min(max(" + GetInputVarName(0) + ", " + in_var + "), " + GetInputVarName(1)+");";
+            op_decls += "\\\n\t" + in_var + " = round(" + in_var + "*" + GetInputVarName(4) + " + " + GetInputVarName(5) + ");";
+            op_decls += "\\\n\t" + in_var + " = " + in_var + "*" + GetInputVarName(6) + " + " + GetInputVarName(7) + ";";
+            op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputTypeSat(in_var, vec_size) +";";
+            break;
+        case Type::ACTIVATION:
+            op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_var + ";";
+            // Do nothing since activation call will be added later
+            break;
+        default: break;
+    }
+
+    if (activation.function != ActivationFunction::NONE) {
+        auto suffix = "_FUSED_OP"+std::to_string(op_id) + conf.suffix;
+        if (tensors.size() == 1) {
+            jit.Merge(JitConstants{MakeJitConstant("NL_M" + suffix, GetInputVarName(0)),
+                                   MakeJitConstant("NL_N" + suffix, activation.n)});
+        } else {
+            jit.Merge(JitConstants{MakeJitConstant("NL_M" + suffix, activation.m),
+                                   MakeJitConstant("NL_N" + suffix, activation.n)});
+        }
+        // Disable type casts in activation, since the current jit generator for activation doesn't respect the vector size of parameters.
+        // The conversion is therefore done explicitly in the params declaration.
+        jit.Merge(MakeActivationJitConstants(activation.function, suffix, false, true));
+        if (typed_activation) {
+            std::string params = ConvertToOutputType("NL_M" + suffix, vec_size) + ","+ ConvertToOutputType("NL_N" + suffix, vec_size);
+            op_decls += "\\\n\t" + out_var + " = ACTIVATION_FUNC" + suffix + "(" + out_var + ", " + params + ");";
+        } else {
+            op_decls += "\\\n\t" + out_var + " = ACTIVATION" + suffix + "(" + out_var + ", ACTIVATION_PARAMS" + suffix + ");";
+        }
+    }
+
+    jit.AddConstant(MakeJitConstant("FUSED_OP"+std::to_string(op_id)+"_ACTION" + conf.suffix, op_decls));
+
+    return jit;
+}
+
+std::string base_params::fused_operation_desc::GetInputTensorName(size_t input_id) const {
+    return "FUSED_OP_" + std::to_string(op_id) + "_INPUT" + std::to_string(input_id);
+}
+
+std::string base_params::fused_operation_desc::GetOutputTensorName() const {
+    return "FUSED_OP_" + std::to_string(op_id) + "_OUTPUT";
+}
+
+std::string base_params::fused_operation_desc::GetInputTypeName(size_t input_id, size_t vec_size) const {
+    if (vec_size == 0 || vec_size > 8)
+        throw std::invalid_argument("Invalid vector size in jit definitions: " + std::to_string(vec_size));
+    std::string scalar_type = GetInputTensorName(input_id) + "_TYPE";
+    if (vec_size > 1)
+        return "MAKE_VECTOR_TYPE(" + scalar_type + "," + std::to_string(vec_size) + ")";
+    else
+        return scalar_type;
+}
+
+std::string base_params::fused_operation_desc::GetIdx(size_t input_id, idx_desc idx, bool should_be_safe) const {
+    std::string idx_order = "";
+    if (tensors[input_id].Batch().v == 1) {
+        idx.b = "0";
+    }
+    if (tensors[input_id].Feature().v == 1) {
+        idx.f = "0";
+    }
+    if (tensors[input_id].Y().v == 1) {
+        idx.y = "0";
+    }
+    if (tensors[input_id].X().v == 1) {
+        idx.x = "0";
+    }
+    if (idx.dims == 4) {
+        idx_order = idx.b + "," + idx.f + "," + idx.y + "," + idx.x;
+    } else if (idx.dims == 5) {
+        idx_order = idx.b + "," + idx.f + "," + idx.z + "," + idx.y + "," + idx.x;
+    }
+
+    if (should_be_safe)
+        return GetInputTensorName(input_id) + "_GET_INDEX_SAFE(" + idx_order +")";
+    else
+        return GetInputTensorName(input_id) + "_GET_INDEX(" + idx_order +")";
+}
+
+std::string base_params::fused_operation_desc::GetJitLoad(const FusedOpsConfiguration& conf, size_t input_id,
+                                                          bool reuse_index, std::string reused_idx) const {
+    auto vec_size = 1;
+    // TODO: Need to check input tensors here to make sure that they have the same layout as output
+    if (type == Type::ELTWISE) {
+        vec_size = conf.vec_size;
+    }
+
+    auto aligned = conf.aligned_load;
+    auto idx = conf.bfyx_idx_order;
+    if (vec_size == 0 || vec_size > 8)
+        throw std::invalid_argument("Invalid vector size in jit definitions: " + std::to_string(vec_size));
+
+    std::string index_func_call_vec = reuse_index ? reused_idx : GetIdx(input_id, idx_desc{idx}, conf.safe_load);
+    std::string index_func_call = reuse_index ? reused_idx : GetIdx(input_id, idx_desc{idx}, conf.safe_load);
+    if (conf.simple_offset) {
+        std::string offset = conf.bfyx_idx_order[0];
+        if (conf.safe_load)
+            offset = "(" + offset + " % " + std::to_string(tensors[input_id].LogicalSize()) + ")";
+        if (vec_size > 1)
+            return "((const __global " + toCLType(tensors[input_id].GetDType()) + std::to_string(vec_size) + "*)(" +
+                   GetInputPtrName(input_id) + " + " + offset + "))[0]";
+        else
+            return GetInputPtrName(input_id) + "[" + offset + "]";
+    } else {
+        if (aligned) {
+            if (vec_size > 1)
+                return " UNIT_BLOCK_READ" + std::to_string(vec_size) + "(" + GetInputPtrName(input_id) + ", " + index_func_call_vec + ")";
+            else
+                return " UNIT_BLOCK_READ(" + GetInputPtrName(input_id) + ", " + index_func_call + ")";
+        } else {
+            if (vec_size > 1)
+                return "((const __global " + toCLType(tensors[input_id].GetDType()) + std::to_string(vec_size) + "*)(" +
+                       GetInputPtrName(input_id) + " + " + index_func_call_vec + "))[0]";
+            else
+                return GetInputPtrName(input_id) + "[" + index_func_call + "]";
+        }
+    }
+}
+
+std::string base_params::fused_operation_desc::GetInputPtrName(size_t input_id) const {
+    return GetTypeStr() + std::to_string(op_id) + "_input" + std::to_string(input_id);
+}
+
+std::string base_params::fused_operation_desc::GetInputVarName(size_t input_id) const {
+    return GetTypeStr() + std::to_string(op_id) + "_data" + std::to_string(input_id);
+}
+
+std::string base_params::fused_operation_desc::GetOutputVarName(std::string input_var) const {
+    static int i = 0;
+    std::replace(input_var.begin(), input_var.end(), '[', '_');
+    std::replace(input_var.begin(), input_var.end(), ']', '_');
+    std::replace(input_var.begin(), input_var.end(), ' ', '_');
+    return input_var + "_" + std::to_string(i++);
+}
+
+std::string base_params::fused_operation_desc::GetOutputType(size_t vec_size) const {
+    if (vec_size > 1)
+        return toCLType(output_tensor.GetDType()) + std::to_string(vec_size);
+    else
+        return toCLType(output_tensor.GetDType());
+}
+
+std::string base_params::fused_operation_desc::ConvertToOutputType(std::string var, size_t vec_size) const {
+    return "convert_" + GetOutputType(vec_size) + "(" + var + ")";
+}
+
+std::string base_params::fused_operation_desc::ConvertToOutputTypeSat(std::string var, size_t vec_size) const {
+    if (output_tensor.GetDType() == Datatype::F32 || output_tensor.GetDType() == Datatype::F16)
+        return "convert_" + GetOutputType(vec_size) + "(" + var + ")";
+    else
+        return "convert_" + GetOutputType(vec_size) + "_sat(" + var + ")";
+}
+
+
 }  // namespace kernel_selector
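
The helper methods above encode a fixed naming scheme for the generated OpenCL identifiers. A small illustration (not part of the change) of what they return for a hypothetical fused eltwise op with op_id == 1:

    // Illustrative only: the descriptor is default-initialized except for the two fields the
    // name helpers actually read; no tensors are needed for these particular calls.
    base_params::fused_operation_desc desc;
    desc.type = base_params::fused_operation_desc::Type::ELTWISE;
    desc.op_id = 1;
    // desc.GetTypeStr()          -> "eltwise"
    // desc.GetInputPtrName(0)    -> "eltwise1_input0"    (kernel argument name)
    // desc.GetInputVarName(0)    -> "eltwise1_data0"     (loaded value variable)
    // desc.GetInputTensorName(0) -> "FUSED_OP_1_INPUT0"  (tensor jit constant)
    // desc.GetOutputTensorName() -> "FUSED_OP_1_OUTPUT"
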
index 4bba996..a3d8c02 100644 (file)
@@ -31,6 +31,8 @@ using WeightsTensor = Tensor::WeightsTensor;
 using DataLayout = Tensor::DataLayout;
 using WeightsLayout = Tensor::WeightsLayout;
 using MultiDataTensor = std::vector<DataTensor>;
+
+class JitConstants;
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // ParamsKey
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -194,6 +196,9 @@ public:
                         // fused conv eltw
                         uint32_t rw_out_opt : 1;
                     } fused_conv_eltw;
+                    struct quantize_t {
+                        uint32_t packed_binary_output : 1;
+                    } quantize;
                 } dedicated;
             } val;
             uint64_t raw;
@@ -310,6 +315,8 @@ public:
     void EnableFusedConvEltwOutputCalibration() { key.restrict.val.dedicated.fused_conv_eltw.calibration = 1; }
     void EnableFusedConvEltwEltwiseStride();
 
+    void EnableQuantizePackedBinaryOutput() { key.restrict.val.dedicated.quantize.packed_binary_output = 1; }
+
     void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
     void EnableSoftmaxDim(SoftmaxDim d);
     void EnableConcatAxis(ConcatAxis a);
@@ -394,20 +401,159 @@ struct base_activation_params {
     ActivationFunction function = ActivationFunction::NONE;
     float m = 1.f;
     float n = 0.f;
+    bool gradient = false;
 
     base_activation_params() = default;
     base_activation_params(const float m, const float n) : m(m), n(n) {}
+    base_activation_params(const ActivationFunction f, const float m, const float n, const bool gradient = false)
+        : function(f), m(m), n(n), gradient(gradient) {}
 
     virtual std::string to_string() const;
 };
 
+struct FusedOpsConfiguration {
+    std::string suffix;
+    std::vector<std::string> bfyx_idx_order;
+    std::string input_var_name;
+    size_t vec_size;
+    bool aligned_load;
+    bool typed_activation;
+    bool safe_load;
+    bool simple_offset;
+
+    FusedOpsConfiguration(std::string suffix,
+                          std::vector<std::string> bfyx_idx_order,
+                          std::string input_var_name,
+                          size_t vec_size = 1,
+                          bool aligned_load = false,
+                          bool typed_activation = false,
+                          bool safe_load = true,
+                          bool simple_offset = false)
+      : suffix(suffix)
+      , bfyx_idx_order(bfyx_idx_order)
+      , input_var_name(input_var_name)
+      , vec_size(vec_size)
+      , aligned_load(aligned_load)
+      , typed_activation(typed_activation)
+      , safe_load(safe_load)
+      , simple_offset(simple_offset) { }
+};
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // base_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct base_params : public Params {
     virtual ~base_params() {}
 
+    // An instance of fused_operation_desc is added to the fused_ops vector if a node has been fused into the current one using the
+    // program_impl::fuse_nodes method. In order to process fused ops, the following modifications should be made in a kernel:
+    // option 1 - using the common generator:
+    //     - create a FusedOpsConfiguration object that contains the configuration for the common code generator.
+    //       Multiple objects can be created if a kernel uses different data types at the same time, e.g. kernels that contain scalar and
+    //       vector branches chosen at runtime. To handle this case, create 2 configurations with different suffixes, like
+    //       "_SCALAR" and "_VEC", and then use the generated macros accordingly.
+    //     - add the jit constants returned by the KernelBase::MakeFusedOpsJitConstants method to the kernel's constants.
+    //     - insert the generated macros into the OCL code:
+    //       in kernel declaration:
+    //         #if HAS_FUSED_OPS_DECLS
+    //           FUSED_OPS_DECLS,
+    //         #endif
+    //       in kernel body:
+    //         #if HAS_FUSED_OPS
+    //           FUSED_OPS<OPTIONAL_SUFFIX>;
+    //           <SOME_VARIABLE> = FINAL_NAME<OPTIONAL_SUFFIX>;
+    //         #endif
+    //   In this case the common generator creates a set of definitions for each op, which are called sequentially in the
+    //   FUSED_OPS<OPTIONAL_SUFFIX> macro. Example:
+    //     #define FUSED_OPS
+    //       FUSED_OP0_LOAD_VEC
+    //       FUSED_OP0_ACTION_VEC
+    //       FUSED_OP1_LOAD_VEC
+    //       FUSED_OP1_ACTION_VEC
+    //     #define FUSED_OP0_LOAD_VEC
+    //       MAKE_VECTOR_TYPE(FUSED_OP_0_INPUT0_TYPE,2) activation0_data0 = UNIT_BLOCK_READ(activation0_input0,
+    //                                                                      FUSED_OP_0_INPUT0_GET_INDEX_SAFE(0,(f_block*16),0,0));
+    //     #define FUSED_OP0_ACTION_VEC
+    //       float2 dst_0 = dst;
+    //       dst_0 = ACTIVATION_FUSED_OP0_VEC(dst_0, ACTIVATION_PARAMS_FUSED_OP0_VEC);
+    //     #define FUSED_OP1_LOAD_VEC
+    //       MAKE_VECTOR_TYPE(FUSED_OP_1_INPUT0_TYPE,2) eltwise1_data0 = UNIT_BLOCK_READ2(eltwise1_input0,
+    //                                                                   FUSED_OP_1_INPUT0_GET_INDEX_SAFE(0,(f_block*16),y,x));
+    //     #define FUSED_OP1_ACTION_VEC
+    //       float2 dst_0_2 = convert_float2(eltwise1_data0) + convert_float2(dst_0);
+    //     #define FINAL_NAME_VEC dst_0_2
+    // option 2 - using a custom generator in a kernel. It can be used if performance of the common generator is not optimal or to handle
+    //            some difficult cases that can't be unified. Custom processing of fused ops can be written completely independently
+    //            in a kernel, but to make it easier a set of helper functions exists:
+    //     - KernelBase::MakeFusedOpsDeclsJitConstants, which creates arguments for the kernel declaration and a macro for all tensors used
+    //       in a fused op (requires a FusedOpsConfiguration instance).
+    //     - fused_operation_desc, which provides a set of methods to generate variable/pointer names, type conversions, and data loads.
+    //  If you need an example of custom code generation for fused ops, check the BinaryConvolutionKernelGeneric::GetFusedPrimitivesJitConstants
+    //  method in binary_convolution_kernel_generic.cpp (a minimal host-side sketch of populating fused_ops follows this hunk).
+    struct fused_operation_desc {
+        enum class Type : uint8_t {
+            ELTWISE = 0,
+            SCALE = 1,
+            QUANTIZE = 2,
+            ACTIVATION = 3,
+            UNDEFINED
+        };
+
+        struct idx_desc {
+            std::string b;
+            std::string f;
+            std::string z;
+            std::string y;
+            std::string x;
+            size_t dims;
+            explicit idx_desc(std::vector<std::string> idx) : b(""), f(""), z(""), y(""), x(""), dims(0) {
+                dims = idx.size();
+                switch (dims) {
+                    case 1: f = idx[0]; break;
+                    case 2: b = idx[0]; f = idx[1]; break;
+                    case 3: b = idx[0]; f = idx[1]; y = idx[2]; break;
+                    case 4: b = idx[0]; f = idx[1]; y = idx[2]; x = idx[3]; break;
+                    case 5: b = idx[0]; f = idx[1]; z = idx[2]; y = idx[3]; x = idx[4]; break;
+                    default: throw std::runtime_error("More than 5 dimenstions is not supported in fused op generator");
+                }
+            }
+        };
+
+        Type type;
+        size_t dep_idx_start;
+        size_t dep_size;
+        MultiDataTensor tensors;
+        DataTensor output_tensor;
+        base_activation_params activation;
+        size_t op_id;
+
+        JitConstants MakeFusedTensorJitConstants(const FusedOpsConfiguration& conf) const;
+        JitConstants MakeInputDeclsJitConstants(const FusedOpsConfiguration& conf) const;
+        JitConstants MakeLoadJitConstants(const FusedOpsConfiguration& conf) const;
+        JitConstants MakeOpJitConstants(const FusedOpsConfiguration& conf, std::string in_var, std::string& out_var) const;
+
+        // Helper functions for operation generation
+        std::string GetTypeStr() const;
+        std::string GetInputTensorName(size_t input_id) const;
+        std::string GetOutputTensorName() const;
+        std::string GetInputTypeName(size_t input_id, size_t vec_size) const;
+        std::string GetJitLoad(const FusedOpsConfiguration& conf, size_t input_id,
+                               bool reuse_index = false, std::string reused_idx = "") const;
+        std::string GetIdx(size_t input_id, idx_desc idx, bool should_be_safe) const;
+        std::string GetInputPtrName(size_t input_id) const;
+        std::string GetInputVarName(size_t input_id) const;
+        std::string GetOutputVarName(std::string input_var_name) const;
+        std::string ConvertToOutputType(std::string var, size_t vec_size = 1) const;
+        std::string ConvertToOutputTypeSat(std::string var, size_t vec_size = 1) const;
+        std::string GetOutputType(size_t vec_size = 1) const;
+    };
+
     base_activation_params activation;
+
+    std::vector<base_activation_params> activations;
+    std::vector<fused_operation_desc> fused_ops = {};
     MultiDataTensor inputs;
     DataTensor output;
     bool gradient = false;
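
To complement the comment above, here is a minimal host-side sketch of how a fused op could be described to a kernel through base_params::fused_ops; the tensor variables, dependency indices, and the ReLU activation are assumptions for illustration, not values taken from this change.

    // Illustrative only: describe one fused eltwise followed by ReLU.
    base_params::fused_operation_desc fused_desc;
    fused_desc.type = base_params::fused_operation_desc::Type::ELTWISE;
    fused_desc.op_id = 0;                        // index of this op in params.fused_ops
    fused_desc.dep_idx_start = 1;                // first extra kernel input belonging to this op (hypothetical)
    fused_desc.dep_size = 1;                     // number of extra inputs it consumes
    fused_desc.tensors = {eltwise_operand};      // DataTensor of the fused operand (hypothetical variable)
    fused_desc.output_tensor = fused_output;     // DataTensor of the fused result (hypothetical variable)
    fused_desc.activation = base_activation_params(ActivationFunction::RELU, 0.f, 0.f);
    params.fused_ops.push_back(fused_desc);

The common generator described in option 1 then turns this descriptor into the FUSED_OP0_LOAD<SUFFIX> / FUSED_OP0_ACTION<SUFFIX> macros consumed by the kernel.
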
index 132c6e4..99e947e 100644 (file)
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # ====================================== Helper constant variables =====================================
 
 # Order of scan for special capabilities files (.inc files with capabilities description).
@@ -49,8 +48,6 @@ intel_config_flag_apply_settings(
 # ================================== Compiler preprocessor definitions =================================
 
 set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
-    CLDNN_EXPORTS
-    EXPORT_NEURAL_SYMBOLS
     "CLDNN_VERSION_MAJOR=${CLDNN__VERSION_MAJOR}"
     "CLDNN_VERSION_MINOR=${CLDNN__VERSION_MINOR}"
     "CLDNN_VERSION_BUILD=${CLDNN__VERSION_BUILD}"
@@ -65,40 +62,11 @@ file(GLOB __CLDNN_Headers__api
     "${CLDNN__API_DIR}/*.hpp"
   )
 
-set(__CLDNN_Directory__api__cpp "${CLDNN__API_DIR}/CPP")
-set(__CLDNN_Label__api__cpp     "${__CLDNN_Label__api}\\CPP")
-file(GLOB __CLDNN_Headers__api__cpp
-    "${__CLDNN_Directory__api__cpp}/*.h"
-    "${__CLDNN_Directory__api__cpp}/*.hpp"
-  )
-
-set(__CLDNN_Directory__api__c "${CLDNN__API_DIR}/C")
-set(__CLDNN_Label__api__c     "${__CLDNN_Label__api}\\C")
-file(GLOB __CLDNN_Headers__api__c
-    "${__CLDNN_Directory__api__c}/*.h"
-    "${__CLDNN_Directory__api__c}/*.hpp"
-  )
-
 set(__CLDNN_Label__api_extension       "api_extension")
 file(GLOB __CLDNN_Headers__api_extension
-    "${CLDNN__API_EXTENSION_DIR}/*.h"
     "${CLDNN__API_EXTENSION_DIR}/*.hpp"
   )
 
-set(__CLDNN_Directory__api_extension__cpp "${CLDNN__API_EXTENSION_DIR}/CPP")
-set(__CLDNN_Label__api_extension__cpp     "${__CLDNN_Label__api_extension}\\CPP")
-file(GLOB __CLDNN_Headers__api_extension__cpp
-    "${__CLDNN_Directory__api_extension__cpp}/*.h"
-    "${__CLDNN_Directory__api_extension__cpp}/*.hpp"
-  )
-
-set(__CLDNN_Directory__api_extension__c "${CLDNN__API_EXTENSION_DIR}/C")
-set(__CLDNN_Label__api_extension__c     "${__CLDNN_Label__api_extension}\\C")
-file(GLOB __CLDNN_Headers__api_extension__c
-    "${__CLDNN_Directory__api_extension__c}/*.h"
-    "${__CLDNN_Directory__api_extension__c}/*.hpp"
-  )
-
 set(__CLDNN_Label__main                "")
 file(GLOB __CLDNN_Sources__main
     "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
@@ -179,11 +147,7 @@ set(__CLDNN_AllSources
     ${__CLDNN_Sources__graph_opt}
     ${__CLDNN_Headers__include}
     ${__CLDNN_Sources__caps}
-    ${__CLDNN_Headers__api__cpp}
-    ${__CLDNN_Headers__api__c}
     ${__CLDNN_Headers__api_extension}
-    ${__CLDNN_Headers__api_extension__c}
-    ${__CLDNN_Headers__api_extension__cpp}
     ${__CLDNN_Sources__main}
     ${__CLDNN_Sources__gpu}
     ${__CLDNN_Sources__cache}
@@ -196,11 +160,7 @@ set_property(SOURCE ${__CLDNN_Sources__cg_cache} PROPERTY GENERATED TRUE)
 # =============================================== Filters ==============================================
 
 source_group("${__CLDNN_Label__api}"                  FILES ${__CLDNN_Headers__api})
-source_group("${__CLDNN_Label__api__cpp}"             FILES ${__CLDNN_Headers__api__cpp})
-source_group("${__CLDNN_Label__api__c}"               FILES ${__CLDNN_Headers__api__c})
 source_group("${__CLDNN_Label__api_extension}"        FILES ${__CLDNN_Headers__api_extension})
-source_group("${__CLDNN_Label__api_extension__cpp}"   FILES ${__CLDNN_Headers__api_extension__cpp})
-source_group("${__CLDNN_Label__api_extension__c}"     FILES ${__CLDNN_Headers__api_extension__c})
 source_group("${__CLDNN_Label__include}"              FILES ${__CLDNN_Headers__include})
 source_group("${__CLDNN_Label__graph_opt}"            FILES ${__CLDNN_Sources__graph_opt})
 source_group("${__CLDNN_Label__caps}"                 FILES ${__CLDNN_Sources__caps})
@@ -225,16 +185,15 @@ include_directories(
 
 # =================================== Link targets and dependencies ====================================
 
-# Main shared library.
-add_library("${CLDNN_BUILD__PROJ}" SHARED
+# Main static library.
+add_library("${CLDNN_BUILD__PROJ}" STATIC
     ${__CLDNN_AllSources}
   )
 set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}")
 set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME   "${CLDNN_BUILD__PROJ_OUTPUT_NAME}")
 
-
 target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE
-    OpenCL
+    clDNN_OpenCL
     cldnn_kernel_selector
   )
 
index a28463a..302b321 100644 (file)
@@ -22,7 +22,7 @@
 #include <vector>
 
 namespace cldnn {
-primitive_type_id activation_type_id() {
+primitive_type_id activation::type_id() {
     static primitive_type_base<activation> instance;
     return &instance;
 }
@@ -32,13 +32,13 @@ layout activation_inst::calc_output_layout(activation_node const& node) {
            "Output data type forcing is not supported for activation_node!");
 
     auto input_node_layout = node.input().get_non_padded_output_layout();
-    auto func = node.get_primitive()->activation_func;
+    auto func = node.get_primitive()->activation_function;
 
-    std::vector<cldnn_activation_func> activations_int8 = {
-        activation_none,
-        activation_negative,
-        activation_not,
-        activation_relu};
+    std::vector<activation_func> activations_int8 = {
+        activation_func::none,
+        activation_func::negative,
+        activation_func::negation,
+        activation_func::relu};
 
     if (input_node_layout.data_type == data_types::i8) {
         if (std::find(activations_int8.begin(), activations_int8.end(), func) == activations_int8.end())
@@ -55,7 +55,7 @@ std::string activation_inst::to_string(activation_node const& node) {
     std::stringstream primitive_description;
 
     json_composite activation_info;
-    activation_info.add("activation_func", desc->activation_func);
+    activation_info.add("activation_func", static_cast<int>(desc->activation_function));
     activation_info.add("additional_params.a", desc->additional_params.a);
     activation_info.add("additional_params.b", desc->additional_params.b);
     activation_info.add("additional_params input", desc->additional_params_input);
@@ -85,7 +85,7 @@ activation_inst::typed_primitive_inst(network_impl& network, activation_node con
 
         CLDNN_ERROR_LESS_THAN(node.id(),
                               "Slope x size",
-                              slope_input_size.spatial[0],
+                              slope_input_size.feature[0],
                               "input feature size",
                               input_feature_size,
                               "Dimensions mismatch between input and slope input in Activation layer(slope x size "
@@ -96,7 +96,7 @@ activation_inst::typed_primitive_inst(network_impl& network, activation_node con
                               "Slope input size count",
                               slope_input_size.count(),
                               "Slope input size x",
-                              slope_input_size.spatial[0],
+                              slope_input_size.feature[0],
                               "Dimensions mismatch of slope input in Activation layer!");
     }
 }
index 1700499..3be8933 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id activation_grad_type_id() {
+primitive_type_id activation_grad::type_id() {
     static primitive_type_base<activation_grad> instance;
     return &instance;
 }
@@ -40,7 +40,7 @@ std::string activation_grad_inst::to_string(activation_grad_node const& node) {
     std::stringstream primitive_description;
 
     json_composite activation_grad_info;
-    activation_grad_info.add("activation_grad_func", desc->activation_grad_func);
+    activation_grad_info.add("activation_grad_func", static_cast<int>(desc->activation_grad_function));
     activation_grad_info.add("additional_params.a", desc->additional_params.a);
     activation_grad_info.add("additional_params.b", desc->additional_params.b);
     activation_grad_info.add("additional_params input", desc->additional_params_input);
index 96504ab..89f0f8d 100644 (file)
@@ -22,7 +22,7 @@
 #include <memory>
 
 namespace cldnn {
-primitive_type_id apply_adam_type_id() {
+primitive_type_id apply_adam::type_id() {
     static primitive_type_base<apply_adam> instance;
     return &instance;
 }
index 806e9ee..1238696 100644 (file)
@@ -24,7 +24,7 @@
 #include <limits>
 
 namespace cldnn {
-primitive_type_id arg_max_min_type_id() {
+primitive_type_id arg_max_min::type_id() {
     static primitive_type_base<arg_max_min> instance;
     return &instance;
 }
index 6e681cc..df37d36 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id average_unpooling_type_id() {
+primitive_type_id average_unpooling::type_id() {
     static primitive_type_base<average_unpooling> instance;
     return &instance;
 }
index 8822fa0..0a9bdcb 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id batch_norm_type_id() {
+primitive_type_id batch_norm::type_id() {
     static primitive_type_base<batch_norm> instance;
     return &instance;
 }
index c5c6697..18e057f 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id batch_norm_grad_type_id() {
+primitive_type_id batch_norm_grad::type_id() {
     static primitive_type_base<batch_norm_grad> instance;
     return &instance;
 }
index 3f4659b..344bb3c 100644 (file)
@@ -16,6 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include "binary_convolution_inst.h"
+#include "convolution_inst.h"
+#include "reorder_inst.h"
 #include "primitive_type_base.h"
 #include "sliding_window_utils.h"
 #include "error_handler.h"
@@ -23,7 +25,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id binary_convolution_type_id() {
+primitive_type_id binary_convolution::type_id() {
     static primitive_type_base<binary_convolution> instance;
     return &instance;
 }
@@ -31,8 +33,27 @@ primitive_type_id binary_convolution_type_id() {
 layout binary_convolution_inst::calc_output_layout(binary_convolution_node const& node) {
     auto desc = node.get_primitive();
 
-    auto odt = *node.get_primitive()->output_data_type;
-    return {odt, format::bfyx, desc->output_size};
+    auto output_type = *node.get_primitive()->output_data_type;
+    auto output_size = desc->output_size;
+    auto layout = cldnn::layout{output_type, format::bfyx, output_size};
+    if (node.has_fused_primitives()) {
+        layout = node.get_fused_output_layout();
+    }
+
+    auto users = node.get_users();
+    if (users.size() == 1 && users.front()->is_type<convolution>()) {
+        auto conv_split = users.front()->as<convolution>().get_split();
+        auto conv_groups = (int32_t)users.front()->as<convolution>().get_groups();
+
+        bool next_is_dw = ((conv_split > 1 && conv_split == output_size.feature[0]) ||
+                           (conv_groups > 1 && conv_groups == output_size.feature[0]));
+
+        if ((layout.data_type == data_types::f16 || layout.data_type == data_types::f32) && next_is_dw) {
+            layout.format = cldnn::format::bfyx_f16;
+        }
+    }
+
+    return layout;
 }
 
 std::string binary_convolution_inst::to_string(binary_convolution_node const& node) {
index f322949..2eacdd0 100644 (file)
@@ -21,7 +21,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id border_type_id() {
+primitive_type_id border::type_id() {
     static primitive_type_base<border> instance;
     return &instance;
 }
@@ -33,12 +33,21 @@ layout border_inst::calc_output_layout(border_node const& node) {
     auto desc = node.get_primitive();
 
     auto&& new_size = input_layout.size;
-    new_size += desc->left_top_sizes.sub({0, 0, 0, 0});
-    new_size += desc->right_bottom_sizes.sub({0, 0, 0, 0});
-
-    return {input_layout.data_type,
-            input_layout.format,
-            {new_size.batch[0], new_size.feature[0], new_size.spatial[0], new_size.spatial[1]}};
+    new_size += desc->left_top_sizes.sub(tensor(0));
+    new_size += desc->right_bottom_sizes.sub(tensor(0));
+
+    auto ret_data_t = input_layout.data_type;
+    auto ret_format = input_layout.format;
+
+    if (ret_format == format::bfwzyx) {
+        return layout{ ret_data_t, ret_format, tensor(batch(new_size.batch[0]), feature(new_size.feature[0]),
+            spatial(new_size.spatial[0], new_size.spatial[1], new_size.spatial[2], new_size.spatial[3])) };
+    } else if (ret_format == format::bfzyx) {
+        return layout{ ret_data_t, ret_format, tensor(batch(new_size.batch[0]), feature(new_size.feature[0]),
+            spatial(new_size.spatial[0], new_size.spatial[1], new_size.spatial[2])) };
+    }
+    return layout{ ret_data_t, ret_format, tensor(batch(new_size.batch[0]), feature(new_size.feature[0]),
+        spatial(new_size.spatial[0], new_size.spatial[1])) };
 }
 
 std::string border_inst::to_string(border_node const& node) {
@@ -50,6 +59,9 @@ std::string border_inst::to_string(border_node const& node) {
 
     const char* border_type_str = "unknown";
     switch (desc->type) {
+        case border_type::zero:
+            border_type_str = "zero";
+            break;
         case border_type::constant:
             border_type_str = "constant";
             break;
@@ -62,6 +74,9 @@ std::string border_inst::to_string(border_node const& node) {
         case border_type::mirror_101:
             border_type_str = "mirror-101";
             break;
+        default:
+            border_type_str = "unknown";
+            break;
     }
 
     auto node_info = node.desc_to_json();
@@ -85,8 +100,8 @@ border_inst::typed_primitive_inst(network_impl& network, border_node const& node
     const auto input_format = input_layout.format;
     const auto& input_sizes = input_layout.size;
 
-    auto lt_sizes = argument.left_top_sizes.sub({0, 0, 0, 0});
-    auto rb_sizes = argument.right_bottom_sizes.sub({0, 0, 0, 0});
+    auto lt_sizes = argument.left_top_sizes.sub(tensor(0));
+    auto rb_sizes = argument.right_bottom_sizes.sub(tensor(0));
     auto b_type = argument.type;
 
     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(),
@@ -95,9 +110,11 @@ border_inst::typed_primitive_inst(network_impl& network, border_node const& node
                                   "supported border primitive input formats",
                                   format::bfyx,
                                   format::yxfb,
-                                  format::byxf);
+                                  format::byxf,
+                                  format::bfzyx,
+                                  format::bfwzyx);
 
-    tensor null_tensor { 0, 0, 0, 0 };
+    tensor null_tensor = tensor(0);
 
     // Check if sizes of border are in proper range.
     CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(),
index 11f6734..09a2a8b 100644 (file)
@@ -22,7 +22,7 @@
 #include <set>
 
 namespace cldnn {
-primitive_type_id broadcast_type_id() {
+primitive_type_id broadcast::type_id() {
     static primitive_type_base<broadcast> instance;
     return &instance;
 }
index b60a7da..2deb999 100644 (file)
 */
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-#include "api/C/cldnn.h"
-#include "api_impl.h"
-#include "engine_impl.h"
-#include "topology_impl.h"
-#include "program_impl.h"
-#include "primitive_type.h"
-#include "network_impl.h"
-#include "memory_impl.h"
-#include "primitive_inst.h"
+#include "api/cldnn.hpp"
+#include <memory>
 #include <string>
 #include <vector>
 
-namespace cldnn {
-last_err& last_err::instance() {
-    thread_local static last_err _instance;
-    return _instance;
-}
-}  // namespace cldnn
-
-#define SHOULD_NOT_BE_NULL(arg, msg_prefix) \
-    if (arg == nullptr)                     \
-        throw std::invalid_argument(std::string(msg_prefix) + " should not be null.");
-#define SHOULD_NOT_EQUAL_0(arg, msg_prefix) \
-    if (arg == 0)                           \
-        throw std::invalid_argument(std::string(msg_prefix) + " should not equals 0.");
-
-extern "C" {
-
 #ifndef CLDNN_VERSION_MAJOR
 #define CLDNN_VERSION_MAJOR (0)
 #endif
@@ -59,820 +36,10 @@ extern "C" {
 #define CLDNN_VERSION_REVISION (0)
 #endif
 
-cldnn_version cldnn_get_version(cldnn_status* status) {
-    return exception_handler<cldnn_version>(CLDNN_ERROR, status, {}, []() -> cldnn_version {
-        return {CLDNN_VERSION_MAJOR, CLDNN_VERSION_MINOR, CLDNN_VERSION_BUILD, CLDNN_VERSION_REVISION};
-    });
-}
-
-cldnn_topology cldnn_create_topology(cldnn_status* status) {
-    return exception_handler<cldnn_topology>(CLDNN_ERROR, status, nullptr, [&]() {
-        return api_cast(new cldnn::topology_impl());
-    });
-}
-
-void cldnn_add_primitive(cldnn_topology topology, const CLDNN_PRIMITIVE_DESC(primitive) * dto, cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        SHOULD_NOT_BE_NULL(dto, "Primitive");
-        SHOULD_NOT_BE_NULL(dto->id, "Primitive id");
-        SHOULD_NOT_BE_NULL(dto->type, "Primitive type");
-        api_cast(topology)->add(dto->type->from_dto(dto));
-    });
-}
-
-void cldnn_change_input_layout(cldnn_topology topology,
-                               cldnn_primitive_id id,
-                               cldnn_layout new_layout,
-                               cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        SHOULD_NOT_BE_NULL(id, "Input layout id");
-        if (new_layout.format < cldnn_format_any || new_layout.format >= cldnn_format_format_num)
-            throw std::invalid_argument("Unknown format of layout.");
-        if (new_layout.data_type != cldnn_data_type::cldnn_f16 && new_layout.data_type != cldnn_data_type::cldnn_f32 &&
-            new_layout.data_type != cldnn_data_type::cldnn_i8 && new_layout.data_type != cldnn_data_type::cldnn_bin &&
-            new_layout.data_type != cldnn_data_type::cldnn_u8 && new_layout.data_type != cldnn_data_type::cldnn_i32 &&
-            new_layout.data_type != cldnn_data_type::cldnn_i64)
-            throw std::invalid_argument("Unknown data_type of layout.");
-        api_cast(topology)->change_input_layout(id, (layout) new_layout);
-    });
-}
-
-static void primitive_id_vector_to_char_array(char* names,
-                                              size_t size,
-                                              size_t* size_ret,
-                                              cldnn_status* status,
-                                              const std::vector<primitive_id>& vec) {
-    *size_ret = std::accumulate(std::begin(vec),
-                                std::end(vec),
-                                size_t(1),  // final zero symbol
-                                [](size_t acc, const cldnn::primitive_id& id) {
-                                    return acc + id.size() + 1;  // plus zero symbol
-                                });
-
-    if (size < *size_ret) {
-        if (status)
-            *status = CLDNN_INVALID_ARG;
-        return;
-    }
-
-    size_t i = 0;
-    for (auto& id : vec) {
-        // workaround for Microsoft VC++
-#if defined _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#endif
-        i += id.copy(names + i, size - i - 2);
-#if defined _MSC_VER
-#pragma warning(pop)
-#endif
-        names[i++] = 0;  // plus zero symbol
-        assert(i < size);
-    }
-    names[i] = 0;  // final zero symbol
-}
-
-void cldnn_get_primitive_ids(cldnn_topology topology, char* ids, size_t size, size_t* size_ret, cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        auto ids_size = api_cast(topology)->get_primitives().size();
-        SHOULD_NOT_EQUAL_0(ids_size, "Primitives number");
-        auto&& primitives_ids = api_cast(topology)->get_primitives_id();
-        primitive_id_vector_to_char_array(ids, size, size_ret, status, primitives_ids);
-    });
-}
-
-void cldnn_retain_topology(cldnn_topology topology, cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        api_cast(topology)->add_ref();
-    });
-}
-void cldnn_release_topology(cldnn_topology topology, cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        api_cast(topology)->release();
-    });
-}
-
-uint32_t cldnn_get_engine_count(/*cldnn_engine_type*/ int32_t type, cldnn_status* status) {
-    if (type == cldnn_engine_type::cldnn_engine_ocl) {
-        if (status)
-            *status = CLDNN_SUCCESS;
-        return 1;
-    } else {
-        if (status)
-            *status = CLDNN_DEVICE_ERROR;
-        return 0;
-    }
-}
-
-void cldnn_release_pending_memory(cldnn_engine engine, uint16_t stream_id, cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "engine");
-        api_cast(engine)->release_pending_memory(stream_id);
-    });
-}
-
-cldnn_engine cldnn_create_engine(/*cldnn_engine_type*/ int32_t type,
-                                 uint32_t engine_num,
-                                 const cldnn_engine_configuration* configuration,
-                                 cldnn_status* status) {
-    if (engine_num > 0 || (type != cldnn_engine_type::cldnn_engine_ocl)) {
-        if (status)
-            *status = CLDNN_DEVICE_ERROR;
-        return nullptr;
-    }
-
-    return exception_handler<cldnn_engine>(CLDNN_ERROR, status, nullptr, [&]() {
-        return api_cast(new cldnn::engine_impl(configuration ? cldnn::engine_configuration(*configuration)
-                                                             : cldnn::engine_configuration()));
-    });
-}
-
-void cldnn_retain_engine(cldnn_engine engine, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        api_cast(engine)->add_ref();
-    });
-}
-
-void cldnn_release_engine(cldnn_engine engine, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        api_cast(engine)->release();
-    });
-}
-
-cldnn_engine_info cldnn_get_engine_info(cldnn_engine engine, cldnn_status* status) {
-    return exception_handler<cldnn_engine_info>(CLDNN_ERROR,
-                                                status,
-                                                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-                                                [&]() -> cldnn_engine_info {
-                                                    SHOULD_NOT_BE_NULL(engine, "Engine");
-                                                    auto info = api_cast(engine)->get_engine_info();
-                                                    cldnn_engine_info res = {info.cores_count,
-                                                            info.core_frequency,
-                                                            info.max_work_group_size,
-                                                            info.max_local_mem_size,
-                                                            info.max_global_mem_size,
-                                                            info.max_alloc_mem_size,
-                                                            info.max_image2d_width,
-                                                            info.max_image2d_height,
-                                                            info.supports_fp16,
-                                                            info.supports_fp16_denorms,
-                                                            info.supports_subgroups_short,
-                                                            info.supports_image,
-                                                            info.supports_imad,
-                                                            info.supports_immad
-                                                            };
-                                                    strncpy(res.ocl_device_name, info.dev_name.c_str(), CLDNN_API_STRING_SIZE_MAX);
-                                                    strncpy(res.ocl_driver_version, info.driver_version.c_str(), CLDNN_API_STRING_SIZE_MAX);
-                                                    return res;
-                                                });
-}
-
-/*cldnn_engine_type*/ int32_t cldnn_get_engine_type(cldnn_engine engine, cldnn_status* status) {
-    return exception_handler<int32_t>(CLDNN_ERROR, status, cldnn_engine_ocl, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        return static_cast<int32_t>(api_cast(engine)->type());
-    });
-}
-
-int64_t cldnn_get_max_used_device_memory_size(cldnn_engine engine, cldnn_status* status) {
-    return exception_handler<int32_t>(CLDNN_ERROR, status, cldnn_engine_ocl, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        return static_cast<int32_t>(api_cast(engine)->get_max_used_device_memory());
-    });
-}
-
-int64_t cldnn_get_temp_used_device_memory_size(cldnn_engine engine, cldnn_status* status) {
-    return exception_handler<int32_t>(CLDNN_ERROR, status, cldnn_engine_ocl, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        return static_cast<int32_t>(api_cast(engine)->get_used_device_memory());
-    });
-}
-
-cldnn_event cldnn_create_user_event(cldnn_engine engine, uint16_t stream_id, cldnn_status* status) {
-    return exception_handler<cldnn_event>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        event_impl* e = api_cast(engine)->create_user_event(stream_id).detach();
-        return api_cast(e);
-    });
-}
-
-CLDNN_API int32_t cldnn_is_user_event(cldnn_event event, cldnn_status* status) {
-    return exception_handler<int32_t>(CLDNN_ERROR, status, 0, [&]() {
-        SHOULD_NOT_BE_NULL(event, "Event");
-        auto user_ev = dynamic_cast<user_event*>(api_cast(event));
-        return (user_ev != nullptr);
-    });
-}
-
-void cldnn_retain_event(cldnn_event event, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(event, "Event");
-        api_cast(event)->add_ref();
-    });
-}
-
-void cldnn_release_event(cldnn_event event, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(event, "Event");
-        api_cast(event)->release();
-    });
-}
-
-void cldnn_wait_for_event(cldnn_event event, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(event, "Event");
-        api_cast(event)->wait();
-    });
-}
-
-void cldnn_set_event(cldnn_event event, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(event, "Event");
-        if (auto user_ev = dynamic_cast<user_event*>(api_cast(event)))
-            user_ev->set();
-        else
-            throw std::invalid_argument("Event passed to cldnn_set_event should be an user event");
-    });
-}
-
-void cldnn_add_event_handler(cldnn_event event, cldnn_event_handler handler, void* param, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(handler, "Handler");
-        SHOULD_NOT_BE_NULL(event, "Event");
-        api_cast(event)->add_event_handler(handler, param);
-    });
-}
-
-void cldnn_get_event_profiling_info(cldnn_event event,
-                                    cldnn_profiling_interval* profiling,
-                                    size_t size,
-                                    size_t* size_ret,
-                                    cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(event, "Event");
-        if (!profiling && !size_ret) {
-            if (status)
-                *status = CLDNN_INVALID_ARG;
-            return;
-        }
-        auto& profiling_info = api_cast(event)->get_profiling_info();
-        if (size_ret)
-            *size_ret = profiling_info.size();
-        if (profiling != nullptr) {
-            if (size != profiling_info.size()) {
-                if (status)
-                    *status = CLDNN_INVALID_ARG;
-                return;
-            }
-            size_t i = 0;
-            for (auto& info : profiling_info) {
-                profiling[i].name = info.name;
-                profiling[i].nanoseconds = info.nanoseconds;
-                ++i;
-            }
-        }
-    });
-}
-
-void cldnn_get_primitives_info(cldnn_network network,
-                               const cldnn_primitive_info** info,
-                               size_t size,
-                               size_t* size_ret,
-                               cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        if (!info && !size_ret) {
-            if (status)
-                *status = CLDNN_INVALID_ARG;
-            return;
-        }
-        auto& primitives_info = api_cast(network)->get_primitives_info();
-        if (size_ret)
-            *size_ret = primitives_info.size();
-
-        if (info != nullptr) {
-            if (size != primitives_info.size()) {
-                if (status)
-                    *status = CLDNN_INVALID_ARG;
-                return;
-            }
-            size_t i = 0;
-            for (auto& pi : primitives_info) {
-                info[i] = pi.get_dto();
-                ++i;
-            }
-        }
-    });
-}
-
-void cldnn_get_optimizer_passes_info(cldnn_network network,
-                                     const cldnn_primitive_info** info,
-                                     int* pass_sizes,
-                                     char* pass_names,
-                                     size_t total_size,
-                                     size_t* total_size_ret,
-                                     size_t* pass_count_ret,
-                                     size_t* pass_names_total_size_ret,
-                                     cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        if ((!info || !pass_sizes || !pass_names) &&
-            (!total_size_ret || !pass_count_ret || !pass_names_total_size_ret)) {
-            if (status)
-                *status = CLDNN_INVALID_ARG;
-            return;
-        }
-
-        auto& opt_passes_info = api_cast(network)->get_optimizer_passes_info();
-        size_t pi_total_size = 0;
-        size_t names_total_size = 0;
-        std::vector<primitive_id> names;
-        for (auto& step : opt_passes_info) {
-            pi_total_size += step.second.size();
-            names_total_size += step.first.size() + 1;
-            names.push_back(step.first);
-        }
-
-        if (total_size_ret && pass_count_ret && pass_names_total_size_ret) {
-            *total_size_ret = pi_total_size;
-            *pass_count_ret = opt_passes_info.size();
-
-            primitive_id_vector_to_char_array(pass_names, 0, pass_names_total_size_ret, status, names);
-            // Function should return invalid arg when it is used to get output size, so reset it to success
-            *status = CLDNN_SUCCESS;
-        }
-
-        if (info != nullptr && pass_sizes != nullptr && pass_names != nullptr) {
-            if (total_size != pi_total_size) {
-                if (status)
-                    *status = CLDNN_INVALID_ARG;
-                return;
-            }
-
-            primitive_id_vector_to_char_array(pass_names,
-                                              *pass_names_total_size_ret,
-                                              pass_names_total_size_ret,
-                                              status,
-                                              names);
-
-            if (*status != CLDNN_SUCCESS)
-                return;
-
-            size_t step_idx = 0;
-            size_t global_off = 0;
-            for (auto& step : opt_passes_info) {
-                for (auto& pi : step.second) {
-                    info[global_off] = pi.get_dto();
-                    global_off++;
-                }
-                pass_sizes[step_idx++] = static_cast<int>(step.second.size());
-            }
-        }
-    });
-}
-
-cldnn_program cldnn_build_program(cldnn_engine engine,
-                                  cldnn_topology topology,
-                                  cldnn_build_option* options,
-                                  size_t options_num,
-                                  cldnn_status* status) {
-    return exception_handler<cldnn_program>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        cldnn::build_options options_obj(cldnn::array_ref<cldnn_build_option>(options, options_num));
-
-        cldnn::program_impl* prog = api_cast(engine)->build_program(*api_cast(topology), options_obj).detach();
-        return api_cast(prog);
-    });
-}
-
-void cldnn_retain_program(cldnn_program program, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(program, "Program");
-        api_cast(program)->add_ref();
-    });
-}
-
-void cldnn_release_program(cldnn_program program, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(program, "Program");
-        api_cast(program)->release();
-    });
-}
-
-cldnn_network cldnn_allocate_network(cldnn_program program, uint16_t stream_id, cldnn_status* status) {
-    return exception_handler<cldnn_network>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(program, "Program");
-        network_impl* p = api_cast(program)->get_engine().allocate_network(*api_cast(program), stream_id).detach();
-        return api_cast(p);
-    });
-}
-
-cldnn_network cldnn_build_network(cldnn_engine engine,
-                                  cldnn_topology topology,
-                                  cldnn_build_option* options,
-                                  size_t options_num,
-                                  cldnn_status* status) {
-    cldnn_program program = cldnn_build_program(engine, topology, options, options_num, status);
-    if (!program)
-        return nullptr;
-
-    cldnn_network network = cldnn_allocate_network(program, 0, status);
-    cldnn_release_program(program, nullptr);
-    return network;
-}
-
-void cldnn_retain_network(cldnn_network network, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        api_cast(network)->add_ref();
-    });
-}
-
-void cldnn_release_network(cldnn_network network, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        api_cast(network)->release();
-    });
-}
-
-void cldnn_set_network_input(cldnn_network network, cldnn_primitive_id id, cldnn_memory mem, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(mem, "Mem");
-        auto mem_size = api_cast(mem)->size();
-        SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_BE_NULL(id, "Id");
-        SHOULD_NOT_EQUAL_0(mem_size, "Memory size");
-        api_cast(network)->set_input_data(id, *api_cast(mem));
-    });
-}
-
-void cldnn_set_learning_rate(cldnn_network network, float lr, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() { api_cast(network)->set_learning_rate(lr); });
-}
-
-float cldnn_get_learning_rate(cldnn_network network, cldnn_status* status) {
-    return exception_handler<float>(CLDNN_ERROR, status, 0, [&]() { return api_cast(network)->get_learning_rate(); });
-}
-
-cldnn_engine cldnn_get_network_engine(cldnn_network network, cldnn_status* status) {
-    return exception_handler<cldnn_engine>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        refcounted_obj_ptr<cldnn::engine_impl> ptr{&api_cast(network)->get_engine()};
-        return api_cast(ptr.detach());
-    });
-}
-
-cldnn_program cldnn_get_network_program(cldnn_network network, cldnn_status* status) {
-    return exception_handler<cldnn_program>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        refcounted_obj_ptr<cldnn::program_impl> ptr{
-            const_cast<cldnn::program_impl*>(&api_cast(network)->get_program())};
-        return api_cast(ptr.detach());
-    });
-}
-
-void cldnn_get_primitive_info(cldnn_network network,
-                              cldnn_primitive_id prim_id,
-                              char* info,
-                              size_t size,
-                              size_t* size_ret,
-                              cldnn_status* status) {
-    return exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        const auto& prim_info = api_cast(network)->get_primitive_info(prim_id);
-        *size_ret = prim_info.size() + 1;
-
-        if (size < *size_ret) {
-            if (status)
-                *status = CLDNN_INVALID_ARG;
-            return;
-        }
-
-        size_t i = 0;
-        for (const auto c : prim_info) {
-            info[i++] = c;
-            assert(i < size);
-        }
-        info[i] = 0;  // final zero symbol
-    });
-}
-
-void cldnn_get_network_output_names(cldnn_network network,
-                                    char* names,
-                                    size_t size,
-                                    size_t* size_ret,
-                                    cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        auto&& output_ids = api_cast(network)->get_output_ids();
-        SHOULD_NOT_EQUAL_0(output_ids.size(), "Output size");
-        primitive_id_vector_to_char_array(names, size, size_ret, status, output_ids);
-    });
-}
-
-void cldnn_get_network_executed_primitive_names(cldnn_network network,
-                                                char* names,
-                                                size_t size,
-                                                size_t* size_ret,
-                                                cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        auto&& primitive_ids = api_cast(network)->get_executed_primitive_ids();
-        primitive_id_vector_to_char_array(names, size, size_ret, status, primitive_ids);
-    });
-}
-
-void cldnn_get_network_all_primitive_names(cldnn_network network,
-                                           char* names,
-                                           size_t size,
-                                           size_t* size_ret,
-                                           cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        auto&& primitive_ids = api_cast(network)->get_all_primitive_ids();
-        SHOULD_NOT_EQUAL_0(primitive_ids.size(), "Primitives size");
-        primitive_id_vector_to_char_array(names, size, size_ret, status, primitive_ids);
-    });
-}
-
-void cldnn_get_network_all_primitive_org_names(cldnn_network network,
-                                               char* names,
-                                               size_t size,
-                                               size_t* size_ret,
-                                               cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        auto&& primitive_ids = api_cast(network)->get_all_primitive_org_ids();
-        SHOULD_NOT_EQUAL_0(primitive_ids.size(), "Primitives size");
-        primitive_id_vector_to_char_array(names, size, size_ret, status, primitive_ids);
-    });
-}
-
-void cldnn_execute_network(cldnn_network network, cldnn_event* dependencies, size_t deps_num, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        std::vector<cldnn::refcounted_obj_ptr<cldnn::event_impl>> deps;
-        deps.reserve(deps_num);
-        for (size_t i = 0; i < deps_num; i++) {
-            deps.emplace_back(api_cast(dependencies[i]));
-        }
-
-        api_cast(network)->execute(deps);
-    });
-}
-
-cldnn_network_output cldnn_get_network_output(cldnn_network network, const char* name, cldnn_status* status) {
-    cldnn_network_output error_result = {nullptr, nullptr};
-    return exception_handler<cldnn_network_output>(CLDNN_ERROR, status, error_result, [&]() -> cldnn_network_output {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_BE_NULL(name, "ID of primitive");
-        cldnn::primitive_id id(name);
-        auto event = api_cast(network)->get_primitive_event(id);
-        auto& mem_result = api_cast(network)->get_primitive(id)->output_memory();
-        refcounted_obj_ptr<cldnn::memory_impl> mem_ptr{&mem_result};
-        return {api_cast(event.detach()), api_cast(mem_ptr.detach())};
-    });
-}
-
-cldnn_memory cldnn_get_network_output_memory(cldnn_network network, const char* name, cldnn_status* status) {
-    cldnn_memory error_result = nullptr;
-    return exception_handler<cldnn_memory>(CLDNN_ERROR, status, error_result, [&]() -> cldnn_memory {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_BE_NULL(name, "ID of primitive");
-        cldnn::primitive_id id(name);
-        auto& mem_result = api_cast(network)->get_primitive(id)->output_memory();
-        refcounted_obj_ptr<cldnn::memory_impl> mem_ptr{&mem_result};
-        return api_cast(mem_ptr.detach());
-    });
-}
-
-cldnn_event cldnn_get_network_output_event(cldnn_network network, const char* name, cldnn_status* status) {
-    cldnn_event error_result = nullptr;
-    return exception_handler<cldnn_event>(CLDNN_ERROR, status, error_result, [&]() -> cldnn_event {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_BE_NULL(name, "ID of primitive");
-        cldnn::primitive_id id(name);
-        auto event = api_cast(network)->get_primitive_event(id);
-        return api_cast(event.detach());
-    });
-}
-
-cldnn_memory cldnn_allocate_memory(cldnn_engine engine, cldnn_layout layout, uint16_t stream_id, cldnn_status* status) {
-    return exception_handler<cldnn_memory>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(engine, "Engine");
-        if (layout.format < cldnn_format_any || layout.format >= cldnn_format_format_num)
-            throw std::invalid_argument("Unknown format of layout.");
-        if (layout.data_type != cldnn_data_type::cldnn_f16 && layout.data_type != cldnn_data_type::cldnn_f32 &&
-            layout.data_type != cldnn_data_type::cldnn_i8 && layout.data_type != cldnn_data_type::cldnn_u8 &&
-            layout.data_type != cldnn_data_type::cldnn_bin && layout.data_type != cldnn_data_type::cldnn_i32 &&
-            layout.data_type != cldnn_data_type::cldnn_i64)
-            throw std::invalid_argument("Unknown data_type of layout.");
-
-        cldnn::memory_impl* mem_ptr = api_cast(engine)->allocate_memory((cldnn::layout)layout, stream_id).detach();
-        return api_cast(mem_ptr);
-    });
-}
-
-cldnn_memory cldnn_attach_memory(cldnn_layout layout,
-                                 void* pointer,
-                                 size_t size,
-                                 uint16_t stream_id,
-                                 cldnn_status* status) {
-    return exception_handler<cldnn_memory>(CLDNN_ERROR, status, nullptr, [&]() {
-        cldnn::layout layout_obj(layout);
-        if (layout_obj.bytes_count() > size)
-            throw std::invalid_argument("buffer size does not match layout size");
-        return api_cast(new cldnn::simple_attached_memory(layout_obj, pointer, stream_id));
-    });
-}
-
-CLDNN_API int32_t cldnn_is_the_same_buffer(cldnn_memory mem1, cldnn_memory mem2, cldnn_status* status) {
-    return static_cast<int32_t>(exception_handler<bool>(CLDNN_ERROR, status, false, [&]() {
-        SHOULD_NOT_BE_NULL(mem1, "Memory");
-        SHOULD_NOT_BE_NULL(mem2, "Memory");
-
-        if (mem1 == mem2)
-            return true;
-
-        if (api_cast(mem1)->get_engine() != api_cast(mem2)->get_engine())
-            return false;
-
-        // memories were allocated by the user so just check if pointers match
-        if (!api_cast(mem1)->get_engine())
-            return api_cast(mem1)->lock() == api_cast(mem2)->lock();
-
-        // memories were allocated by the engine so let it decide whether they refer to the same buffer
-        return api_cast(mem1)->get_engine()->is_the_same_buffer(*api_cast(mem1), *api_cast(mem2));
-    }));
-}
-
-void cldnn_retain_memory(cldnn_memory memory, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        api_cast(memory)->add_ref();
-    });
-}
-
-void cldnn_release_memory(cldnn_memory memory, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        api_cast(memory)->release();
-    });
-}
-
-void* cldnn_lock_memory(cldnn_memory memory, cldnn_status* status) {
-    return exception_handler<void*>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        return api_cast(memory)->lock();
-    });
-}
-
-void cldnn_unlock_memory(cldnn_memory memory, cldnn_status* status) {
-    exception_handler(CLDNN_ERROR, status, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        api_cast(memory)->unlock();
-    });
-}
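
Taken together, the functions removed in this file formed the C-level inference loop. Below is a minimal sketch of how they were typically chained; the engine, topology, input layout, primitive ids ("input", "output") and host buffer are placeholders assumed to be prepared by the caller, and error handling is reduced to a shared status variable.

    // Sketch only; every call reports through 'status'.
    cldnn_status status = CLDNN_SUCCESS;

    // Build a program from the topology and instantiate a network on stream 0.
    cldnn_network net = cldnn_build_network(engine, topology, nullptr, 0, &status);

    // Wrap a caller-owned host buffer as the input of the primitive named "input" (placeholder id).
    cldnn_memory in_mem = cldnn_attach_memory(input_layout, host_ptr, host_size, 0, &status);
    cldnn_set_network_input(net, "input", in_mem, &status);

    // Run without extra event dependencies.
    cldnn_execute_network(net, nullptr, 0, &status);

    // Read back the primitive named "output" (placeholder id): lock for a host pointer, then unlock.
    cldnn_memory out_mem = cldnn_get_network_output_memory(net, "output", &status);
    float* out_ptr = static_cast<float*>(cldnn_lock_memory(out_mem, &status));
    // ... consume out_ptr ...
    cldnn_unlock_memory(out_mem, &status);

    // Objects handed out by this API are refcounted and must be released.
    cldnn_release_memory(out_mem, &status);
    cldnn_release_memory(in_mem, &status);
    cldnn_release_network(net, &status);
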
-
-cldnn_layout cldnn_get_memory_layout(cldnn_memory memory, cldnn_status* status) {
-    cldnn_layout error_result = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, {0, 0, 0, 0});
-
-    return exception_handler<cldnn_layout>(CLDNN_ERROR, status, error_result, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        auto memory_size = api_cast(memory)->size();
-        SHOULD_NOT_EQUAL_0(memory_size, "Memory size");
-        return api_cast(memory)->get_layout();
-    });
-}
-
-uint16_t cldnn_get_memory_stream_id(cldnn_memory memory, cldnn_status* status) {
-    return exception_handler<uint16_t>(CLDNN_ERROR, status, 0, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        return api_cast(memory)->get_stream_id();
-    });
-}
-
-uint16_t cldnn_get_network_stream_id(cldnn_network network, cldnn_status* status) {
-    return exception_handler<uint16_t>(CLDNN_ERROR, status, 0, [&]() {
-        SHOULD_NOT_BE_NULL(network, "Network");
-        return api_cast(network)->get_stream_id();
-    });
-}
-
-cldnn_engine cldnn_get_memory_engine(cldnn_memory memory, cldnn_status* status) {
-    return exception_handler<cldnn_engine>(CLDNN_ERROR, status, nullptr, [&]() {
-        SHOULD_NOT_BE_NULL(memory, "Memory");
-        auto engine = api_cast(memory)->get_engine();
-        return api_cast(engine.detach());
-    });
-}
-
-const char* cldnn_get_last_error_message() {
-    try {
-        return cldnn::last_err::instance().get_last_error_message().c_str();
-    } catch (...) {
-        return "Reading error message failed.";
-    }
-}
+namespace cldnn {
 
-CLDNN_API uint16_t cldnn_float_to_half(float value, cldnn_status* status) {
-    return exception_handler<uint16_t>(CLDNN_ERROR, status, 0, [&]() { return cldnn::float_to_half(value); });
+version_t get_version() {
+    return { CLDNN_VERSION_MAJOR, CLDNN_VERSION_MINOR, CLDNN_VERSION_BUILD, CLDNN_VERSION_REVISION };
 }
 
-CLDNN_API float cldnn_half_to_float(uint16_t value, cldnn_status* status) {
-    return exception_handler<float>(CLDNN_ERROR, status, 0.0f, [&]() { return cldnn::half_to_float(value); });
 }
-
-} /* extern "C" */
-
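
On the C++ side the status plumbing goes away; for instance the new cldnn::get_version() added above returns a struct directly. A sketch, assuming version_t exposes major/minor/build/revision members matching the brace-initializer above:

    #include <iostream>
    // Assumes the new public clDNN C++ header that declares cldnn::get_version().
    int main() {
        const auto v = cldnn::get_version();
        std::cout << "clDNN " << v.major << '.' << v.minor
                  << " (build " << v.build << ", rev " << v.revision << ")" << std::endl;
        return 0;
    }
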
-#define PRIMITIVE_TYPE_ID_CALL_IMPL(PType)                                                       \
-    namespace cldnn {                                                                            \
-    primitive_type_id PType##_type_id();                                                         \
-    }                                                                                            \
-    extern "C" CLDNN_API cldnn_primitive_type_id cldnn_##PType##_type_id(cldnn_status* status) { \
-        return exception_handler<cldnn_primitive_type_id>(CLDNN_ERROR, status, nullptr, []() {   \
-            return cldnn::PType##_type_id();                                                     \
-        });                                                                                      \
-    }
-
-PRIMITIVE_TYPE_ID_CALL_IMPL(activation)
-PRIMITIVE_TYPE_ID_CALL_IMPL(activation_grad)
-PRIMITIVE_TYPE_ID_CALL_IMPL(arg_max_min)
-PRIMITIVE_TYPE_ID_CALL_IMPL(average_unpooling)
-PRIMITIVE_TYPE_ID_CALL_IMPL(batch_norm)
-PRIMITIVE_TYPE_ID_CALL_IMPL(batch_norm_grad)
-PRIMITIVE_TYPE_ID_CALL_IMPL(border)
-PRIMITIVE_TYPE_ID_CALL_IMPL(broadcast)
-PRIMITIVE_TYPE_ID_CALL_IMPL(convolution)
-PRIMITIVE_TYPE_ID_CALL_IMPL(crop)
-PRIMITIVE_TYPE_ID_CALL_IMPL(data)
-PRIMITIVE_TYPE_ID_CALL_IMPL(embed)
-PRIMITIVE_TYPE_ID_CALL_IMPL(mutable_data)
-PRIMITIVE_TYPE_ID_CALL_IMPL(deconvolution)
-PRIMITIVE_TYPE_ID_CALL_IMPL(concatenation)
-PRIMITIVE_TYPE_ID_CALL_IMPL(eltwise)
-PRIMITIVE_TYPE_ID_CALL_IMPL(fully_connected)
-PRIMITIVE_TYPE_ID_CALL_IMPL(fused_conv_bn_scale)
-PRIMITIVE_TYPE_ID_CALL_IMPL(fused_conv_eltwise)
-PRIMITIVE_TYPE_ID_CALL_IMPL(input_layout)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lookup_table)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lrn)
-PRIMITIVE_TYPE_ID_CALL_IMPL(max_unpooling)
-PRIMITIVE_TYPE_ID_CALL_IMPL(permute)
-PRIMITIVE_TYPE_ID_CALL_IMPL(pooling)
-PRIMITIVE_TYPE_ID_CALL_IMPL(reorder)
-PRIMITIVE_TYPE_ID_CALL_IMPL(reshape)
-PRIMITIVE_TYPE_ID_CALL_IMPL(scale)
-PRIMITIVE_TYPE_ID_CALL_IMPL(scale_grad_input)
-PRIMITIVE_TYPE_ID_CALL_IMPL(scale_grad_weights)
-PRIMITIVE_TYPE_ID_CALL_IMPL(softmax)
-PRIMITIVE_TYPE_ID_CALL_IMPL(region_yolo)
-PRIMITIVE_TYPE_ID_CALL_IMPL(reorg_yolo)
-PRIMITIVE_TYPE_ID_CALL_IMPL(proposal)
-PRIMITIVE_TYPE_ID_CALL_IMPL(roi_pooling)
-PRIMITIVE_TYPE_ID_CALL_IMPL(prior_box)
-PRIMITIVE_TYPE_ID_CALL_IMPL(detection_output)
-PRIMITIVE_TYPE_ID_CALL_IMPL(detection_output_sort)
-PRIMITIVE_TYPE_ID_CALL_IMPL(normalize)
-PRIMITIVE_TYPE_ID_CALL_IMPL(generic_layer)
-PRIMITIVE_TYPE_ID_CALL_IMPL(custom_gpu_primitive)
-PRIMITIVE_TYPE_ID_CALL_IMPL(split)
-PRIMITIVE_TYPE_ID_CALL_IMPL(upsampling)
-PRIMITIVE_TYPE_ID_CALL_IMPL(convolution_grad_weights)
-PRIMITIVE_TYPE_ID_CALL_IMPL(apply_adam)
-PRIMITIVE_TYPE_ID_CALL_IMPL(mvn)
-PRIMITIVE_TYPE_ID_CALL_IMPL(fully_connected_grad_input)
-PRIMITIVE_TYPE_ID_CALL_IMPL(fully_connected_grad_weights)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lstm)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lstm_gemm)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lstm_elt)
-PRIMITIVE_TYPE_ID_CALL_IMPL(softmax_loss_grad)
-PRIMITIVE_TYPE_ID_CALL_IMPL(tile)
-PRIMITIVE_TYPE_ID_CALL_IMPL(gemm)
-PRIMITIVE_TYPE_ID_CALL_IMPL(select)
-PRIMITIVE_TYPE_ID_CALL_IMPL(index_select)
-PRIMITIVE_TYPE_ID_CALL_IMPL(condition)
-PRIMITIVE_TYPE_ID_CALL_IMPL(pyramid_roi_align)
-PRIMITIVE_TYPE_ID_CALL_IMPL(contract)
-PRIMITIVE_TYPE_ID_CALL_IMPL(one_hot)
-PRIMITIVE_TYPE_ID_CALL_IMPL(gather)
-PRIMITIVE_TYPE_ID_CALL_IMPL(depth_to_space)
-PRIMITIVE_TYPE_ID_CALL_IMPL(shuffle_channels)
-PRIMITIVE_TYPE_ID_CALL_IMPL(strided_slice)
-PRIMITIVE_TYPE_ID_CALL_IMPL(reverse_sequence)
-PRIMITIVE_TYPE_ID_CALL_IMPL(binary_convolution)
-PRIMITIVE_TYPE_ID_CALL_IMPL(quantize)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lstm_dynamic)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lstm_dynamic_input)
-PRIMITIVE_TYPE_ID_CALL_IMPL(lstm_dynamic_timeloop)
-PRIMITIVE_TYPE_ID_CALL_IMPL(reduce)
-PRIMITIVE_TYPE_ID_CALL_IMPL(deformable_interp)
-PRIMITIVE_TYPE_ID_CALL_IMPL(deformable_conv)
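
The macro above used to generate, for every primitive, a forward declaration of a free <prim>_type_id() function plus an extern "C" cldnn_<prim>_type_id() wrapper. The per-primitive hunks that follow replace the free function with a static member on the primitive class, so the same lookup now reads:

    // Old (generated by PRIMITIVE_TYPE_ID_CALL_IMPL(concatenation)):
    //   primitive_type_id tid = cldnn::concatenation_type_id();
    //   cldnn_primitive_type_id ctid = cldnn_concatenation_type_id(&status);
    //
    // New member-function form used throughout the files below:
    cldnn::primitive_type_id tid = cldnn::concatenation::type_id();
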
index 3c0f465..16ae97c 100644 (file)
@@ -24,7 +24,7 @@
 #include <list>
 
 namespace cldnn {
-primitive_type_id concatenation_type_id() {
+primitive_type_id concatenation::type_id() {
     static primitive_type_base<concatenation> instance;
     return &instance;
 }
index 7104f71..75dc52c 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id condition_type_id() {
+primitive_type_id condition::type_id() {
     static primitive_type_base<condition> instance;
     return &instance;
 }
index 25fe80f..28a90c6 100644 (file)
@@ -22,7 +22,7 @@
 #include <set>
 
 namespace cldnn {
-primitive_type_id contract_type_id() {
+primitive_type_id contract::type_id() {
     static primitive_type_base<contract> instance;
     return &instance;
 }
index b15d070..1e2ef2a 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id convolution_type_id() {
+primitive_type_id convolution::type_id() {
     static primitive_type_base<convolution> instance;
     return &instance;
 }
@@ -46,6 +46,10 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
     auto input_type = input_layout.data_type;
     auto output_type = node.get_primitive()->output_data_type ? *node.get_primitive()->output_data_type : input_type;
 
+    if (node.has_fused_primitives()) {
+        output_type = node.get_fused_output_layout().data_type;
+    }
+
     // TODO: Consider moving general parameter verification to arguments constructor.
     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(),
                                    "Stride spatial X",
@@ -173,11 +177,6 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
             CLDNN_ERROR_MESSAGE(node.id(),
                                 "Number of filters (OFM) for winograd 2x3 convolution should be divisable by 32");
 
-        if (node.get_primitive()->with_activation)
-            CLDNN_ERROR_MESSAGE(node.id(),
-                                "Winograd 2x3 convolution should not have activation fused - activation should be "
-                                "performed at transformation from winograd domain stage");
-
         CLDNN_ERROR_LESS_THAN(node.id(),
                               "input width",
                               input_layout.size.spatial[0],
@@ -235,6 +234,10 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
                            desc->output_size.spatial[0],
                            desc->output_size.spatial[1],
                            desc->output_size.spatial[2]);
+        if (output_type == data_types::bin) {
+            return {output_type, format::b_fs_yx_32fp, output_size};
+        }
+
         return {output_type, input_layout.format, output_size};
     }
 
@@ -254,6 +257,11 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
                                 output_range.spatial[1],
                                 output_range.spatial[2]);
 
+
+    if (output_type == data_types::bin) {
+        return {output_type, format::b_fs_yx_32fp, output_size};
+    }
+
     // due to performance reason for using fs_bs_yx_bsv4_fsv32 first convolution have 3 features, so first conv layer
     // will take byxf and return fs_bs_yx_bsv4_fsv32
     if (input_layout.data_type == data_types::i8 && input_layout.format == format::byx8_f4 &&
@@ -271,7 +279,6 @@ std::string convolution_inst::to_string(convolution_node const& node) {
     auto groups = node.get_groups();
     auto dilation = desc->dilation;
     auto node_info = node.desc_to_json();
-    auto activation = desc->with_activation ? " true" : "false";
 
     std::stringstream primitive_description;
 
@@ -283,20 +290,9 @@ std::string convolution_inst::to_string(convolution_node const& node) {
     conv_info.add("split", split);
     conv_info.add("groups", groups);
     conv_info.add("dilation", dilation.to_string());
-    conv_info.add("with activation", activation);
-    conv_info.add("slope", desc->activation_negative_slope);
     conv_info.add("deformable_groups", desc->deformable_groups);
     conv_info.add("groups", desc->groups);
 
-    size_t index = 0;
-    for (auto& fused_desc :  node.get_fused_primitives()) {
-        json_composite fused_node_info;
-        fused_node_info.add("id", fused_desc.prim->id);
-        fused_node_info.add("dependencies", fused_desc.deps);
-        fused_node_info.add("dep start_idx", fused_desc.dep_start_idx);
-        conv_info.add("fused primitive idx " + std::to_string(index++), fused_node_info);
-    }
-
     if (desc->with_output_size) {
         json_composite ud_out_size_info;
         ud_out_size_info.add("size", desc->output_size.to_string());
@@ -343,7 +339,13 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias feature[0]",
                                   bias_inst.size.feature[0],
-                                  "expected size of feature",
+                                  "expected feature map number",
+                                  output_size.feature[0] / split,
+                                  "Bias/fm mismatch");
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Bias spatial[2]",
+                                  bias_inst.size.spatial[2],
+                                  "expected size of spatial[2]",
                                   1,
                                   "Biases isn't 1D vector.");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
@@ -353,18 +355,11 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
                                   1,
                                   "Biases isn't 1D vector.");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
-                                  "Bias spatial[2]",
-                                  bias_inst.size.spatial[2],
-                                  "expected size of spatial[2]",
-                                  1,
-                                  "Biases isn't 1D vector.");
-
-            CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[0]",
                                   bias_inst.size.spatial[0],
-                                  "expected feature map number",
-                                  output_size.feature[0] / split,
-                                  "Bias/fm mismatch");
+                                  "expected size of spatial[0]",
+                                  1,
+                                  "Biases isn't 1D vector.");
         }
 
         auto input_offset = argument.input_offset;
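
Two behavioural changes to convolution_inst::calc_output_layout above are easy to miss among the check reshuffling: when fused primitives are attached, the output data type is taken from the fused output layout, and binary (bin) outputs are forced into the b_fs_yx_32fp format. A condensed, illustrative restatement of that selection (not a verbatim extract):

    // Illustrative condensation of the branches added in this commit.
    layout pick_conv_output_layout(const convolution_node& node,
                                   data_types input_type,
                                   format input_format,
                                   const tensor& output_size) {
        auto output_type = node.get_primitive()->output_data_type
                               ? *node.get_primitive()->output_data_type
                               : input_type;
        if (node.has_fused_primitives())              // new: fused ops decide the output type
            output_type = node.get_fused_output_layout().data_type;
        if (output_type == data_types::bin)           // new: binary outputs get a packed format
            return {output_type, format::b_fs_yx_32fp, output_size};
        return {output_type, input_format, output_size};
    }
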
index 4db541e..3a5a02a 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id convolution_grad_weights_type_id() {
+primitive_type_id convolution_grad_weights::type_id() {
     static primitive_type_base<convolution_grad_weights> instance;
     return &instance;
 }
@@ -156,7 +156,7 @@ convolution_grad_weights_inst::typed_primitive_inst(network_impl& network, convo
                                   "Bias feature[0]",
                                   bias_inst.size.feature[0],
                                   "dimension size",
-                                  1,
+                                  input_grad_inst.size.feature[0] / split,
                                   "Feature[0] of bias should be 1. Bias isn't 1D vector.");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[1]",
@@ -169,7 +169,7 @@ convolution_grad_weights_inst::typed_primitive_inst(network_impl& network, convo
                                   "Bias spatial[0]",
                                   bias_inst.size.spatial[0],
                                   "input_grad feature size / split",
-                                  input_grad_inst.size.feature[0] / split,
+                                  1,
                                   "Biases/output feature maps number does not match.");
         }
         CLDNN_ERROR_NOT_EQUAL(node.id(),
index 1a6ee1a..b60643b 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id crop_type_id() {
+primitive_type_id crop::type_id() {
     static primitive_type_base<crop> instance;
     return &instance;
 }
@@ -84,20 +84,10 @@ crop_inst::typed_primitive_inst(network_impl& network, crop_node const& node) :
     const auto& ref_in_sizes = argument.reference_input;
     const auto in_layout = node.input().get_output_layout();
     const auto& in_sizes = in_layout.size;
-    const auto in_format = in_layout.format;
     const auto& offsets = argument.offsets;
     tensor null_tensor {};
     tensor value_tensor { 1, 1, 1, 1, 1 };
 
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(),
-                                  "Input format",
-                                  in_format.value,
-                                  "supported crop input formats",
-                                  format::yxfb,
-                                  format::bfyx,
-                                  format::fyxb,
-                                  format::bfzyx);
-
     // Check for borders variant of crop.
     if (ref_in_sizes.batch[0] < 0 || ref_in_sizes.feature[0] < 0 || ref_in_sizes.spatial[0] < 0 ||
         ref_in_sizes.spatial[1] < 0 || ref_in_sizes.spatial[2] < 0) {
index a64a69e..53aba6f 100644 (file)
@@ -23,7 +23,7 @@
 
 namespace cldnn {
 
-primitive_type_id custom_gpu_primitive_type_id() {
+primitive_type_id custom_gpu_primitive::type_id() {
     static primitive_type_base<custom_gpu_primitive> instance;
     return &instance;
 }
@@ -36,7 +36,7 @@ std::string custom_gpu_primitive_inst::to_string(custom_gpu_primitive_node const
 
     json_composite custom_gpu_prim_info;
     custom_gpu_prim_info.add("entry point", desc->kernel_entry_point);
-    custom_gpu_prim_info.add("kernels code", desc->kernels_code.ref());
+    custom_gpu_prim_info.add("kernels code", desc->kernels_code);
     custom_gpu_prim_info.add("build options", desc->build_options);
     custom_gpu_prim_info.add("gws", desc->gws);
     custom_gpu_prim_info.add("lws", desc->lws);
index b2c818a..a2d2d03 100644 (file)
@@ -25,7 +25,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id data_type_id() {
+primitive_type_id data::type_id() {
     static primitive_type_base<data> instance;
     return &instance;
 }
@@ -45,7 +45,7 @@ memory_impl::ptr attach_or_copy_data(network_impl& network, memory_impl& mem) {
 }  // namespace
 
 data_node::typed_program_node(const std::shared_ptr<data> dprim, program_impl& prog)
-    : parent(dprim, prog), mem(api_cast(dprim->mem.get())) {
+    : parent(dprim, prog), mem(dprim->mem.get()) {
     constant = true;
     can_share_buffer(false);
     recalc_output_layout(false);
index 1b50c41..26880df 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id deconvolution_type_id() {
+primitive_type_id deconvolution::type_id() {
     static primitive_type_base<deconvolution> instance;
     return &instance;
 }
@@ -106,7 +106,6 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) {
     auto strd = desc->stride;
     auto split = desc->split();
     auto node_info = node.desc_to_json();
-    auto activation = desc->with_activation ? " true" : "false";
 
     std::stringstream primitive_description;
     std::stringstream ss_weights, ss_biases;
@@ -133,8 +132,6 @@ std::string deconvolution_inst::to_string(deconvolution_node const& node) {
     deconv_info.add("stride", strd.to_string());
     deconv_info.add("input offset", desc->input_offset.to_string());
     deconv_info.add("split", split);
-    deconv_info.add("with activation", activation);
-    deconv_info.add("slope", desc->activation_negative_slope);
     if (desc->with_output_size) {
         json_composite ud_out_size_info;
         ud_out_size_info.add("size", desc->output_size.to_string());
@@ -182,9 +179,15 @@ deconvolution_inst::typed_primitive_inst(network_impl& network, deconvolution_no
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias feature[0]",
                                   bias_inst.size.feature[0],
+                                  "output feature size / split",
+                                  output_size.feature[0] / split,
+                                  "Biases/output feature maps number does not match.");
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Bias spatial[2]",
+                                  bias_inst.size.spatial[2],
                                   "dimension size",
                                   1,
-                                  "Feature[0] of bias should be 1. Bias isn't 1D vector.");
+                                  "Spatial[2] of bias should be 1. Bias isn't 1D vector.");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[1]",
                                   bias_inst.size.spatial[1],
@@ -192,18 +195,11 @@ deconvolution_inst::typed_primitive_inst(network_impl& network, deconvolution_no
                                   1,
                                   "Spatial[1] of bias should be 1. Bias isn't 1D vector.");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
-                                  "Bias spatial[2]",
-                                  bias_inst.size.spatial[1],
-                                  "dimension size",
-                                  1,
-                                  "Spatial[2] of bias should be 1. Bias isn't 1D vector.");
-
-            CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[0]",
                                   bias_inst.size.spatial[0],
-                                  "output feature size / split",
-                                  output_size.feature[0] / split,
-                                  "Biases/output feature maps number does not match.");
+                                  "dimension size",
+                                  1,
+                                  "Spatial[0] of bias should be 1. Bias isn't 1D vector.");
         }
         CLDNN_ERROR_NOT_EQUAL(node.id(),
                               "deconvolution padding filling value",
index 4d4c54b..25aabb6 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id deformable_conv_type_id() {
+primitive_type_id deformable_conv::type_id() {
     static primitive_type_base<deformable_conv> instance;
     return &instance;
 }
@@ -69,8 +69,7 @@ std::string deformable_conv_inst::to_string(deformable_conv_node const& node) {
 deformable_conv_inst::typed_primitive_inst(network_impl& network, deformable_conv_node const& node) : parent(network, node) {
 }
 
-
-primitive_type_id deformable_interp_type_id() {
+primitive_type_id deformable_interp::type_id() {
     static primitive_type_base<deformable_interp> instance;
     return &instance;
 }
index c9d1cdf..5949313 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id depth_to_space_type_id() {
+primitive_type_id depth_to_space::type_id() {
     static primitive_type_base<depth_to_space> instance;
     return &instance;
 }
index 6b83ca6..a14ee45 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id detection_output_type_id() {
+primitive_type_id detection_output::type_id() {
     static primitive_type_base<detection_output> instance;
     return &instance;
 }
@@ -205,7 +205,7 @@ detection_output_inst::typed_primitive_inst(network_impl& network, detection_out
 
 /************************ Detection Output keep_top_k part ************************/
 
-primitive_type_id detection_output_sort_type_id() {
+primitive_type_id detection_output_sort::type_id() {
     static primitive_type_base<detection_output_sort> instance;
     return &instance;
 }
index 9bd5baa..9cfddcb 100644 (file)
@@ -24,7 +24,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id eltwise_type_id() {
+primitive_type_id eltwise::type_id() {
     static primitive_type_base<eltwise> instance;
     return &instance;
 }
@@ -36,10 +36,14 @@ layout eltwise_inst::calc_output_layout(eltwise_node const& node) {
     auto input_node_layout = node.input().get_non_padded_output_layout();
 
     auto size = input_node_layout.size;
+    auto format = input_node_layout.format;
     for (size_t i = 1; i < node.inputs_count(); i++) {
-        size = tensor::max(size, node.input(i).get_non_padded_output_layout().size);
+        auto l = node.input(i).get_non_padded_output_layout();
+        size = tensor::max(size, l.size);
+        if (l.format == format::bfzyx_f16)  // use optimized 5D
+            format = format::bfzyx_f16;
     }
-    auto output_layout = layout(input_node_layout.data_type, input_node_layout.format, size);
+    auto output_layout = layout(input_node_layout.data_type, format, size);
     auto mode = node.get_primitive()->mode;
     // list of operations supported for integer types
     if (input_node_layout.data_type == data_types::i8 || input_node_layout.data_type == data_types::u8 ||
@@ -76,8 +80,6 @@ layout eltwise_inst::calc_output_layout(eltwise_node const& node) {
                                                     eltwise_mode::logic_xor};
     if (std::find(eltwise_bool_modes.begin(), eltwise_bool_modes.end(), mode) != eltwise_bool_modes.end()) {
         output_layout.data_type = data_types::i8;
-        if (node.get_primitive()->with_activation)
-            CLDNN_ERROR_MESSAGE(node.id(), "Activations are not supported for logical operations.");
     }
 
     auto eltw = std::static_pointer_cast<const eltwise>((node.get_primitive()));
@@ -111,7 +113,6 @@ static inline std::string stringify_vector(const std::vector<float>& v) {
 std::string eltwise_inst::to_string(eltwise_node const& node) {
     auto node_info = node.desc_to_json();
     auto desc = node.get_primitive();
-    auto activation = desc->with_activation ? " true" : "false";
 
     std::stringstream primitive_description;
     std::string str_mode;
@@ -187,10 +188,6 @@ std::string eltwise_inst::to_string(eltwise_node const& node) {
     if (desc->mode == eltwise_mode::sum) {
         eltwise_info.add("coefficients", stringify_vector(desc->coefficients));
     }
-    if (desc->with_activation) {
-        eltwise_info.add("with activation", activation);
-        eltwise_info.add("slope", desc->activation_negative_slope);
-    }
     node_info->add("eltwise info", eltwise_info);
     node_info->dump(primitive_description);
 
index d7b793b..e45b289 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id embed_type_id() {
+primitive_type_id embed::type_id() {
     static primitive_type_base<embed> instance;
     return &instance;
 }
index d99592a..77617ff 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 #include "gpu/ocl_toolkit.h"
 #include "gpu/memory_gpu.h"
 #include "gpu/ocl_user_event.h"
+#include "gpu/register_gpu.hpp"
 #include <string>
 #include <vector>
 #include <memory>
 #include <set>
+#include <stdexcept>
 
 namespace cldnn {
+
+engine::engine(engine_types type, uint32_t engine_num, const engine_configuration& configuration)
+    : _impl(new engine_impl(configuration)) {
+    if (type != engine_types::ocl)
+        throw std::invalid_argument("Invalid engine type, should be ocl.");
+
+    if (engine_num > 0)
+        throw std::invalid_argument("Invalid engine index, should be 0.");
+}
+
+uint32_t engine::engine_count(engine_types type) {
+    if (type == engine_types::ocl) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+void engine::release_pending_memory(uint16_t stream_id) const {
+    _impl->release_pending_memory(stream_id);
+}
+
+engine_info engine::get_info() const {
+    auto info = _impl->get_engine_info();
+
+    return { info.cores_count,
+             info.core_frequency,
+             info.max_work_group_size,
+             info.max_local_mem_size,
+             info.max_global_mem_size,
+             info.max_alloc_mem_size,
+             info.max_image2d_width,
+             info.max_image2d_height,
+             info.supports_fp16,
+             info.supports_fp16_denorms,
+             info.supports_subgroups_short,
+             info.supports_image,
+             info.supports_imad,
+             info.supports_immad,
+             info.dev_name,
+             info.driver_version
+    };
+}
+
+uint64_t engine::get_max_used_device_memory_size() const {
+    return _impl->get_max_used_device_memory();
+}
+
+uint64_t engine::get_temp_used_device_memory_size() const {
+    return _impl->get_used_device_memory();
+}
+
+engine_types engine::get_type() const {
+    return _impl->type();
+}
+
+void engine::retain() {
+    _impl->add_ref();
+}
+void engine::release() {
+    _impl->release();
+}
+
 using gpu_toolkit_config = gpu::configuration;
 
 gpu_toolkit_config convert_configuration(const engine_configuration conf) {
@@ -40,8 +105,8 @@ gpu_toolkit_config convert_configuration(const engine_configuration conf) {
     result.host_out_of_order = true;  // TODO: enable when barriers in driver will be fixed
     result.log = conf.engine_log;
     result.ocl_sources_dumps_dir = conf.sources_dumps_dir;
-    result.priority_mode = static_cast<cldnn_priority_mode_type>(conf.priority_mode);
-    result.throttle_mode = static_cast<cldnn_throttle_mode_type>(conf.throttle_mode);
+    result.priority_mode = conf.priority_mode;
+    result.throttle_mode = conf.throttle_mode;
     result.queues_num = conf.n_streams;
     result.user_context = static_cast<cl::Context*>(conf.context);
     result.tuning_cache_path = conf.tuning_cache_path;
@@ -49,7 +114,9 @@ gpu_toolkit_config convert_configuration(const engine_configuration conf) {
 }
 
 engine_impl::engine_impl(const engine_configuration& conf)
-    : _configuration(conf), _context(gpu_toolkit::create(convert_configuration(conf))), _memory_pool(*this) {}
+    : _configuration(conf), _context(gpu_toolkit::create(convert_configuration(conf))), _memory_pool(*this) {
+    gpu::register_implementations_gpu();
+}
 
 engine_impl::~engine_impl() {
     /*
@@ -59,7 +126,7 @@ engine_impl::~engine_impl() {
     for (uint16_t s = 0; s < _configuration.n_streams; s++) _context->release_events_pool(s);
 }
 
-memory_impl::ptr engine_impl::allocate_memory(layout layout, uint16_t stream_id) {
+memory_impl::ptr engine_impl::allocate_memory(const layout& layout, uint16_t stream_id) {
     if (stream_id >= this->configuration().n_streams)
         throw std::invalid_argument("Unable to allocate memory object with stream_id=" + std::to_string(stream_id) +
                                     " (available streams num is " + std::to_string(this->configuration().n_streams));
@@ -67,7 +134,7 @@ memory_impl::ptr engine_impl::allocate_memory(layout layout, uint16_t stream_id)
     return _memory_pool.get_memory(layout, stream_id);
 }
 
-memory_impl::ptr engine_impl::allocate_memory(layout layout,
+memory_impl::ptr engine_impl::allocate_memory(const layout& layout,
                                               primitive_id id,
                                               uint32_t network_id,
                                               std::set<primitive_id> dependencies,
@@ -78,15 +145,15 @@ memory_impl::ptr engine_impl::allocate_memory(layout layout,
     return _memory_pool.get_memory(layout, stream_id);
 }
 
-memory_impl::ptr engine_impl::reinterpret_buffer(const memory_impl& memory, layout new_layout) {
+memory_impl::ptr engine_impl::reinterpret_buffer(const memory_impl& memory, const layout& new_layout) {
     if (memory.get_engine() != (const refcounted_obj_ptr<engine_impl>) this)
-        throw error("trying to reinterpret buffer allocated by a different engine", CLDNN_ERROR);
+        throw std::runtime_error("trying to reinterpret buffer allocated by a different engine");
 
     if (new_layout.format.is_image() && !memory.get_layout().format.is_image())
-        throw error("trying to reinterpret non-image buffer as image", CLDNN_ERROR);
+        throw std::runtime_error("trying to reinterpret non-image buffer as image");
 
     if (!new_layout.format.is_image() && memory.get_layout().format.is_image())
-        throw error("trying to reinterpret image buffer as non-image buffer", CLDNN_ERROR);
+        throw std::runtime_error("trying to reinterpret image buffer as non-image buffer");
 
     try {
         if (new_layout.format.is_image_2d()) {
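
The methods added above give the public engine object direct, exception-based accessors in place of status-code calls. A usage sketch, assuming engine_configuration is default-constructible as in the public header:

    #include <iostream>
    // Sketch: construct an OpenCL engine (index 0) and query its capabilities.
    int main() {
        cldnn::engine eng(cldnn::engine_types::ocl, 0, cldnn::engine_configuration());
        const auto info = eng.get_info();
        std::cout << info.dev_name << " (driver " << info.driver_version << ")\n"
                  << "cores: " << info.cores_count
                  << ", fp16: " << (info.supports_fp16 ? "yes" : "no") << std::endl;
        std::cout << "peak device memory used: "
                  << eng.get_max_used_device_memory_size() << " bytes" << std::endl;
        return 0;
    }
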
index 76f1b78..f96ce66 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 */
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "api/event.hpp"
 #include "event_impl.h"
 #include "engine_impl.h"
 #include <list>
+#include <vector>
+#include <algorithm>
 
 namespace cldnn {
 
+event event::create_user_event(const engine& engine, uint16_t stream_id) {
+    return event(engine.get()->create_user_event(stream_id).detach());
+}
+
+void event::wait() const {
+    _impl->wait();
+}
+
+void event::set() const {
+    if (auto user_ev = dynamic_cast<user_event*>(_impl))
+        user_ev->set();
+    else
+        throw std::invalid_argument("Event passed to cldnn_set_event should be an user event");
+}
+
+void event::set_event_handler(event_handler handler, void* param) const {
+    _impl->add_event_handler(handler, param);
+}
+
+std::vector<instrumentation::profiling_interval> event::get_profiling_info() const {
+    auto interval_list = _impl->get_profiling_info();
+    std::vector<instrumentation::profiling_interval> result(interval_list.size());
+    std::copy(interval_list.begin(), interval_list.end(), result.begin());
+    return result;
+}
+
+void event::retain() {
+    _impl->add_ref();
+}
+
+void event::release() {
+    _impl->release();
+}
+
 void event_impl::wait() {
     if (_set)
         return;
@@ -40,7 +77,7 @@ bool event_impl::is_set() {
     return _set;
 }
 
-bool event_impl::add_event_handler(cldnn_event_handler handler, void* data) {
+bool event_impl::add_event_handler(event_handler handler, void* data) {
     if (is_set()) {
         handler(data);
         return true;
@@ -55,7 +92,7 @@ bool event_impl::add_event_handler(cldnn_event_handler handler, void* data) {
     return ret;
 }
 
-const std::list<cldnn_profiling_interval>& event_impl::get_profiling_info() {
+const std::list<instrumentation::profiling_interval>& event_impl::get_profiling_info() {
     if (_profiling_captured)
         return _profiling_info;
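
User events likewise move onto the C++ event class. A short sketch of creating one, attaching a completion handler, and signalling it; the handler signature (a plain callback taking the void* user parameter) is assumed from the handler(data) call in the hunk above:

    #include <iostream>
    // Assumed handler signature: called with the user-supplied parameter.
    static void on_done(void* param) {
        std::cout << "event signalled, tag = " << *static_cast<int*>(param) << std::endl;
    }

    void demo(const cldnn::engine& eng) {  // sketch: engine created elsewhere
        int tag = 42;
        auto ev = cldnn::event::create_user_event(eng, /*stream_id=*/0);
        ev.set_event_handler(on_done, &tag);
        ev.set();   // only valid for user events, as enforced above
        ev.wait();  // returns immediately once the event is set
    }
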
 
index cbb282c..8460ae6 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id fully_connected_type_id() {
+primitive_type_id fully_connected::type_id() {
     static primitive_type_base<fully_connected> instance;
     return &instance;
 }
@@ -85,14 +85,12 @@ std::string fully_connected_inst::to_string(fully_connected_node const& node) {
     auto node_info = node.desc_to_json();
     auto bias_id = desc->bias != "" ? desc->bias : "no bias";
     auto weights_id = desc->weights;
-    auto activation = desc->with_activation ? " true" : "false";
 
     std::stringstream primitive_description;
 
     json_composite fc_info;
     fc_info.add("weights id", weights_id);
     fc_info.add("bias id", bias_id);
-    fc_info.add("with activation", activation);
 
     node_info->add("fully connected info", fc_info);
     node_info->dump(primitive_description);
index 996cdcd..7c2fc44 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id fully_connected_grad_input_type_id() {
+primitive_type_id fully_connected_grad_input::type_id() {
     static primitive_type_base<fully_connected_grad_input> instance;
     return &instance;
 }
index 44c13ef..7c694e1 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id fully_connected_grad_weights_type_id() {
+primitive_type_id fully_connected_grad_weights::type_id() {
     static primitive_type_base<fully_connected_grad_weights> instance;
     return &instance;
 }
index 33e668a..41e88b3 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id fused_conv_bn_scale_type_id() {
+primitive_type_id fused_conv_bn_scale::type_id() {
     static primitive_type_base<fused_conv_bn_scale> instance;
     return &instance;
 }
@@ -119,7 +119,6 @@ std::string fused_conv_bn_scale_inst::to_string(fused_conv_bn_scale_node const&
     auto strd = desc->stride;
     auto split = node.get_split();
     auto node_info = node.desc_to_json();
-    auto activation = desc->with_activation ? " true" : "false";
 
     std::stringstream primitive_description;
 
@@ -127,8 +126,6 @@ std::string fused_conv_bn_scale_inst::to_string(fused_conv_bn_scale_node const&
     fuse_info.add("stride", strd.to_string());
     fuse_info.add("input offset", desc->input_offset.to_string());
     fuse_info.add("split", split);
-    fuse_info.add("with activation", activation);
-    fuse_info.add("slope", desc->activation_negative_slope);
 
     node_info->add("fused_conv_bn_scale info", fuse_info);
     node_info->dump(primitive_description);
@@ -172,8 +169,8 @@ fused_conv_bn_scale_inst::typed_primitive_inst(network_impl& network, fused_conv
                                   "Bias feature[0]",
                                   bias_inst.size.feature[0],
                                   "expected size of feature",
-                                  1,
-                                  "Biases isn't 1D vector.");
+                                  output_size.feature[0] / split,
+                                  "Bias/fm mismatch");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[1]",
                                   bias_inst.size.spatial[1],
@@ -184,9 +181,9 @@ fused_conv_bn_scale_inst::typed_primitive_inst(network_impl& network, fused_conv
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[0]",
                                   bias_inst.size.spatial[0],
-                                  "expected feature map number",
-                                  output_size.feature[0] / split,
-                                  "Bias/fm mismtach");
+                                  "expected size of spatial[0]",
+                                  1,
+                                  "Biases isn't 1D vector.");
         }
 
         auto input_offset = argument.input_offset;
index 41dd7d2..c25a100 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id fused_conv_eltwise_type_id() {
+primitive_type_id fused_conv_eltwise::type_id() {
     static primitive_type_base<fused_conv_eltwise> instance;
     return &instance;
 }
@@ -201,11 +201,18 @@ layout fused_conv_eltwise_inst::calc_output_layout(fused_conv_eltwise_node const
                                        "value",
                                        0,
                                        "must be positive(>= 1)");
+        CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(),
+            "User defined output spatial Z",
+            desc->conv.output_size.spatial[2],
+            "value",
+            0,
+            "must be positive(>= 1)");
 
         tensor output_size(input_layout.size.batch[0],
                            number_of_features,
                            desc->conv.output_size.spatial[0],
-                           desc->conv.output_size.spatial[1]);
+                           desc->conv.output_size.spatial[1],
+                           desc->conv.output_size.spatial[2]);
         return {output_type, input_layout.format, output_size};
     }
 
@@ -220,7 +227,8 @@ layout fused_conv_eltwise_inst::calc_output_layout(fused_conv_eltwise_node const
     tensor output_size(input_layout.size.batch[0],
                        number_of_features,
                        output_range.spatial[0],
-                       output_range.spatial[1]);
+                       output_range.spatial[1],
+                       output_range.spatial[2]);
 
     // due to performance reason for using fs_bs_yx_bsv4_fsv32 first convolution have 3 features, so first conv layer
     // will take byxf and return fs_bs_yx_bsv4_fsv32
@@ -296,22 +304,21 @@ fused_conv_eltwise_inst::typed_primitive_inst(network_impl& network, fused_conv_
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias feature[0]",
                                   bias_inst.size.feature[0],
-                                  "expected size of feature",
-                                  1,
-                                  "Biases isn't 1D vector.");
+                                  "expected feature map number",
+                                  output_size.feature[0] / split,
+                                  "Bias/fm mismatch");
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[1]",
                                   bias_inst.size.spatial[1],
                                   "expected size of spatial[1]",
                                   1,
                                   "Biases isn't 1D vector.");
-
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[0]",
                                   bias_inst.size.spatial[0],
-                                  "expected feature map number",
-                                  output_size.feature[0] / split,
-                                  "Bias/fm mismatch");
+                                  "expected size of spatial[0]",
+                                  1,
+                                  "Biases isn't 1D vector.");
         }
 
         auto input_offset = argument.conv.input_offset;
index 31e6b47..8b0417c 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id gather_type_id() {
+primitive_type_id gather::type_id() {
     static primitive_type_base<gather> instance;
     return &instance;
 }
diff --git a/inference-engine/thirdparty/clDNN/src/gather_tree.cpp b/inference-engine/thirdparty/clDNN/src/gather_tree.cpp
new file mode 100644 (file)
index 0000000..fcf439e
--- /dev/null
@@ -0,0 +1,71 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gather_tree_inst.h"
+
+#include "error_handler.h"
+#include "json_object.h"
+#include "primitive_type_base.h"
+#include <string>
+#include <algorithm>
+
+namespace cldnn {
+primitive_type_id gather_tree::type_id() {
+    static primitive_type_base<gather_tree> instance;
+    return &instance;
+}
+
+layout gather_tree_inst::calc_output_layout(gather_tree_node const& node) {
+    assert(static_cast<bool>(node.get_primitive()->output_data_type) == false &&
+        "Output data type forcing is not supported for gather_tree_node!");
+    auto input_layout = node.input().get_output_layout();
+    return input_layout;
+}
+
+std::string gather_tree_inst::to_string(gather_tree_node const& node) {
+    std::stringstream primitive_description;
+    node.desc_to_json()->dump(primitive_description);
+    return primitive_description.str();
+}
+
+gather_tree_inst::typed_primitive_inst(network_impl& network, gather_tree_node const& node) : parent(network, node) {
+    auto input_layout = node.input().get_output_layout();
+
+    const auto input_format = input_layout.format;
+
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(),
+        "Input format",
+        input_format.value,
+        "supported gather_tree primitive input formats",
+        format::bfyx,
+        format::yxfb,
+        format::byxf);
+
+    auto dependencies = node.get_dependencies();
+
+    // check input dims
+    CLDNN_ERROR_NOT_EQUAL(node.id(),
+        "input0 size", dependencies.at(0)->get_output_layout().size, "output size", input_layout.size,
+        "mismatch");
+    CLDNN_ERROR_NOT_EQUAL(node.id(),
+        "input1 size", dependencies.at(1)->get_output_layout().size, "output size", input_layout.size,
+        "mismatch");
+    CLDNN_ERROR_NOT_EQUAL(node.id(),
+        "input2 size", dependencies.at(2)->get_output_layout().count(), "node's feature size", input_layout.size.feature.at(0),
+        "Input2 element count must match the input feature count");
+    CLDNN_ERROR_NOT_EQUAL(node.id(),
+        "input3 size", dependencies.at(3)->get_output_layout().size.count(), "one", 1,
+        "There can't be more than one end_token");
+}
+}  // namespace cldnn
index 5944a65..0d4a657 100644 (file)
@@ -24,7 +24,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id gemm_type_id() {
+primitive_type_id gemm::type_id() {
     static primitive_type_base<gemm> instance;
     return &instance;
 }
index 66c63bb..4866481 100644 (file)
@@ -26,7 +26,7 @@
 
 namespace cldnn {
 
-primitive_type_id generic_layer_type_id() {
+primitive_type_id generic_layer::type_id() {
     static primitive_type_base<generic_layer> instance;
     return &instance;
 }
index 7a903f6..674c953 100644 (file)
@@ -21,7 +21,8 @@
 #include "kernel_selector_helper.h"
 #include "activation/activation_kernel_selector.h"
 #include "activation/activation_kernel_base.h"
-#include "api/CPP/activation.hpp"
+#include "api/activation.hpp"
+#include "register_gpu.hpp"
 
 namespace cldnn {
 namespace gpu {
@@ -46,14 +47,14 @@ struct activation_gpu : typed_primitive_gpu_impl<activation> {
         auto activation_optional_params =
             get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
 
-        convert_new_activation_func(arg.get_primitive(), activation_params.activation);
+        convert_new_activation_func(arg.get_primitive(), activation_params.activations);
 
         if (arg.is_parameterized()) {
             const auto& slope_layout = arg.slope_input().get_output_layout();
             const auto& output_layout = arg.get_output_layout();
 
             const auto params_num =
-                kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activation.function);
+                kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function);
 
             CLDNN_ERROR_LESS_THAN(arg.id(),
                                   "Slope layout size count",
@@ -78,36 +79,37 @@ struct activation_gpu : typed_primitive_gpu_impl<activation> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = activation_gpu::create;
-
-        implementation_map<activation>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw},
-            // block f16 format
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), val_fw},
-            // 3D
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_activation_gpu::attach_activation_gpu() {
+    auto val_fw = activation_gpu::create;
+
+    implementation_map<activation>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw},
+        // block f16 format
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), val_fw},
+        // 3D
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx_f16), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
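
The activation hunk above is the first of many in this commit that replace an anonymous-namespace attach struct plus a file-scope attach_impl object with a named detail::attach_*_gpu constructor, together with the new register_gpu.hpp include. A plausible motivation, stated here as an assumption rather than something the diff itself spells out, is that a central register_gpu() entry point can construct these objects explicitly instead of relying on static initializers of otherwise unreferenced objects, which a linker may discard when the translation unit sits in a static library. The sketch below contrasts the two styles; implementation_map(), register_gpu() and the attach_* names are simplified stand-ins, not the real clDNN code.

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Toy registry standing in for implementation_map<primitive>::add(...).
std::map<std::string, std::function<void()>>& implementation_map() {
    static std::map<std::string, std::function<void()>> m;
    return m;
}

// Old style: registration as a side effect of static initialization of an
// unreferenced object in an anonymous namespace.
namespace {
struct attach_old {
    attach_old() { implementation_map()["activation_old"] = [] { std::cout << "old\n"; }; }
};
attach_old attach_impl;  // runs whenever this translation unit's statics are initialized
}  // namespace

// New style: a named constructor in namespace detail that a central entry point
// constructs explicitly, making registration order and linkage explicit.
namespace detail {
struct attach_activation_gpu {
    attach_activation_gpu() { implementation_map()["activation"] = [] { std::cout << "new\n"; }; }
};
}  // namespace detail

void register_gpu() {  // stand-in for the central registration function
    static detail::attach_activation_gpu attach_activation;
}

int main() {
    register_gpu();
    implementation_map()["activation"]();  // prints "new"
}

Calling register_gpu() once, for example while creating the engine, performs the same registrations that the old attach_impl objects performed implicitly.
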
index b8d3a03..eef28f6 100644 (file)
@@ -21,7 +21,8 @@
 #include "kernel_selector_helper.h"
 #include "activation/activation_kernel_selector.h"
 #include "activation/activation_kernel_base.h"
-#include "api/CPP/activation_grad.hpp"
+#include "api/activation_grad.hpp"
+#include "register_gpu.hpp"
 
 namespace cldnn {
 namespace gpu {
@@ -50,17 +51,13 @@ struct activation_grad_gpu : typed_primitive_gpu_impl<activation_grad> {
 
         activation_grad_params.gradient = true;
         activation_grad_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));
-        activation_grad_params.activation.function =
-            get_kernel_selector_activation_grad_param(primitive->activation_grad_func);
-        activation_grad_params.activation.m = primitive->additional_params.a;
-        activation_grad_params.activation.n = primitive->additional_params.b;
-
+        convert_new_activation_grad_func(primitive, activation_grad_params.activations);
         if (arg.is_parameterized()) {
             const auto& slope_layout = arg.slope_input().get_output_layout();
             const auto& output_layout = arg.get_output_layout();
 
             const auto params_num =
-                kernel_selector::GetActivationAdditionalParamsNumber(activation_grad_params.activation.function);
+                kernel_selector::GetActivationAdditionalParamsNumber(activation_grad_params.activations[0].function);
 
             CLDNN_ERROR_LESS_THAN(arg.id(),
                                   "Slope layout size count",
@@ -85,23 +82,21 @@ struct activation_grad_gpu : typed_primitive_gpu_impl<activation_grad> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = activation_grad_gpu::create;
-
-        implementation_map<activation_grad>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_activation_grad_gpu::attach_activation_grad_gpu() {
+    auto val_fw = activation_grad_gpu::create;
+
+    implementation_map<activation_grad>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index b2a68e2..d7ec074 100644 (file)
@@ -163,21 +163,19 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = apply_adam_gpu::create;
-
-        implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_apply_adam_gpu::attach_apply_adam_gpu() {
+    auto val_fw = apply_adam_gpu::create;
+
+    implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<apply_adam>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 7f6244d..91551a8 100644 (file)
@@ -113,9 +113,9 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
+namespace detail {
+
+    attach_arg_max_min_gpu::attach_arg_max_min_gpu() {
         implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
                                              arg_max_min_gpu::create);
         implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
@@ -135,9 +135,7 @@ struct attach {
         implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
                                              arg_max_min_gpu::create);
     }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 998ebcb..b169e4f 100644 (file)
@@ -67,31 +67,29 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                                   average_unpooling_gpu::create);
-        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                                   average_unpooling_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_average_unpooling_gpu::attach_average_unpooling_gpu() {
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                                average_unpooling_gpu::create);
+    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
+                                                average_unpooling_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 3480e91..3ecdb1e 100644 (file)
@@ -138,21 +138,19 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = batch_norm_gpu::create;
-
-        implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_batch_norm_gpu::attach_batch_norm_gpu() {
+    auto val_fw = batch_norm_gpu::create;
+
+    implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<batch_norm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 5dc0c8d..f24054f 100644 (file)
@@ -63,27 +63,25 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = batch_norm_grad_gpu::create;
+namespace detail {
 
-        implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                                 val_fw);
-        implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                 val_fw);
-        implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                 val_fw);
-        implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                 val_fw);
-        implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                                 val_fw);
-        implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                                 val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_batch_norm_grad_gpu::attach_batch_norm_grad_gpu() {
+    auto val_fw = batch_norm_grad_gpu::create;
+
+    implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                                val_fw);
+    implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                                val_fw);
+    implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                                val_fw);
+    implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                                val_fw);
+    implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                                val_fw);
+    implementation_map<batch_norm_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                                val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 314239b..e0b7f13 100644 (file)
@@ -14,8 +14,8 @@
 // limitations under the License.
 */
 
-#include <api/CPP/scale.hpp>
-#include <api/CPP/quantize.hpp>
+#include <api/scale.hpp>
+#include <api/quantize.hpp>
 #include "binary_convolution_inst.h"
 #include "primitive_gpu_base.h"
 #include "implementation_map.h"
@@ -63,12 +63,6 @@ protected:
         kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
 
         args.weights = (memory_impl::cptr) &instance.weights_memory(split);
-        if (instance.has_fused_primitives()) {
-            size_t count = instance.get_fused_mem_count();
-            for (size_t i = 0; i < count; i++) {
-                args.fused_op_inputs.push_back((memory_impl::cptr) &instance.fused_memory(i));
-            }
-        }
         return args;
     }
 
@@ -98,38 +92,13 @@ public:
             get_default_weights_bias_optional_params<kernel_selector::binary_convolution_optional_params>(
                 arg.get_program());
 
-        for (auto& fused_prim : arg.get_fused_primitives()) {
-            using op_type = kernel_selector::binary_convolution_params::fused_operation_desc::Type;
-            kernel_selector::binary_convolution_params::fused_operation_desc desc;
-            if (fused_prim.prim->type == scale::type_id()) {
-                desc.type = op_type::SCALE;
-            } else if (fused_prim.prim->type == quantize::type_id()) {
-                desc.type = op_type::QUANTIZE;
-            } else {
-                CLDNN_ERROR_MESSAGE(arg.id(), "Invalid fused primitive type in binary convolution node");
-            }
-
-            desc.dep_idx_start = fused_prim.dep_start_idx;
-            desc.dep_size = fused_prim.deps.size();
-
-            for (size_t i = desc.dep_idx_start; i < desc.dep_idx_start + desc.dep_size; i++) {
-                desc.tensors.push_back(convert_data_tensor(arg.get_dependency(i).get_output_layout()));
-            }
-
-            if (fused_prim.activation != cldnn_activation_func_t::activation_none) {
-                desc.activation.m = fused_prim.activation_params.a;
-                desc.activation.n = fused_prim.activation_params.b;
-                desc.activation.function = get_kernel_selector_activation_param(fused_prim.activation);
-            }
-            conv_params.fused_ops.push_back(desc);
-        }
-
         const auto additional_offset = tensor::max(input_offset, (tensor) 0);
         if (additional_offset != (tensor) 0) {
             conv_params.inputs[0] = convert_data_tensor(input_layout, actual_split, additional_offset);
         }
 
         conv_params.pad_value = primitive->pad_value;
+        conv_params.out_dt = to_data_type(*primitive->output_data_type);
         conv_params.depthwise_separable_opt = depthwise_separable_opt;
         conv_params.split = static_cast<uint32_t>(split);
         conv_params.groups = static_cast<uint32_t>(groups);
@@ -170,16 +139,14 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<binary_convolution>::add(
-            std::make_tuple(engine_types::ocl, data_types::bin, format::b_fs_yx_32fp),
-            binary_convolution_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_binary_convolution_gpu::attach_binary_convolution_gpu() {
+    implementation_map<binary_convolution>::add(
+        std::make_tuple(engine_types::ocl, data_types::bin, format::b_fs_yx_32fp),
+        binary_convolution_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 596ed88..6b0a92d 100644 (file)
@@ -70,31 +70,37 @@ struct border_gpu : typed_primitive_gpu_impl<border> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = border_gpu::create;
-
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
-
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-        implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
-    }
-    ~attach() = default;
-};
-
-attach attach_impl;
-
-}  // namespace
+namespace detail {
+
+attach_border_gpu::attach_border_gpu() {
+    auto val_fw = border_gpu::create;
+
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
+
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
+
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
+
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
+    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 680bea9..2ed7e36 100644 (file)
@@ -67,29 +67,25 @@ struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = broadcast_gpu::create;
+namespace detail {
 
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-        implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
-    }
-    ~attach() = default;
-};
+attach_broadcast_gpu::attach_broadcast_gpu() {
+    auto val_fw = broadcast_gpu::create;
 
-attach attach_impl;
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
+    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 93f4f4c..2048e9b 100644 (file)
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #if FP16_SUPPORTED
     #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #endif
index a615454..0d27d98 100644 (file)
@@ -28,8 +28,8 @@ command_queues_builder::command_queues_builder(const cl::Context& context,
     : _context(context),
       _device(device),
       _platform_id(platform_id),
-      _priority_mode(cldnn_priority_disabled),
-      _throttle_mode(cldnn_throttle_disabled) {}
+      _priority_mode(priority_mode_types::disabled),
+      _throttle_mode(throttle_mode_types::disabled) {}
 
 cl_command_queue_properties command_queues_builder::get_properties() {
     cl_command_queue_properties ret =
@@ -40,7 +40,7 @@ cl_command_queue_properties command_queues_builder::get_properties() {
 void command_queues_builder::build() {
     auto properties = get_properties();
 
-    if (_priority_mode == cldnn_priority_disabled && _throttle_mode == cldnn_throttle_disabled) {
+    if (_priority_mode == priority_mode_types::disabled && _throttle_mode == throttle_mode_types::disabled) {
         _queue = cl::CommandQueue(_context, _device, properties);
         return;
     }
@@ -48,10 +48,10 @@ void command_queues_builder::build() {
     unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
 
     switch (_priority_mode) {
-        case cldnn_priority_high:
+        case priority_mode_types::high:
             cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
             break;
-        case cldnn_priority_low:
+        case priority_mode_types::low:
             cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
             break;
         default:
@@ -61,10 +61,10 @@ void command_queues_builder::build() {
     unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
 
     switch (_throttle_mode) {
-        case cldnn_throttle_high:
+        case throttle_mode_types::high:
             cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
             break;
-        case cldnn_throttle_low:
+        case throttle_mode_types::low:
             cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
             break;
         default:
@@ -73,7 +73,7 @@ void command_queues_builder::build() {
 
     cl_int error_code = CL_SUCCESS;
 
-    if (_priority_mode != cldnn_priority_disabled && _throttle_mode != cldnn_throttle_disabled) {
+    if (_priority_mode != priority_mode_types::disabled && _throttle_mode != throttle_mode_types::disabled) {
         cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR,
                                                 cl_queue_priority_value,
                                                 CL_QUEUE_THROTTLE_KHR,
@@ -83,7 +83,7 @@ void command_queues_builder::build() {
                                                 0};
 
         _queue = clCreateCommandQueueWithProperties(_context.get(), _device.get(), properties_low, &error_code);
-    } else if (_priority_mode != cldnn_priority_disabled) {
+    } else if (_priority_mode != priority_mode_types::disabled) {
         cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR,
                                                 cl_queue_priority_value,
                                                 CL_QUEUE_PROPERTIES,
@@ -91,7 +91,7 @@ void command_queues_builder::build() {
                                                 0};
 
         _queue = clCreateCommandQueueWithProperties(_context.get(), _device.get(), properties_low, &error_code);
-    } else if (_throttle_mode != cldnn_throttle_disabled) {
+    } else if (_throttle_mode != throttle_mode_types::disabled) {
         cl_queue_properties properties_low[] = {CL_QUEUE_THROTTLE_KHR,
                                                 cl_queue_throttle_value,
                                                 CL_QUEUE_PROPERTIES,
@@ -107,8 +107,8 @@ void command_queues_builder::build() {
     }
 }
 
-void command_queues_builder::set_priority_mode(cldnn_priority_mode_type priority, bool extension_support) {
-    if (priority != cldnn_priority_disabled && !extension_support) {
+void command_queues_builder::set_priority_mode(priority_mode_types priority, bool extension_support) {
+    if (priority != priority_mode_types::disabled && !extension_support) {
         CLDNN_ERROR_MESSAGE("Command queues builders - priority_mode",
                             std::string("The param priority_mode is set in engine_configuration, ")
                             .append("but cl_khr_priority_hints or cl_khr_create_command_queue ")
@@ -117,8 +117,8 @@ void command_queues_builder::set_priority_mode(cldnn_priority_mode_type priority
     _priority_mode = priority;
 }
 
-void command_queues_builder::set_throttle_mode(cldnn_throttle_mode_type throttle, bool extension_support) {
-    if (throttle != cldnn_throttle_disabled && !extension_support) {
+void command_queues_builder::set_throttle_mode(throttle_mode_types throttle, bool extension_support) {
+    if (throttle != throttle_mode_types::disabled && !extension_support) {
         CLDNN_ERROR_MESSAGE("Command queues builders - throttle_mode",
                             std::string("The param throttle_mode is set in engine_configuration, ")
                             .append("but cl_khr_throttle_hints is not supported by current OpenCL implementation."));
index ba4d5a8..397ae85 100644 (file)
@@ -24,8 +24,8 @@ class command_queues_builder {
 public:
     command_queues_builder(const cl::Context& context, const cl::Device& device, const cl_platform_id& platform_id);
     void build();
-    void set_throttle_mode(cldnn_throttle_mode_type throttle, bool extension_support);
-    void set_priority_mode(cldnn_priority_mode_type priority, bool extension_support);
+    void set_throttle_mode(throttle_mode_types throttle, bool extension_support);
+    void set_priority_mode(priority_mode_types priority, bool extension_support);
     void set_profiling(bool flag) { _profiling = flag; }
     void set_out_of_order(bool flag) { _out_of_order = flag; }
     cl::CommandQueue& queue() { return _queue; }
@@ -38,8 +38,8 @@ private:
     cl_platform_id _platform_id;
     bool _profiling;
     bool _out_of_order;
-    cldnn_priority_mode_type _priority_mode;
-    cldnn_throttle_mode_type _throttle_mode;
+    priority_mode_types _priority_mode;
+    throttle_mode_types _throttle_mode;
 
     cl_command_queue_properties get_properties();
 };
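
The two hunks above replace the C-style cldnn_priority_* and cldnn_throttle_* constants with the scoped enums priority_mode_types and throttle_mode_types, which keeps the disabled/low/high states strongly typed. The sketch below mirrors the shape of the switch in command_queues_builder::build(); the QUEUE_PRIORITY_* constants are local placeholders so the example compiles without OpenCL headers, and their numeric values do not correspond to the real CL_QUEUE_PRIORITY_*_KHR macros.

#include <iostream>

// Scoped enum standing in for cldnn::priority_mode_types.
enum class priority_mode_types { disabled, low, med, high };

// Placeholder values; in the real code these come from the cl_khr_priority_hints extension.
constexpr unsigned QUEUE_PRIORITY_LOW  = 1;
constexpr unsigned QUEUE_PRIORITY_MED  = 2;
constexpr unsigned QUEUE_PRIORITY_HIGH = 3;

unsigned to_queue_priority(priority_mode_types mode) {
    switch (mode) {
        case priority_mode_types::high: return QUEUE_PRIORITY_HIGH;
        case priority_mode_types::low:  return QUEUE_PRIORITY_LOW;
        default:                        return QUEUE_PRIORITY_MED;  // med or disabled
    }
}

int main() {
    std::cout << to_queue_priority(priority_mode_types::high) << '\n';  // prints 3
}
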
index a9c8958..c002a2f 100644 (file)
@@ -101,55 +101,56 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<concatenation>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create},
-            // 5D
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), concatenation_gpu::create},
-            // block f16 format
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), concatenation_gpu::create},
-            // MMAD
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), concatenation_gpu::create},
-            // 6D
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), concatenation_gpu::create},
-            {std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), concatenation_gpu::create},
-        });
-    }
-    ~attach() {}
-};
-}  // namespace
-
-attach attach_impl;
+namespace detail {
+
+attach_concatenation_gpu::attach_concatenation_gpu() {
+    implementation_map<concatenation>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create},
+        // 5D
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx_f16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx_f16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx_f16), concatenation_gpu::create},
+        // block f16 format
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), concatenation_gpu::create},
+        // MMAD
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), concatenation_gpu::create},
+        // 6D
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), concatenation_gpu::create},
+    });
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 12b4a30..37ae7ad 100644 (file)
@@ -16,6 +16,7 @@
 #include "network_impl.h"
 #include "implementation_map.h"
 #include "math_utils.h"
+#include "register_gpu.hpp"
 
 #include <algorithm>
 #include <vector>
@@ -119,17 +120,15 @@ private:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                           condition_gpu::create);
-        implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                           condition_gpu::create);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_condition_gpu::attach_condition_gpu() {
+    implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                        condition_gpu::create);
+    implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                        condition_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index ef90c93..9561270 100644 (file)
@@ -23,6 +23,7 @@ namespace gpu {
 configuration::configuration()
     : enable_profiling(false),
       meaningful_kernels_names(false),
+      dump_custom_program(false),
       device_type(gpu),
       device_vendor(0x8086),
       compiler_options(""),
@@ -30,6 +31,9 @@ configuration::configuration()
       host_out_of_order(false),
       log(""),
       ocl_sources_dumps_dir(""),
+      priority_mode(priority_mode_types::disabled),
+      throttle_mode(throttle_mode_types::disabled),
+      queues_num(0),
       user_context(nullptr),
       tuning_cache_path("cache.json") {}
 }  // namespace gpu
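
The configuration() constructor above now also defaults dump_custom_program, priority_mode, throttle_mode and queues_num. As a brief note on the design, and only as a sketch built on a stripped-down stand-in rather than the real gpu::configuration, the same defaults could be expressed as in-class member initializers, which keeps each field next to its default value and shortens the constructor:

#include <cstdint>
#include <string>

// Stand-in enums and struct; not the actual cldnn::gpu::configuration.
enum class priority_mode_types { disabled, low, med, high };
enum class throttle_mode_types { disabled, low, med, high };

struct configuration_sketch {
    bool enable_profiling = false;
    bool meaningful_kernels_names = false;
    bool dump_custom_program = false;
    std::string ocl_sources_dumps_dir;
    priority_mode_types priority_mode = priority_mode_types::disabled;
    throttle_mode_types throttle_mode = throttle_mode_types::disabled;
    uint16_t queues_num = 0;
    std::string tuning_cache_path = "cache.json";
};

int main() {
    configuration_sketch cfg;                  // all defaults applied without a constructor init list
    return static_cast<int>(cfg.queues_num);   // 0
}
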
index ad85086..01debd5 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 #include <string>
-#include "api/C/cldnn.h"
+#include "api/cldnn.hpp"
+#include "api/engine.hpp"
 
 namespace cl {
 class Context;
 }
 namespace cldnn {
 namespace gpu {
+
 struct configuration {
     enum device_types { default_device = 0, cpu, gpu, accelerator };
 
@@ -39,8 +41,8 @@ struct configuration {
     bool host_out_of_order;
     std::string log;
     std::string ocl_sources_dumps_dir;
-    cldnn_priority_mode_type priority_mode;
-    cldnn_throttle_mode_type throttle_mode;
+    priority_mode_types priority_mode;
+    throttle_mode_types throttle_mode;
     uint16_t queues_num;
     cl::Context* user_context;
     std::string tuning_cache_path;
index ebc2c10..4459e49 100644 (file)
@@ -68,23 +68,19 @@ struct contract_gpu : typed_primitive_gpu_impl<contract> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = contract_gpu::create;
+namespace detail {
 
-        implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-        implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-        implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-        implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-    }
-    ~attach() = default;
-};
+attach_contract_gpu::attach_contract_gpu() {
+    auto val_fw = contract_gpu::create;
 
-attach attach_impl;
+    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 3ef88d2..b1de8f3 100644 (file)
@@ -63,12 +63,6 @@ protected:
         args.output_calibration_factors = (memory_impl::cptr)
             (instance.output_calibration_factors_term() ? &instance.output_calibration_factors_memory(split) : nullptr);
 
-        if (instance.has_fused_primitives()) {
-            size_t count = instance.get_fused_mem_count();
-            for (size_t i = 0; i < count; i++) {
-                args.fused_op_inputs.push_back((memory_impl::cptr) &instance.fused_memory(i));
-            }
-        }
         return args;
     }
 
@@ -108,7 +102,8 @@ public:
         // The plugin should always pass weights for forward convolution as a single input in goiyx or goizyx layout;
         // the cldnn optimizer should then transform these layouts into other ones and split them if necessary.
         // This WA is required to keep the logical size of the tensors correct for the weights reorder.
-        if (conv_params.inputs[0].GetLayout() == kernel_selector::DataLayout::bfyx_f16) {
+        if (conv_params.inputs[0].GetLayout() == kernel_selector::DataLayout::bfyx_f16 ||
+            conv_params.inputs[0].GetLayout() == kernel_selector::DataLayout::fs_b_yx_fsv32) {
             conv_params.weights = convert_weights_tensor(
                 layout(weights_layout.data_type, weights_layout.format,
                 {weights_layout.size.batch[0], weights_layout.size.feature[0], weights_layout.size.spatial[0], weights_layout.size.spatial[1]}));
@@ -122,9 +117,6 @@ public:
                                     additional_offset);
         }
 
-        if (primitive->with_activation)
-            convert_activation_func_params(primitive, conv_params.activation);
-
         if (primitive->deformable_mode) {
             conv_params.inputs.push_back(convert_data_tensor(arg.trans().get_output_layout()));
             conv_params.deformable_mode = true;
@@ -169,29 +161,8 @@ public:
             }
         }
 
-        for (auto& fused_prim : arg.get_fused_primitives()) {
-            using op_type = kernel_selector::convolution_params::fused_operation_desc::Type;
-            kernel_selector::convolution_params::fused_operation_desc desc;
-            if (fused_prim.prim->type == eltwise::type_id()) {
-                desc.type = op_type::ELTWISE;
-            } else {
-                CLDNN_ERROR_MESSAGE(arg.id(), "Invalid fused primitive type in convolution node");
-            }
-
-            desc.dep_idx_start = fused_prim.dep_start_idx;
-            desc.dep_size = fused_prim.deps.size();
-
-            for (size_t i = desc.dep_idx_start; i < desc.dep_idx_start + desc.dep_size; i++) {
-                desc.tensors.push_back(convert_data_tensor(arg.get_dependency(i).get_output_layout()));
-            }
-
-            if (fused_prim.activation != cldnn_activation_func_t::activation_none) {
-                desc.activation.m = fused_prim.activation_params.a;
-                desc.activation.n = fused_prim.activation_params.b;
-                desc.activation.function = get_kernel_selector_activation_param(fused_prim.activation);
-            }
-            conv_params.fused_ops.push_back(desc);
-        }
+        if (arg.get_output_layout().format == format::bfzyx_f16)
+            conv_optional_params.allowInputReordering = true;
 
         auto& kernel_selector = kernel_selector::convolution_kernel_selector::Instance();
 
@@ -214,67 +185,69 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(
-            std::make_tuple(engine_types::ocl, data_types::f32, format::winograd_2x3_s1_data),
-            convolution_gpu::create);
-        implementation_map<convolution>::add(
-            std::make_tuple(engine_types::ocl, data_types::f16, format::winograd_2x3_s1_data),
+namespace detail {
+
+attach_convolution_gpu::attach_convolution_gpu() {
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(
+        std::make_tuple(engine_types::ocl, data_types::f32, format::winograd_2x3_s1_data),
+        convolution_gpu::create);
+    implementation_map<convolution>::add(
+        std::make_tuple(engine_types::ocl, data_types::f16, format::winograd_2x3_s1_data),
+        convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bf8_xy16),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bf8_xy16),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                         convolution_gpu::create);
+    // block f16 format
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16),
+                                         convolution_gpu::create);
+    // MMAD
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byx8_f4),
+                                         convolution_gpu::create);
+
+    implementation_map<convolution>::add(
+        std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32),
+        convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16),
+                                         convolution_gpu::create);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16),
             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bf8_xy16),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bf8_xy16),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                             convolution_gpu::create);
-        // block f16 format
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16),
-                                             convolution_gpu::create);
-        // MMAD
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byx8_f4),
-                                             convolution_gpu::create);
-
-        implementation_map<convolution>::add(
-            std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32),
-            convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
-                                             convolution_gpu::create);
-        implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32),
-                                             convolution_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
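
The change above is the pattern repeated through the files that follow: the static `attach` helper in an anonymous namespace is replaced by a named `attach_convolution_gpu` constructor inside `namespace detail`, declared in the register_gpu.hpp header that this change set starts including, so kernel registration can be driven explicitly rather than by the initialization order of translation-unit statics. A minimal sketch of that registration idea, using simplified stand-in types rather than the real clDNN classes (the `register_convolution_gpu` hook at the end is an assumption about how the attach objects are invoked):

    // A minimal sketch of the registration scheme, assuming a map keyed by
    // (engine type, data type, format); the real implementation_map and
    // register_gpu machinery in clDNN are more elaborate.
    #include <functional>
    #include <map>
    #include <tuple>
    #include <utility>

    enum class engine_types { ocl };
    enum class data_types { f16, f32 };
    enum class format { bfyx, yxfb };

    struct primitive_impl {};   // stand-in for the real implementation base class

    template <typename PrimType>
    struct implementation_map {
        using key_type = std::tuple<engine_types, data_types, format>;
        using factory = std::function<primitive_impl*()>;

        static void add(key_type key, factory f) { registry()[key] = std::move(f); }
        static factory at(const key_type& key) { return registry().at(key); }

    private:
        static std::map<key_type, factory>& registry() {
            static std::map<key_type, factory> m;   // one registry per primitive type
            return m;
        }
    };

    struct convolution {};      // primitive tag type

    namespace detail {
    struct attach_convolution_gpu {
        attach_convolution_gpu() {
            auto create = []() -> primitive_impl* { return new primitive_impl(); };
            implementation_map<convolution>::add(
                std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), create);
            implementation_map<convolution>::add(
                std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), create);
        }
    };
    }  // namespace detail

    // Hypothetical central hook: constructing the attach object here gives a
    // well-defined registration order, which anonymous-namespace statics do not
    // guarantee across translation units.
    void register_convolution_gpu() { static detail::attach_convolution_gpu attach_impl; }
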
index 577f0a7..3eeb83d 100644 (file)
@@ -164,31 +164,29 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<convolution_grad_weights>::add(
-            std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-            convolution_grad_weights_gpu::create);
-        implementation_map<convolution_grad_weights>::add(
-            std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-            convolution_grad_weights_gpu::create);
-        implementation_map<convolution_grad_weights>::add(
-            std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-            convolution_grad_weights_gpu::create);
-        implementation_map<convolution_grad_weights>::add(
-            std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-            convolution_grad_weights_gpu::create);
-        implementation_map<convolution_grad_weights>::add(
-            std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-            convolution_grad_weights_gpu::create);
-        implementation_map<convolution_grad_weights>::add(
-            std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-            convolution_grad_weights_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_convolution_grad_weights_gpu::attach_convolution_grad_weights_gpu() {
+    implementation_map<convolution_grad_weights>::add(
+        std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+        convolution_grad_weights_gpu::create);
+    implementation_map<convolution_grad_weights>::add(
+        std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+        convolution_grad_weights_gpu::create);
+    implementation_map<convolution_grad_weights>::add(
+        std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+        convolution_grad_weights_gpu::create);
+    implementation_map<convolution_grad_weights>::add(
+        std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+        convolution_grad_weights_gpu::create);
+    implementation_map<convolution_grad_weights>::add(
+        std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+        convolution_grad_weights_gpu::create);
+    implementation_map<convolution_grad_weights>::add(
+        std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+        convolution_grad_weights_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 025a606..b1293ba 100644 (file)
@@ -60,47 +60,49 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = crop_gpu::create;
+namespace detail {
 
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    }
-    ~attach() {}
-};
+attach_crop_gpu::attach_crop_gpu() {
+    auto val_fw = crop_gpu::create;
 
-attach attach_impl;
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx_f16), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx_f16), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx_f16), val_fw);
+    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx_f16), val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 945b42e..39aae2f 100644 (file)
@@ -22,6 +22,7 @@
 #include "engine_impl.h"
 #include "jitter.h"
 #include "error_handler.h"
+#include "register_gpu.hpp"
 
 #include <map>
 #include <sstream>
@@ -62,13 +63,13 @@ struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
     }
 };
 
-static kernel_selector::kernel_argument_element get_arg(cldnn_arg arg) {
+static kernel_selector::kernel_argument_element get_arg(custom_gpu_primitive::arg_desc arg) {
     kernel_selector::kernel_argument_element ret;
-    switch (arg.arg_type) {
-        case arg_input:
+    switch (arg.type) {
+        case custom_gpu_primitive::arg_input:
             ret.t = kernel_selector::kernel_argument_types::INPUT;
             break;
-        case arg_output:
+        case custom_gpu_primitive::arg_output:
             ret.t = kernel_selector::kernel_argument_types::OUTPUT;
             break;
         default:
@@ -87,7 +88,7 @@ std::string value_macro(const std::string& name, const std::string& value) {
     return oss.str();
 }
 
-static void add_layout_to_jit(kernel_selector::jit_constants& mem_consts, const std::string& name, layout l) {
+static void add_layout_to_jit(kernel_selector::jit_constants& mem_consts, const std::string& name, const layout& l) {
     // Size (in elements)
     // #define INPUT0_DIMS (uint[]) { b, f, y, x, }
     mem_consts.AddConstant(kernel_selector::MakeJitConstant(name + "_DIMS", l.size.sizes(format::bfyx)));
@@ -212,12 +213,12 @@ static primitive_impl* create(const custom_gpu_primitive_node& arg) {
 
     return new custom_gpu_primitive_gpu(arg, cl_kernel);
 }
-
-namespace {
-struct attach {
-    attach() { implementation_map<custom_gpu_primitive>::add({{cldnn::engine_types::ocl, create}}); }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
 }  // namespace neural
+
+namespace cldnn { namespace gpu { namespace detail {
+
+attach_custom_gpu_primitive_gpu::attach_custom_gpu_primitive_gpu() {
+    implementation_map<custom_gpu_primitive>::add({{cldnn::engine_types::ocl, neural::create}});
+}
+
+} } }  // namespace cldnn::gpu::detail
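
The custom_gpu_primitive changes above switch the argument descriptors from the C API's cldnn_arg to custom_gpu_primitive::arg_desc and keep building the kernel's JIT defines from layout metadata in add_layout_to_jit. A small sketch of that JIT-define idea, using simplified string helpers (the real code accumulates kernel_selector::jit_constants rather than raw strings):

    // Illustrative helpers in the spirit of value_macro/add_layout_to_jit.
    #include <sstream>
    #include <string>
    #include <vector>

    std::string value_macro(const std::string& name, const std::string& value) {
        std::ostringstream oss;
        oss << "#define " << name << " " << value << "\n";
        return oss.str();
    }

    std::string dims_macro(const std::string& name, const std::vector<int>& bfyx_sizes) {
        // e.g. "#define INPUT0_DIMS (uint[]){ 1, 32, 56, 56, }"
        std::ostringstream oss;
        oss << "#define " << name << "_DIMS (uint[]){ ";
        for (int d : bfyx_sizes) oss << d << ", ";
        oss << "}\n";
        return oss.str();
    }
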
index 7e4bff3..d74ec05 100644 (file)
@@ -117,9 +117,6 @@ public:
         auto deconv_optional_params =
             get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
 
-        if (primitive->with_activation)
-            convert_activation_func_params(primitive, deconv_params.activation);
-
         deconv_params.depthwise_separable_opt = depthwise_separable_opt;
 
         deconv_params.split = split;
@@ -158,29 +155,31 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                               deconvolution_gpu::create);
-        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                               deconvolution_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_deconvolution_gpu::attach_deconvolution_gpu() {
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                           deconvolution_gpu::create);
+    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                           deconvolution_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 39d0b57..6365482 100644 (file)
@@ -150,22 +150,22 @@ public:
     }
 };
 
-namespace {
+namespace detail {
 
-struct attach {
-    attach() {
-        implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+attach_deformable_conv_gpu::attach_deformable_conv_gpu() {
+    implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
                                              deformable_conv_gpu::create);
-        implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+    implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
                                              deformable_conv_gpu::create);
-        implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                             deformable_interp_gpu::create);
-        implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                             deformable_interp_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+}
+
+attach_deformable_interp_gpu::attach_deformable_interp_gpu() {
+    implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                               deformable_interp_gpu::create);
+    implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                               deformable_interp_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index c9a0645..b0cb6ee 100644 (file)
@@ -52,18 +52,16 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = depth_to_space_gpu::create;
-        implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                val_fw);
-        implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                val_fw);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_depth_to_space_gpu::attach_depth_to_space_gpu() {
+    auto val_fw = depth_to_space_gpu::create;
+    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                            val_fw);
+    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                            val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index c3e0a47..b799e1b 100644 (file)
@@ -144,24 +144,21 @@ primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kerne
     return new detection_output_sort_gpu(arg, kernel);
 }
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                  detection_output_gpu::create);
-        implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                  detection_output_gpu::create);
-        implementation_map<detection_output_sort>::add(
-            std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-            detection_output_sort_gpu::create);
-        implementation_map<detection_output_sort>::add(
-            std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-            detection_output_sort_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_detection_output_gpu::attach_detection_output_gpu() {
+    implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                              detection_output_gpu::create);
+    implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                              detection_output_gpu::create);
+    implementation_map<detection_output_sort>::add(
+        std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+        detection_output_sort_gpu::create);
+    implementation_map<detection_output_sort>::add(
+        std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+        detection_output_sort_gpu::create);
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index b086560..910dcc4 100644 (file)
@@ -101,8 +101,6 @@ public:
         }
 
         const auto& primitive = arg.get_primitive();
-        if (primitive->with_activation)
-            convert_activation_func_params(primitive, ew_params.activation);
 
         ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
                                          kernel_selector::eltwise_params::InputType::Buffer(1)},
@@ -215,45 +213,48 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<eltwise>::add(
-            {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create},
-             // block f16
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), eltwise_gpu::create},
-             // 3D
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create},
-             // MMAD
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create},
-             //
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create}});
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_eltwise_gpu::attach_eltwise_gpu() {
+    implementation_map<eltwise>::add(
+        {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create},
+         // block f16
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), eltwise_gpu::create},
+         // 3D
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx_f16), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx_f16), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx_f16), eltwise_gpu::create},
+         // MMAD
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create},
+         //
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create}});
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 8223372..36abcd3 100644 (file)
@@ -24,7 +24,7 @@
 #include "embed/embed_kernel_selector.h"
 #include "embed/embed_params.h"
 
-#include "api/CPP/input_layout.hpp"
+#include "api/input_layout.hpp"
 #include <vector>
 
 namespace cldnn {
@@ -73,18 +73,14 @@ struct embed_gpu : typed_primitive_gpu_impl<embed> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<embed>::add(
-            {{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), embed_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), embed_gpu::create}});
-    }
-    ~attach() {}
-};
+namespace detail {
 
-attach attach_impl;
-}  // namespace
+attach_embed_gpu::attach_embed_gpu() {
+    implementation_map<embed>::add(
+        {{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), embed_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), embed_gpu::create}});
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 83e361d..4ed1db4 100644 (file)
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #include "engine_info.h"
 #include "ocl_toolkit.h"
 #include <unordered_map>
index 0b6e38c..3affa33 100644 (file)
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #pragma once
 #include <cstdint>
 #include <memory>
-#include "api/CPP/engine.hpp"
+#include "api/engine.hpp"
 #include "document.h"
 #include <string>
 
@@ -26,8 +25,6 @@ namespace gpu {
 class gpu_toolkit;
 struct engine_info_internal : cldnn::engine_info {
     std::string dev_id;
-    std::string driver_version;
-    std::string dev_name;
     std::uint32_t compute_units_count;
     std::shared_ptr<rapidjson::Document> device_cache;
 
index 8837cd0..da62374 100644 (file)
@@ -37,8 +37,10 @@ protected:
 
     event_impl::ptr get_from_pool(std::shared_ptr<gpu_toolkit>& ctx) {
         for (auto& ev : _events) {
-            if (!ev->is_valid())
+            if (!ev->is_valid()) {
+                ev->reset();
                 return ev;
+            }
         }
         const event_impl::ptr ev_impl { new Type(ctx), false };
         return allocate(ev_impl);
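
The hunk above now hands out a pooled event only after calling reset() on it, so completion state left by the previous user cannot leak into the next one. A simplified sketch of that reuse pattern, with generic pool types (pooled_event and event_pool are illustrative names, not the actual clDNN event classes):

    #include <memory>
    #include <vector>

    struct pooled_event {
        bool in_flight = false;                 // set while someone is waiting on the event
        void reset() { in_flight = false; /* also drop stale callbacks/status here */ }
        bool is_valid() const { return in_flight; }
    };

    class event_pool {
    public:
        std::shared_ptr<pooled_event> get() {
            for (auto& ev : events_) {
                if (!ev->is_valid()) {
                    ev->reset();                // the added step: clear leftover state before reuse
                    return ev;
                }
            }
            events_.push_back(std::make_shared<pooled_event>());
            return events_.back();
        }

    private:
        std::vector<std::shared_ptr<pooled_event>> events_;
    };
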
index e8c6d98..cff3da6 100644 (file)
@@ -27,8 +27,8 @@
 #include "error_handler.h"
 #include "kernel_runner.h"
 
-#include "api/CPP/reorder.hpp"
-#include "api/CPP/input_layout.hpp"
+#include "api/reorder.hpp"
+#include "api/input_layout.hpp"
 #include <memory>
 
 namespace cldnn {
@@ -40,8 +40,8 @@ struct fully_connected_gpu : typed_primitive_gpu_impl<fully_connected> {
 
 protected:
     kernel::kernel_arguments_data get_arguments(typed_primitive_inst<fully_connected>& instance,
-                                                        int32_t) const override {
-        kernel::kernel_arguments_data args;
+                                                        int32_t split) const override {
+        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
 
         args.inputs = {(memory_impl::cptr) &instance.input_memory()};
         args.output = (memory_impl::cptr) &instance.output_memory();
@@ -63,9 +63,6 @@ public:
                 arg.get_program());
         fc_optional_params.allowInputReordering = true;
 
-        if (arg.get_primitive()->with_activation)
-            convert_activation_func_params(arg.get_primitive(), fc_params.activation);
-
         fc_params.output = fc_params.output.FlattenFeatureAndSpatials();
 
         const auto primitive = arg.get_primitive();
@@ -104,33 +101,31 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = fully_connected_gpu::create;
-
-        implementation_map<fully_connected>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
-            // MMAD
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), val_fw},
-            // IMAD
-            {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw},
-            // fs_b_yx_fsv32
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_fully_connected_gpu::attach_fully_connected_gpu() {
+    auto val_fw = fully_connected_gpu::create;
+
+    implementation_map<fully_connected>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
+        // MMAD
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), val_fw},
+        // IMAD
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw},
+        // fs_b_yx_fsv32
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index ed20ed2..2f20ac2 100644 (file)
@@ -21,7 +21,7 @@
 #include "kernel_selector_helper.h"
 #include "fully_connected_grad_input/fully_connected_grad_input_kernel_selector.h"
 #include "fully_connected_grad_input/fully_connected_grad_input_kernel_base.h"
-#include "api/CPP/fully_connected_grad_input.hpp"
+#include "api/fully_connected_grad_input.hpp"
 
 namespace cldnn {
 namespace gpu {
@@ -66,23 +66,21 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = fully_connected_grad_input_gpu::create;
+namespace detail {
 
-        implementation_map<fully_connected_grad_input>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_fully_connected_grad_input_gpu::attach_fully_connected_grad_input_gpu() {
+    auto val_fw = fully_connected_grad_input_gpu::create;
+
+    implementation_map<fully_connected_grad_input>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
index 1c6da4a..aeb8aca 100644 (file)
@@ -22,7 +22,7 @@
 #include "kernel_selector_helper.h"
 #include "fully_connected_grad_weights/fully_connected_grad_weights_kernel_selector.h"
 #include "fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.h"
-#include "api/CPP/fully_connected_grad_weights.hpp"
+#include "api/fully_connected_grad_weights.hpp"
 
 namespace cldnn {
 namespace gpu {
@@ -93,23 +93,21 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = fully_connected_grad_weights_gpu::create;
-
-        implementation_map<fully_connected_grad_weights>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_fully_connected_grad_weights_gpu::attach_fully_connected_grad_weights_gpu() {
+    auto val_fw = fully_connected_grad_weights_gpu::create;
+
+    implementation_map<fully_connected_grad_weights>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
index 2fa5173..abe58e1 100644 (file)
@@ -110,9 +110,6 @@ public:
         fuse_params.fused_in_training = arg.is_fused_in_training();
         fuse_params.scale_bias = arg.scale_bias_term();
 
-        if (primitive->with_activation)
-            convert_activation_func_params(primitive, fuse_params.activation);
-
         fuse_params.split = split;
         fuse_params.filterSize = {
             (uint32_t)weights_size.spatial[0],
@@ -148,17 +145,15 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<fused_conv_bn_scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                     fused_conv_bn_scale_gpu::create);
-        implementation_map<fused_conv_bn_scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                     fused_conv_bn_scale_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_fused_conv_bn_scale_gpu::attach_fused_conv_bn_scale_gpu() {
+    implementation_map<fused_conv_bn_scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                                 fused_conv_bn_scale_gpu::create);
+    implementation_map<fused_conv_bn_scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                                 fused_conv_bn_scale_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 41538a9..3230480 100644 (file)
@@ -107,7 +107,7 @@ public:
         }
 
         if (primitive->conv.with_activation) {
-            convert_activation_func_params(&primitive->conv, fused_params.conv.activation);
+            convert_activation_func_params(&primitive->conv, fused_params.conv.activations);
         }
 
         fused_params.conv.depthwise_separable_opt = depthwise_separable_opt;
@@ -121,13 +121,15 @@ public:
         conv_params.filterSize = {
             (uint32_t)weights_size.spatial[0],
             (uint32_t)weights_size.spatial[1],
+            (uint32_t)weights_size.spatial[2],
         };
 
         conv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0),
-                               (uint32_t)std::max(-input_offset.spatial[1], 0)};
+                               (uint32_t)std::max(-input_offset.spatial[1], 0),
+                               (uint32_t)std::max(-input_offset.spatial[2], 0) };
 
-        conv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]};
-        conv_params.dilation = {(uint32_t)dilation.spatial[0], (uint32_t)dilation.spatial[1]};
+        conv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]};
+        conv_params.dilation = {(uint32_t)dilation.spatial[0], (uint32_t)dilation.spatial[1], (uint32_t)dilation.spatial[2] };
 
         if (primitive->conv.weights_quantization_factors.size() > 0) {
             conv_params.int8_quantization = true;
@@ -192,34 +194,42 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                    fused_conv_eltwise_gpu::create);
-        implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                    fused_conv_eltwise_gpu::create);
-        implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                    fused_conv_eltwise_gpu::create);
-        implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                                    fused_conv_eltwise_gpu::create);
-        implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                                    fused_conv_eltwise_gpu::create);
-        // MMAD
-        implementation_map<fused_conv_eltwise>::add(
-            std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32),
-            fused_conv_eltwise_gpu::create);
-        // IMAD
-        implementation_map<fused_conv_eltwise>::add(
-            std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
-            fused_conv_eltwise_gpu::create);
-        implementation_map<fused_conv_eltwise>::add(
-            std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
-            fused_conv_eltwise_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_fused_conv_eltwise_gpu::attach_fused_conv_eltwise_gpu() {
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
+                                                fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16),
+                                                fused_conv_eltwise_gpu::create);
+    // MMAD
+    implementation_map<fused_conv_eltwise>::add(
+        std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32),
+        fused_conv_eltwise_gpu::create);
+    // IMAD
+    implementation_map<fused_conv_eltwise>::add(
+        std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
+        fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(
+        std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
+        fused_conv_eltwise_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
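
The fused_conv_eltwise changes above extend filterSize, padding, stride and dilation with a third spatial component to cover the bfzyx and bfzyx_f16 cases. For reference, a small sketch of how such per-axis parameters enter the standard convolution output-size arithmetic (a generic formula, not code taken from clDNN; it assumes symmetric padding and a padded input at least as large as the dilated filter):

    #include <array>
    #include <cstdint>

    // Spatial order matches the diff: [0] = x, [1] = y, [2] = z.
    std::array<uint32_t, 3> conv_output_size(const std::array<uint32_t, 3>& input,
                                             const std::array<uint32_t, 3>& filter,
                                             const std::array<uint32_t, 3>& padding,
                                             const std::array<uint32_t, 3>& stride,
                                             const std::array<uint32_t, 3>& dilation) {
        std::array<uint32_t, 3> out{};
        for (int i = 0; i < 3; ++i) {
            const uint32_t dilated_filter = dilation[i] * (filter[i] - 1) + 1;
            out[i] = (input[i] + 2 * padding[i] - dilated_filter) / stride[i] + 1;
        }
        return out;
    }
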
index 912fe7a..e96690f 100644 (file)
@@ -69,16 +69,14 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = gather_gpu::create;
-        implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_gather_gpu::attach_gather_gpu() {
+    auto val_fw = gather_gpu::create;
+    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp
new file mode 100644 (file)
index 0000000..604d28d
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gather_tree_inst.h"
+
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "gather_tree/gather_tree_kernel_selector.h"
+#include "gather_tree/gather_tree_kernel_base.h"
+#include "error_handler.h"
+namespace cldnn {
+namespace gpu {
+
+struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
+    using parent = typed_primitive_gpu_impl<gather_tree>;
+    using parent::parent;
+
+    static primitive_impl* create(const gather_tree_node& arg) {
+        auto b_params = get_default_params<kernel_selector::gather_tree_params>(arg, 1);
+        auto b_optional_params =
+            get_default_optional_params<kernel_selector::gather_tree_optional_params>(arg.get_program());
+
+        auto desc = arg.get_primitive();
+
+        auto& kernel_selector = kernel_selector::gather_tree_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+            "Best_kernel.empty()",
+            best_kernels.empty(),
+            "Cannot find a proper kernel with this arguments");
+
+        return new gather_tree_gpu(arg, best_kernels[0]);
+    }
+};
+namespace detail {
+
+attach_gather_tree_gpu::attach_gather_tree_gpu() {
+    auto val_fw = gather_tree_gpu::create;
+    implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
+    implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
+
+    implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+}
+
+}  // namespace detail
+}  // namespace gpu
+}  // namespace cldnn
index 4fecd32..7b86852 100644 (file)
@@ -56,22 +56,18 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = gemm_gpu::create;
-        implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-        implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-        implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    }
-    ~attach() = default;
-};
+namespace detail {
 
-attach attach_impl;
+attach_gemm_gpu::attach_gemm_gpu() {
+    auto val_fw = gemm_gpu::create;
+    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 567e7ec..e0d0288 100644 (file)
@@ -20,6 +20,7 @@
 #include "kernel_selector_helper.h"
 #include "network_impl.h"
 #include "engine_impl.h"
+#include "register_gpu.hpp"
 #include <vector>
 
 using namespace cldnn;
@@ -87,11 +88,11 @@ static primitive_impl* create(const generic_layer_node& arg) {
     }
 }
 
-namespace {
-struct attach {
-    attach() { implementation_map<generic_layer>::add({{cldnn::engine_types::ocl, create}}); }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
 }  // namespace neural
+
+namespace cldnn { namespace gpu { namespace detail {
+    attach_generic_layer_gpu::attach_generic_layer_gpu() {
+        implementation_map<generic_layer>::add({ {cldnn::engine_types::ocl, neural::create} });
+    }
+
+} } }  // namespace cldnn::gpu::detail
index cf838bb..f3a184a 100644 (file)
@@ -79,33 +79,29 @@ struct index_select_gpu : typed_primitive_gpu_impl<index_select> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = index_select_gpu::create;
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                              val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                              val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx),
-                                              val_fw);
+namespace detail {
 
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                              val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                              val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
-        implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb),
-                                              val_fw);
-    }
-    ~attach() = default;
-};
+attach_index_select_gpu::attach_index_select_gpu() {
+    auto val_fw = index_select_gpu::create;
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                          val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                          val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx),
+                                          val_fw);
 
-attach attach_impl;
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                          val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                          val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
+    implementation_map<index_select>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb),
+                                          val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 2358e8c..94c0514 100644 (file)
@@ -114,7 +114,7 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
                 int num_of_bias_elements = static_cast<int>(weights_bias_params.bias[0].PhysicalSize());
                 bias_buffers.push_back(engine->allocate_memory({from_data_type(weights_bias_params.bias[0].GetDType()),
                                                                 format::bfyx,
-                                                                tensor(1, 1, num_of_bias_elements, 1)},
+                                                                tensor(1, num_of_bias_elements, 1, 1)},
                                                                0));
             }
             args.bias = bias_buffers[0];
@@ -124,10 +124,10 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
     args.split = 0;
 }
 
-std::vector<uint64_t> kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) {
+std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) {
     auto context = engine->get_context();
 
-    std::vector<uint64_t> run_times;
+    std::vector<std::chrono::nanoseconds> run_times;
 
     int num_of_kernels_to_run = static_cast<int>(kernels_data.size());
 
@@ -150,7 +150,7 @@ std::vector<uint64_t> kernel_runner::run_kernels(const kernel_selector::KernelsD
         int i = 0;
         for (auto it = batch_start; it < batch_end; it++) {
             std::vector<event_impl::ptr> events;
-            uint64_t kernel_run_time = 0;
+            auto kernel_run_time = std::chrono::nanoseconds::zero();
             int num_of_runs = 0;
 
             for (int iteration = 0; iteration < runs_per_kernel; iteration++) {
@@ -169,8 +169,8 @@ std::vector<uint64_t> kernel_runner::run_kernels(const kernel_selector::KernelsD
                 if (event.get() != NULL) {
                     auto profiling_intervals = event->get_profiling_info();
                     for (auto const& profiling_interval : profiling_intervals) {
-                        if (strcmp(profiling_interval.name, "executing") == 0) {
-                            kernel_run_time += profiling_interval.nanoseconds;
+                        if (profiling_interval.name == "executing") {
+                            kernel_run_time += profiling_interval.value->value();
                             num_of_runs++;
                             break;
                         }
@@ -181,7 +181,7 @@ std::vector<uint64_t> kernel_runner::run_kernels(const kernel_selector::KernelsD
             if (num_of_runs > 0) {
                 run_times.push_back(kernel_run_time / num_of_runs);
             } else {
-                run_times.push_back(std::numeric_limits<uint64_t>::max());
+                run_times.push_back(std::chrono::nanoseconds::max());
             }
             i++;
         }
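The kernel_runner hunks above switch timing from raw uint64_t nanosecond counts to std::chrono::nanoseconds. The sketch below shows the averaging logic under that representation; it is a hypothetical helper written for illustration, not part of the clDNN API.

#include <chrono>
#include <cstdint>
#include <vector>

// Accumulate per-iteration "executing" intervals as std::chrono::nanoseconds and
// average them, falling back to nanoseconds::max() when no profiled run was recorded.
std::chrono::nanoseconds average_kernel_time(const std::vector<uint64_t>& raw_intervals_ns) {
    auto total = std::chrono::nanoseconds::zero();
    int num_of_runs = 0;
    for (uint64_t ns : raw_intervals_ns) {
        total += std::chrono::nanoseconds(ns);
        ++num_of_runs;
    }
    return num_of_runs > 0 ? total / num_of_runs : std::chrono::nanoseconds::max();
}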
index fef49a7..2e6fc97 100644 (file)
@@ -30,7 +30,7 @@ class kernel_runner : public kernel_selector::KernelRunnerInterface {
 public:
     explicit kernel_runner(engine_impl& engine_ref, bool weights_and_bias_exist = false);
 
-    std::vector<uint64_t> run_kernels(const kernel_selector::KernelsData& kernelsData) override;
+    std::vector<std::chrono::nanoseconds> run_kernels(const kernel_selector::KernelsData& kernelsData) override;
 
 private:
     const int compilation_batch_size = 50;
index f5561b0..e139dbd 100644 (file)
@@ -107,25 +107,23 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                              lookup_table_gpu::create);
-        implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                              lookup_table_gpu::create);
-        implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                              lookup_table_gpu::create);
-        implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                              lookup_table_gpu::create);
-        implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                              lookup_table_gpu::create);
-        implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                              lookup_table_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_lookup_table_gpu::attach_lookup_table_gpu() {
+    implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                          lookup_table_gpu::create);
+    implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                          lookup_table_gpu::create);
+    implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
+                                          lookup_table_gpu::create);
+    implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                          lookup_table_gpu::create);
+    implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                          lookup_table_gpu::create);
+    implementation_map<lookup_table>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+                                          lookup_table_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index a819aa1..769e74a 100644 (file)
@@ -40,7 +40,7 @@ struct lrn_gpu : typed_primitive_gpu_impl<lrn> {
         lrn_params.k = primitive->k;
         lrn_params.localSize = primitive->size;
         lrn_params.divMode = kernel_selector::kernel_divider_mode::FIXED;
-        lrn_params.normMode = primitive->norm_region == cldnn_lrn_norm_region_within_channel
+        lrn_params.normMode = primitive->norm_region == lrn_norm_region_within_channel
                                   ? kernel_selector::lrn_mode::WITHIN_CHANNEL
                                   : kernel_selector::lrn_mode::ACROSS_CHANNEL;
 
@@ -58,25 +58,23 @@ struct lrn_gpu : typed_primitive_gpu_impl<lrn> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                     lrn_gpu::create);
-        implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                     lrn_gpu::create);
-        implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                     lrn_gpu::create);
-        implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                     lrn_gpu::create);
-        implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                     lrn_gpu::create);
-        implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                     lrn_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_lrn_gpu::attach_lrn_gpu() {
+    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                 lrn_gpu::create);
+    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                 lrn_gpu::create);
+    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                 lrn_gpu::create);
+    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                 lrn_gpu::create);
+    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                 lrn_gpu::create);
+    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                 lrn_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index e1e72f7..f2a1525 100644 (file)
@@ -47,24 +47,23 @@ public:
     static primitive_impl* create(const lstm_dynamic_input_node& arg) {
         auto dlstm_input_params = get_default_params<kernel_selector::lstm_dynamic_input_params>(arg);
 
-        // dyn length
-        const auto& dyn_length_tensor = arg.dyn_length().get_output_layout();
-        dlstm_input_params.inputs.push_back(convert_data_tensor(dyn_length_tensor));
-
-        // weights
         const auto& weights_layout = arg.weights().get_output_layout();
-        dlstm_input_params.weights = convert_data_tensor(weights_layout);
+        dlstm_input_params.weights = convert_weights_tensor(weights_layout);
 
         if (arg.bias_term()) {
             const auto& bias_layout = arg.bias().get_output_layout();
-            dlstm_input_params.set_bias(convert_data_tensor(bias_layout));
+            dlstm_input_params.bias.push_back(convert_data_tensor(bias_layout));
         }
 
+        // dyn length
+        const auto& dyn_length_tensor = arg.dyn_length().get_output_layout();
+        dlstm_input_params.inputs.push_back(convert_data_tensor(dyn_length_tensor));
+
         dlstm_input_params.direction = arg.direction();
 
         // finally get best kernel
         auto lstm_dynamic_optional_params =
-            get_default_optional_params<kernel_selector::lstm_dynamic_input_optional_params>(arg.get_program());
+            get_default_weights_bias_optional_params<kernel_selector::lstm_dynamic_input_optional_params>(arg.get_program());
 
         auto& kernel_selector = kernel_selector::lstm_dynamic_input_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(dlstm_input_params, lstm_dynamic_optional_params);
@@ -80,19 +79,17 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = lstm_dynamic_input_gpu::create;
+namespace detail {
 
-        implementation_map<lstm_dynamic_input>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_lstm_dynamic_input_gpu::attach_lstm_dynamic_input_gpu() {
+    auto val_fw = lstm_dynamic_input_gpu::create;
+
+    implementation_map<lstm_dynamic_input>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 11f1f26..0df109e 100644 (file)
@@ -100,19 +100,17 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = lstm_dynamic_timeloop_gpu::create;
-
-        implementation_map<lstm_dynamic_timeloop>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_lstm_dynamic_timeloop_gpu::attach_lstm_dynamic_timeloop_gpu() {
+    auto val_fw = lstm_dynamic_timeloop_gpu::create;
+
+    implementation_map<lstm_dynamic_timeloop>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 6786c6b..aee130a 100644 (file)
@@ -58,7 +58,7 @@ public:
             }
         }
 
-        lstm_elt_params.SetOffsetOrder(arg.offset_order());
+        lstm_elt_params.SetOffsetOrder(static_cast<int32_t>(arg.offset_order()));
         lstm_elt_params.clip = arg.clip();
         lstm_elt_params.input_forget = arg.input_forget();
         lstm_elt_params.direction = arg.direction();
@@ -77,21 +77,19 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = lstm_elt_gpu::create;
-
-        implementation_map<lstm_elt>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_lstm_elt_gpu::attach_lstm_elt_gpu() {
+    auto val_fw = lstm_elt_gpu::create;
+
+    implementation_map<lstm_elt>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 9cb5312..abb33e4 100644 (file)
@@ -97,21 +97,19 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = lstm_gemm_gpu::create;
-
-        implementation_map<lstm_gemm>::add({
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
-            {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_lstm_gemm_gpu::attach_lstm_gemm_gpu() {
+    auto val_fw = lstm_gemm_gpu::create;
+
+    implementation_map<lstm_gemm>::add({
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index fb9f379..1c32060 100644 (file)
@@ -70,31 +70,29 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                               max_unpooling_gpu::create);
-        implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                               max_unpooling_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_max_unpooling_gpu::attach_max_unpooling_gpu() {
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                           max_unpooling_gpu::create);
+    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
+                                           max_unpooling_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index b48e621..ffec2a4 100644 (file)
@@ -18,6 +18,7 @@
 #include "memory_gpu.h"
 #include "engine_impl.h"
 #include "ocl_base_event.h"
+#include <stdexcept>
 
 namespace cldnn {
 namespace gpu {
@@ -94,7 +95,7 @@ gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const la
             order = CL_RGBA;
             break;
         default:
-            throw error("unsupported image type!");
+            throw std::invalid_argument("unsupported image type!");
     }
 
     cl_channel_type type = layout.data_type == data_types::f16 ? CL_HALF_FLOAT : CL_FLOAT;
@@ -114,6 +115,7 @@ gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine,
       _context(engine->get_context()),
       _lock_count(0),
       _buffer(buffer),
+      _width(0), _height(0), _row_pitch(0), _slice_pitch(0),
       _mapped_ptr(nullptr) {}
 
 void* gpu_image2d::lock() {
index 2b8ddf2..b3f1199 100644 (file)
@@ -37,12 +37,12 @@ public:
     static primitive_impl* create(mutable_data_node const& arg) { return new mutable_data_gpu(arg, {}); }
 };
 
-namespace {
-struct attach {
-    attach() { implementation_map<mutable_data>::add({{engine_types::ocl, mutable_data_gpu::create}}); }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_mutable_data_gpu::attach_mutable_data_gpu() {
+    implementation_map<mutable_data>::add({{engine_types::ocl, mutable_data_gpu::create}});
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 9b50a9d..3dc9149 100644 (file)
@@ -57,29 +57,31 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                     mvn_gpu::create);
-        implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                     mvn_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_mvn_gpu::attach_mvn_gpu() {
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16),
+        mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16),
+        mvn_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index b212b2d..c221f45 100644 (file)
@@ -68,25 +68,23 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                           normalize_gpu::create);
-        implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                           normalize_gpu::create);
-        implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                           normalize_gpu::create);
-        implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                           normalize_gpu::create);
-        implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                           normalize_gpu::create);
-        implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                           normalize_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_normalize_gpu::attach_normalize_gpu() {
+    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                       normalize_gpu::create);
+    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                       normalize_gpu::create);
+    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                       normalize_gpu::create);
+    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                       normalize_gpu::create);
+    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                       normalize_gpu::create);
+    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                       normalize_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 813741a..915eb79 100644 (file)
@@ -33,6 +33,13 @@ bool is_event_profiled(const cl::Event& event) {
     }
     return false;
 }
+
+instrumentation::profiling_interval get_profiling_interval(const char* name, cl_ulong start, cl_ulong end) {
+    auto diff = std::chrono::nanoseconds(end - start);
+    auto period = std::make_shared<instrumentation::profiling_period_basic>(diff);
+    return { name, period };
+}
+
 }  // namespace
 
 void CL_CALLBACK base_event::ocl_event_completion_callback(cl_event, cl_int, void* me) {
@@ -66,7 +73,7 @@ bool base_event::is_set_impl() {
     return true;
 }
 
-bool base_event::add_event_handler_impl(cldnn_event_handler, void*) {
+bool base_event::add_event_handler_impl(event_handler, void*) {
     set_ocl_callback();
     return true;
 }
@@ -77,7 +84,7 @@ static const std::vector<profiling_period_ocl_start_stop> profiling_periods{
     {"executing", CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END},
 };
 
-bool base_event::get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) {
+bool base_event::get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) {
     if (!is_event_profiled(_event))
         return true;
 
@@ -88,7 +95,7 @@ bool base_event::get_profiling_info_impl(std::list<cldnn_profiling_interval>& in
         _event.getProfilingInfo(period.start, &start);
         _event.getProfilingInfo(period.stop, &end);
 
-        info.push_back({period.name, end - start});
+        info.push_back(get_profiling_interval(period.name, start, end));
     }
 
     return true;
@@ -113,7 +120,7 @@ bool base_events::is_set_impl() {
     return true;
 }
 
-bool base_events::get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) {
+bool base_events::get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) {
     cl_ulong min_queue = CL_ULONG_MAX;
     cl_ulong min_sub = CL_ULONG_MAX;
     cl_ulong min_start = CL_ULONG_MAX;
@@ -145,9 +152,9 @@ bool base_events::get_profiling_info_impl(std::list<cldnn_profiling_interval>& i
         execution_time += curr_end - curr_start;
     }
 
-    info.push_back({profiling_periods[0].name, min_sub - min_queue});
-    info.push_back({profiling_periods[1].name, min_start - min_sub});
-    info.push_back({profiling_periods[2].name, execution_time});
+    info.push_back(get_profiling_interval(profiling_periods[0].name, min_queue, min_sub));
+    info.push_back(get_profiling_interval(profiling_periods[1].name, min_sub, min_start));
+    info.push_back(get_profiling_interval(profiling_periods[2].name, 0, execution_time));
 
     return true;
 }
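The profiling hunks above move from the C-style cldnn_profiling_interval (a name plus a raw uint64_t) to instrumentation::profiling_interval, whose value is a shared profiling period built from std::chrono::nanoseconds. Below is a small stand-alone sketch of that construction; the struct shapes are simplified assumptions, not the real instrumentation headers.

#include <chrono>
#include <memory>
#include <string>

// Simplified stand-ins for the cldnn::instrumentation types (assumed shapes).
struct profiling_period_basic {
    explicit profiling_period_basic(std::chrono::nanoseconds d) : duration(d) {}
    std::chrono::nanoseconds value() const { return duration; }
    std::chrono::nanoseconds duration;
};

struct profiling_interval {
    std::string name;
    std::shared_ptr<profiling_period_basic> value;
};

// Turn an OpenCL start/end timestamp pair into a profiling interval carrying a
// std::chrono duration, mirroring get_profiling_interval in the hunk above.
profiling_interval make_profiling_interval(const char* name,
                                           unsigned long long start_ns,
                                           unsigned long long end_ns) {
    auto diff = std::chrono::nanoseconds(end_ns - start_ns);
    return {name, std::make_shared<profiling_period_basic>(diff)};
}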
index ab104d4..39e6f04 100644 (file)
@@ -50,6 +50,7 @@ public:
         _event = ev;
         _queue_stamp = q_stamp;
         _attached = true;
+        _set = false;
     }
 
     std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
@@ -64,8 +65,8 @@ private:
 private:
     void wait_impl() override;
     bool is_set_impl() override;
-    bool add_event_handler_impl(cldnn_event_handler, void*) override;
-    bool get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) override;
+    bool add_event_handler_impl(event_handler, void*) override;
+    bool get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) override;
 
     friend struct base_events;
 
@@ -105,7 +106,7 @@ private:
     void wait_impl() override;
     bool is_set_impl() override;
 
-    bool get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) override;
+    bool get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) override;
 
     std::shared_ptr<gpu_toolkit> _ctx;
     std::vector<event_impl::ptr> _events;
index abc25b5..3241f25 100644 (file)
@@ -59,6 +59,20 @@ public:
           _last_barrier_ev(other._last_barrier_ev),
           _output_event(other._output_event) {}
 
+    gpu_queue& operator=(gpu_queue&& other) {
+        if (this != &other) {
+            id = other.id;
+            _context = std::move(other._context);
+            _command_queue = std::move(other._command_queue);
+            _queue_counter = other._queue_counter.load();
+            _last_barrier = other._last_barrier.load();
+            _events_pool = std::move(other._events_pool);
+            _last_barrier_ev = std::move(other._last_barrier_ev);
+            _output_event = std::move(other._output_event);
+        }
+        return *this;
+    }
+
     ~gpu_queue() = default;
 
     void sync_events(std::vector<event_impl::ptr> const& deps);
index 3098d5f..3180f86 100644 (file)
@@ -70,7 +70,7 @@ namespace cldnn {
 namespace gpu {
 
 ocl_error::ocl_error(cl::Error const& err)
-    : error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {}
+    : std::runtime_error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {}
 
 std::shared_ptr<gpu_toolkit> gpu_toolkit::create(const configuration& cfg) {
     struct make_shared_wa : public gpu_toolkit {
@@ -204,9 +204,13 @@ void gpu_toolkit::set_output_event(uint16_t queue_id, bool out_event) {
 std::ofstream& gpu_toolkit::open_log() {
     if (!_logger->_log_file.is_open()) {
         _logger->_log_file.open(_configuration.log, std::ios::out | std::ios::trunc);
-        if (!_logger->_log_file.good())
+        if (!_logger->_log_file.good()) {
+            _logger->_log_file.close();
             throw std::runtime_error("Could not initialize ocl_toolkit log file");
+        }
+
         if (!_logger->_log_file.is_open()) {
+            _logger->_log_file.close();
             throw std::runtime_error("Could not open ocl_toolkit log file '" + _configuration.log + "' for writing");
         }
     }
index adab105..8f8924e 100644 (file)
@@ -29,6 +29,7 @@
 #include <chrono>
 #include <string>
 #include <vector>
+#include <stdexcept>
 
 namespace cldnn {
 typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
@@ -40,7 +41,7 @@ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithP
     const cl_queue_properties* properties,
     cl_int* errcodeRet);
 
-class ocl_error : public error {
+class ocl_error : public std::runtime_error {
 public:
     explicit ocl_error(cl::Error const& err);
 };
index 3c208a5..3e45cec 100644 (file)
@@ -30,11 +30,12 @@ void user_event::set_impl() {
     _attached = true;
 }
 
-bool user_event::get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) {
+bool user_event::get_profiling_info_impl(std::list<cldnn::instrumentation::profiling_interval>& info) {
     if (_duration == nullptr) {
         return false;
     }
 
-    info.push_back({"duration", static_cast<uint64_t>(_duration->value().count())});
+    auto period = std::make_shared<instrumentation::profiling_period_basic>(_duration->value());
+    info.push_back({"duration", period });
     return true;
-}
\ No newline at end of file
+}
index 6b0202e..e101e45 100644 (file)
@@ -17,7 +17,7 @@
 #pragma once
 
 #include "ocl_base_event.h"
-#include "api/CPP/profiling.hpp"
+#include "api/profiling.hpp"
 #include <memory>
 #include <list>
 
@@ -43,7 +43,7 @@ struct user_event : public base_event, public cldnn::user_event {
             _set = set;
         }
     }
-    bool get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) override;
+    bool get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) override;
 
 protected:
     cldnn::instrumentation::timer<> _timer;
index 3ac398b..000154c 100644 (file)
@@ -56,29 +56,25 @@ struct one_hot_gpu : typed_primitive_gpu_impl<one_hot> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = one_hot_gpu::create;
+namespace detail {
 
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    }
-    ~attach() = default;
-};
+attach_one_hot_gpu::attach_one_hot_gpu() {
+    auto val_fw = one_hot_gpu::create;
 
-attach attach_impl;
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index d50908e..86d8b04 100644 (file)
@@ -52,16 +52,14 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<permute>::add({
-            {engine_types::ocl, permute_gpu::create},
-        });
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_permute_gpu::attach_permute_gpu() {
+    implementation_map<permute>::add({
+        {engine_types::ocl, permute_gpu::create},
+    });
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 395a120..66dd58c 100644 (file)
@@ -163,62 +163,66 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf),
-                                         pooling_gpu::create);
-        // block fp16 format
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16),
-                                         pooling_gpu::create);
-        // 3D
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
-                                         pooling_gpu::create);
-        // MMAD
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(
-            std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32),
-            pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
-                                         pooling_gpu::create);
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
-                                         pooling_gpu::create);
-        //
-        implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32),
-                                         pooling_gpu::create);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_pooling_gpu::attach_pooling_gpu() {
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf),
+                                     pooling_gpu::create);
+    // block fp16 format
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16),
+                                     pooling_gpu::create);
+    // 3D
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx_f16),
+                                     pooling_gpu::create);
+    // MMAD
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(
+        std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32),
+        pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
+                                     pooling_gpu::create);
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
+                                     pooling_gpu::create);
+    //
+    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32),
+                                     pooling_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 3d1984e..d474ba8 100644 (file)
 namespace cldnn {
 namespace gpu {
 
+bool is_user_cpu(const program_node* user) {
+    if (user->can_be_optimized()) {
+        auto users = user->get_users();
+        for (const auto& u : users) {
+            if (is_user_cpu(u)) {
+                return true;
+            }
+        }
+        return false;
+    }
+    return user->get_selected_impl()->is_cpu();
+}
+
 bool is_any_user_cpu(const std::list<const program_node*>& users) {
     for (const auto& user : users) {
-        if (user->get_selected_impl()->is_cpu())
+        if (is_user_cpu(user))
             return true;
     }
     return false;
index 041b8ae..800035b 100644 (file)
@@ -25,6 +25,7 @@
 #include "error_handler.h"
 #include "kernel_selector_helper.h"
 #include "network_impl.h"
+#include "register_gpu.hpp"
 #include <vector>
 #include <list>
 #include <utility>
@@ -82,6 +83,13 @@ protected:
             args.inputs.push_back((memory_impl::cptr)&instance.input_memory(i));
         }
 
+        if (instance.has_fused_primitives()) {
+            size_t count = instance.get_fused_mem_count();
+            for (size_t i = 0; i < count; i++) {
+                args.fused_op_inputs.push_back((memory_impl::cptr) &instance.fused_memory(i));
+            }
+        }
+
         args.output = (memory_impl::cptr) &instance.output_memory();
 
         return args;
index be10ccf..a2785d4 100644 (file)
@@ -21,6 +21,7 @@
 #include "engine_impl.h"
 #include "math_utils.h"
 #include "error_handler.h"
+#include "register_gpu.hpp"
 
 #include <algorithm>
 #include <string>
@@ -52,8 +53,6 @@ inline bool hasSingleBatchOutput(const program_node& node) {
 
 struct roi_t {
     float x0, y0, x1, y1;
-
-    inline float area() const { return std::max(0.f, y1 - y0 + 1.f) * std::max(0.f, x1 - x0 + 1.f); }
 };
 
 struct delta_t {
@@ -420,18 +419,15 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                          proposal_gpu::create);
-        implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                          proposal_gpu::create);
-    }
+namespace detail {
 
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_proposal_gpu::attach_proposal_gpu() {
+    implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                      proposal_gpu::create);
+    implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                      proposal_gpu::create);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 746a815..10288d4 100644 (file)
@@ -57,20 +57,16 @@ struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = pyramid_roi_align_gpu::create;
-        implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                   val_fw);
-        implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                   val_fw);
-    }
-    ~attach() = default;
-};
+namespace detail {
 
-attach attach_impl;
+attach_pyramid_roi_align_gpu::attach_pyramid_roi_align_gpu() {
+    auto val_fw = pyramid_roi_align_gpu::create;
+    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                               val_fw);
+    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                               val_fw);
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
index 86dc7ef..05ad63a 100644 (file)
@@ -38,7 +38,7 @@ public:
             get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());
 
         quantize_params.levels = arg.get_primitive()->levels;
-        quantize_params.packed_binary_output = arg.get_packed_binary_output();
+        quantize_params.packed_binary_output = arg.get_output_layout().data_type == data_types::bin;
 
         for (size_t i = 1; i < arg.inputs_count(); i++) {
             quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
@@ -60,18 +60,20 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = quantize_gpu::create;
+namespace detail {
 
-        implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-        implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_quantize_gpu::attach_quantize_gpu() {
+    auto val_fw = quantize_gpu::create;
+
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 0ff1bf9..fc00e5f 100644 (file)
@@ -84,23 +84,21 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = reduce_gpu::create;
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-        implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_reduce_gpu::attach_reduce_gpu() {
+    auto val_fw = reduce_gpu::create;
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
+    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index b416ea3..bbf50ae 100644 (file)
@@ -55,20 +55,16 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<region_yolo>::add(
-            {{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), region_yolo_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), region_yolo_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), region_yolo_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), region_yolo_gpu::create}});
-    }
-    ~attach() {}
-};
+namespace detail {
 
-attach attach_impl;
-}  // namespace
+attach_region_yolo_gpu::attach_region_yolo_gpu() {
+    implementation_map<region_yolo>::add(
+        {{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), region_yolo_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), region_yolo_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), region_yolo_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), region_yolo_gpu::create}});
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp
new file mode 100644 (file)
index 0000000..3620ff5
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "register_gpu.hpp"
+
+namespace cldnn { namespace gpu {
+
+#define REGISTER_GPU(prim)                      \
+    static detail::attach_##prim##_gpu attach_##prim
+
+void register_implementations_gpu() {
+    REGISTER_GPU(activation);
+    REGISTER_GPU(activation_grad);
+    REGISTER_GPU(apply_adam);
+    REGISTER_GPU(arg_max_min);
+    REGISTER_GPU(average_unpooling);
+    REGISTER_GPU(batch_norm);
+    REGISTER_GPU(batch_norm_grad);
+    REGISTER_GPU(binary_convolution);
+    REGISTER_GPU(border);
+    REGISTER_GPU(broadcast);
+    REGISTER_GPU(concatenation);
+    REGISTER_GPU(condition);
+    REGISTER_GPU(contract);
+    REGISTER_GPU(convolution);
+    REGISTER_GPU(convolution_grad_weights);
+    REGISTER_GPU(crop);
+    REGISTER_GPU(custom_gpu_primitive);
+    REGISTER_GPU(data);
+    REGISTER_GPU(deconvolution);
+    REGISTER_GPU(deformable_conv);
+    REGISTER_GPU(deformable_interp);
+    REGISTER_GPU(depth_to_space);
+    REGISTER_GPU(detection_output);
+    REGISTER_GPU(eltwise);
+    REGISTER_GPU(embed);
+    REGISTER_GPU(fully_connected);
+    REGISTER_GPU(fully_connected_grad_input);
+    REGISTER_GPU(fully_connected_grad_weights);
+    REGISTER_GPU(gather);
+    REGISTER_GPU(gemm);
+    REGISTER_GPU(index_select);
+    REGISTER_GPU(input_layout);
+    REGISTER_GPU(lookup_table);
+    REGISTER_GPU(lrn);
+    REGISTER_GPU(lstm_gemm);
+    REGISTER_GPU(lstm_elt);
+    REGISTER_GPU(max_unpooling);
+    REGISTER_GPU(mutable_data);
+    REGISTER_GPU(mvn);
+    REGISTER_GPU(normalize);
+    REGISTER_GPU(one_hot);
+    REGISTER_GPU(permute);
+    REGISTER_GPU(pooling);
+    REGISTER_GPU(prior_box);
+    REGISTER_GPU(proposal);
+    REGISTER_GPU(pyramid_roi_align);
+    REGISTER_GPU(quantize);
+    REGISTER_GPU(reduce);
+    REGISTER_GPU(region_yolo);
+    REGISTER_GPU(reorder);
+    REGISTER_GPU(reorg_yolo);
+    REGISTER_GPU(reshape);
+    REGISTER_GPU(reverse_sequence);
+    REGISTER_GPU(roi_pooling);
+    REGISTER_GPU(scale);
+    REGISTER_GPU(scale_grad_input);
+    REGISTER_GPU(scale_grad_weights);
+    REGISTER_GPU(select);
+    REGISTER_GPU(shuffle_channels);
+    REGISTER_GPU(softmax);
+    REGISTER_GPU(softmax_loss_grad);
+    REGISTER_GPU(strided_slice);
+    REGISTER_GPU(tile);
+    REGISTER_GPU(upsampling);
+    REGISTER_GPU(fused_conv_bn_scale);
+    REGISTER_GPU(fused_conv_eltwise);
+    REGISTER_GPU(lstm_dynamic_input);
+    REGISTER_GPU(lstm_dynamic_timeloop);
+    REGISTER_GPU(generic_layer);
+    REGISTER_GPU(gather_tree);
+}
+
+}  // namespace gpu
+}  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp
new file mode 100644 (file)
index 0000000..acfa5c9
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "api/activation.hpp"
+#include "api/activation_grad.hpp"
+#include "api/apply_adam.hpp"
+#include "api/arg_max_min.hpp"
+#include "api/average_unpooling.hpp"
+#include "api/batch_norm.hpp"
+#include "api/batch_norm_grad.hpp"
+#include "api/binary_convolution.hpp"
+#include "api/border.hpp"
+#include "api/broadcast.hpp"
+#include "api/concatenation.hpp"
+#include "api/condition.hpp"
+#include "api/contract.hpp"
+#include "api/convolution.hpp"
+#include "api/convolution_grad_weights.hpp"
+#include "api/crop.hpp"
+#include "api/custom_gpu_primitive.hpp"
+#include "api/data.hpp"
+#include "api/deconvolution.hpp"
+#include "api/depth_to_space.hpp"
+#include "api/detection_output.hpp"
+#include "api/eltwise.hpp"
+#include "api/embed.hpp"
+#include "api/fully_connected.hpp"
+#include "api/fully_connected_grad_input.hpp"
+#include "api/fully_connected_grad_weights.hpp"
+#include "api/gather.hpp"
+#include "api/gemm.hpp"
+#include "api/index_select.hpp"
+#include "api/input_layout.hpp"
+#include "api/lookup_table.hpp"
+#include "api/lrn.hpp"
+#include "api/lstm.hpp"
+#include "api/lstm_dynamic.hpp"
+#include "api/max_unpooling.hpp"
+#include "api/mutable_data.hpp"
+#include "api/mvn.hpp"
+#include "api/normalize.hpp"
+#include "api/one_hot.hpp"
+#include "api/permute.hpp"
+#include "api/pooling.hpp"
+#include "api/prior_box.hpp"
+#include "api/proposal.hpp"
+#include "api/pyramid_roi_align.hpp"
+#include "api/quantize.hpp"
+#include "api/reduce.hpp"
+#include "api/region_yolo.hpp"
+#include "api/reorder.hpp"
+#include "api/reorg_yolo.hpp"
+#include "api/reshape.hpp"
+#include "api/reverse_sequence.hpp"
+#include "api/roi_pooling.hpp"
+#include "api/scale.hpp"
+#include "api/scale_grad_input.hpp"
+#include "api/scale_grad_weights.hpp"
+#include "api/select.hpp"
+#include "api/shuffle_channels.hpp"
+#include "api/softmax.hpp"
+#include "api/softmax_loss_grad.hpp"
+#include "api/strided_slice.hpp"
+#include "api/tile.hpp"
+#include "api/upsampling.hpp"
+#include "api/gather_tree.hpp"
+#include "api_extension/fused_conv_bn_scale.hpp"
+#include "api_extension/fused_conv_eltwise.hpp"
+#include "api_extension/lstm_dynamic_input.hpp"
+#include "api_extension/lstm_dynamic_timeloop.hpp"
+#include "generic_layer.hpp"
+
+
+namespace cldnn { namespace gpu {
+void register_implementations_gpu();
+
+namespace detail {
+
+#define REGISTER_GPU(prim)              \
+    struct attach_##prim##_gpu {        \
+        attach_##prim##_gpu();          \
+    }
+
+REGISTER_GPU(activation);
+REGISTER_GPU(activation_grad);
+REGISTER_GPU(apply_adam);
+REGISTER_GPU(arg_max_min);
+REGISTER_GPU(average_unpooling);
+REGISTER_GPU(batch_norm);
+REGISTER_GPU(batch_norm_grad);
+REGISTER_GPU(binary_convolution);
+REGISTER_GPU(border);
+REGISTER_GPU(broadcast);
+REGISTER_GPU(concatenation);
+REGISTER_GPU(condition);
+REGISTER_GPU(contract);
+REGISTER_GPU(convolution);
+REGISTER_GPU(convolution_grad_weights);
+REGISTER_GPU(crop);
+REGISTER_GPU(custom_gpu_primitive);
+REGISTER_GPU(data);
+REGISTER_GPU(deconvolution);
+REGISTER_GPU(deformable_conv);
+REGISTER_GPU(deformable_interp);
+REGISTER_GPU(depth_to_space);
+REGISTER_GPU(detection_output);
+REGISTER_GPU(eltwise);
+REGISTER_GPU(embed);
+REGISTER_GPU(fully_connected);
+REGISTER_GPU(fully_connected_grad_input);
+REGISTER_GPU(fully_connected_grad_weights);
+REGISTER_GPU(gather);
+REGISTER_GPU(gemm);
+REGISTER_GPU(index_select);
+REGISTER_GPU(input_layout);
+REGISTER_GPU(lookup_table);
+REGISTER_GPU(lrn);
+REGISTER_GPU(lstm_gemm);
+REGISTER_GPU(lstm_elt);
+REGISTER_GPU(max_unpooling);
+REGISTER_GPU(mutable_data);
+REGISTER_GPU(mvn);
+REGISTER_GPU(normalize);
+REGISTER_GPU(one_hot);
+REGISTER_GPU(permute);
+REGISTER_GPU(pooling);
+REGISTER_GPU(prior_box);
+REGISTER_GPU(proposal);
+REGISTER_GPU(pyramid_roi_align);
+REGISTER_GPU(quantize);
+REGISTER_GPU(reduce);
+REGISTER_GPU(region_yolo);
+REGISTER_GPU(reorder);
+REGISTER_GPU(reorg_yolo);
+REGISTER_GPU(reshape);
+REGISTER_GPU(reverse_sequence);
+REGISTER_GPU(roi_pooling);
+REGISTER_GPU(scale);
+REGISTER_GPU(scale_grad_input);
+REGISTER_GPU(scale_grad_weights);
+REGISTER_GPU(select);
+REGISTER_GPU(shuffle_channels);
+REGISTER_GPU(softmax);
+REGISTER_GPU(softmax_loss_grad);
+REGISTER_GPU(strided_slice);
+REGISTER_GPU(tile);
+REGISTER_GPU(upsampling);
+REGISTER_GPU(fused_conv_bn_scale);
+REGISTER_GPU(fused_conv_eltwise);
+REGISTER_GPU(lstm_dynamic_input);
+REGISTER_GPU(lstm_dynamic_timeloop);
+REGISTER_GPU(generic_layer);
+REGISTER_GPU(gather_tree);
+
+#undef REGISTER_GPU
+
+}  // namespace detail
+}  // namespace gpu
+}  // namespace cldnn
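
The two new files above replace the registration pattern removed throughout this section: previously every *_gpu.cpp defined an anonymous attach struct plus a file-local static attach_impl object, so kernel registration depended on each translation unit's static initializer. Now register_gpu.hpp declares one attach_<prim>_gpu struct per primitive, each *_gpu.cpp defines its constructor, and register_gpu.cpp instantiates all of them as function-local statics inside register_implementations_gpu(), which can be called explicitly instead of relying on per-file static initialization. A compact sketch of the same mechanism under a toy registry (registry, factory and the printed names are invented for this illustration, not clDNN API):

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    // Toy registry standing in for implementation_map<...>.
    using factory = std::function<void()>;
    std::map<std::string, factory>& registry() {
        static std::map<std::string, factory> r;
        return r;
    }

    // Header side: one attach_<prim>_gpu struct per primitive.
    #define REGISTER_GPU(prim)       \
        struct attach_##prim##_gpu { \
            attach_##prim##_gpu();   \
        }
    REGISTER_GPU(pooling);
    REGISTER_GPU(softmax);
    #undef REGISTER_GPU

    // Each *_gpu.cpp defines its constructor and fills the registry there.
    attach_pooling_gpu::attach_pooling_gpu() { registry()["pooling"] = [] { std::cout << "pooling kernel\n"; }; }
    attach_softmax_gpu::attach_softmax_gpu() { registry()["softmax"] = [] { std::cout << "softmax kernel\n"; }; }

    // register_gpu.cpp side: function-local statics run every constructor exactly once,
    // the first time register_implementations_gpu() is called.
    #define REGISTER_GPU(prim) static attach_##prim##_gpu attach_##prim
    void register_implementations_gpu() {
        REGISTER_GPU(pooling);
        REGISTER_GPU(softmax);
    }
    #undef REGISTER_GPU

    int main() {
        register_implementations_gpu();
        for (auto& entry : registry()) entry.second();
    }
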
index 4c4dd08..e55d589 100644 (file)
@@ -70,16 +70,16 @@ public:
 
         if (reorder_params.mode != kernel_selector::mean_subtruct_mode::NONE) {
             switch (arg.get_primitive()->mean_mode) {
-                case cldnn_reorder_mean_mode::mean_none:
+                case reorder_mean_mode::none:
                     reorder_params.mean_op = kernel_selector::mean_op::NONE;
                     break;
-                case cldnn_reorder_mean_mode::mean_mul:
+                case reorder_mean_mode::mul:
                     reorder_params.mean_op = kernel_selector::mean_op::MUL;
                     break;
-                case cldnn_reorder_mean_mode::mean_subtract:
+                case reorder_mean_mode::subtract:
                     reorder_params.mean_op = kernel_selector::mean_op::SUB;
                     break;
-                case cldnn_reorder_mean_mode::mean_div:
+                case reorder_mean_mode::div:
                     reorder_params.mean_op = kernel_selector::mean_op::DIV;
                     break;
                 default:
@@ -109,14 +109,12 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() { implementation_map<reorder>::add({{engine_types::ocl, reorder_gpu::create}}); }
-    ~attach() {}
-};
+namespace detail {
 
-attach attach_impl;
+attach_reorder_gpu::attach_reorder_gpu() {
+    implementation_map<reorder>::add({{engine_types::ocl, reorder_gpu::create}});
+}
 
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index d986206..91f1c37 100644 (file)
@@ -52,22 +52,18 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = reorg_yolo_gpu::create;
-        implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    }
-    ~attach() {}
-};
+namespace detail {
 
-attach attach_impl;
-}  // namespace
+attach_reorg_yolo_gpu::attach_reorg_yolo_gpu() {
+    auto val_fw = reorg_yolo_gpu::create;
+    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 3d28c1f..25eda17 100644 (file)
@@ -53,12 +53,12 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() { implementation_map<reshape>::add({{engine_types::ocl, reshape_gpu::create}}); }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_reshape_gpu::attach_reshape_gpu() {
+    implementation_map<reshape>::add({{engine_types::ocl, reshape_gpu::create}});
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 1ab9d98..ec3a89a 100644 (file)
@@ -55,18 +55,16 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = reverse_sequence_gpu::create;
-        implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                  val_fw);
-        implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                  val_fw);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_reverse_sequence_gpu::attach_reverse_sequence_gpu() {
+    auto val_fw = reverse_sequence_gpu::create;
+    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                              val_fw);
+    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                              val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 182f43a..c81d2a1 100644 (file)
@@ -123,19 +123,15 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                             roi_pooling_gpu::create);
-        implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                             roi_pooling_gpu::create);
-    }
+namespace detail {
 
-    ~attach() {}
-};
+attach_roi_pooling_gpu::attach_roi_pooling_gpu() {
+    implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                         roi_pooling_gpu::create);
+    implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                         roi_pooling_gpu::create);
+}
 
-attach attach_impl;
-}  // namespace
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 0134451..3be7604 100644 (file)
@@ -32,8 +32,8 @@ struct scale_gpu : typed_primitive_gpu_impl<scale> {
     using parent::parent;
 
 protected:
-    kernel::kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t) const override {
-        kernel::kernel_arguments_data args;
+    kernel::kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {
+        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
         args.inputs = {(memory_impl::cptr) &instance.input_memory(), (memory_impl::cptr) &instance.scale_memory()};
         args.output = (memory_impl::cptr) &instance.output_memory();
 
@@ -78,25 +78,26 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = scale_gpu::create;
-
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), val_fw);
-        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_scale_gpu::attach_scale_gpu() {
+    auto val_fw = scale_gpu::create;
+
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx_f16), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx_f16), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
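
The get_arguments change at the top of this scale_gpu diff pairs with the earlier primitive_gpu_base hunk that collects fused-primitive memories: by seeding the argument structure from parent::get_arguments(instance, split) before overriding inputs and output, whatever the base class now fills in (such as fused_op_inputs) is preserved rather than discarded. A small sketch of that delegation pattern under hypothetical names (Args, Base and Derived are inventions for this note):

    #include <string>
    #include <vector>

    // Hypothetical argument bundle; only the shape of the pattern matters here.
    struct Args {
        std::vector<std::string> inputs;
        std::vector<std::string> fused_op_inputs;
        std::string output;
    };

    struct Base {
        virtual ~Base() = default;
        // The base fills in data shared by all primitives (e.g. fused-op inputs).
        virtual Args get_arguments() const {
            Args args;
            args.fused_op_inputs = {"fused_mem_0"};
            return args;
        }
    };

    struct Derived : Base {
        // Start from the base result instead of a default-constructed Args,
        // then override only the fields this primitive owns.
        Args get_arguments() const override {
            Args args = Base::get_arguments();
            args.inputs = {"input", "scale"};
            args.output = "output";
            return args;
        }
    };

    int main() {
        Derived d;
        return d.get_arguments().fused_op_inputs.empty() ? 1 : 0;  // 0 means fused inputs survived
    }
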
index 7afe76d..5740aa6 100644 (file)
@@ -69,27 +69,25 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = scale_grad_input_gpu::create;
-
-        implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                                  val_fw);
-        implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                  val_fw);
-        implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                  val_fw);
-        implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                  val_fw);
-        implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                                  val_fw);
-        implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                                  val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_scale_grad_input_gpu::attach_scale_grad_input_gpu() {
+    auto val_fw = scale_grad_input_gpu::create;
+
+    implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                              val_fw);
+    implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                              val_fw);
+    implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                              val_fw);
+    implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                              val_fw);
+    implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                              val_fw);
+    implementation_map<scale_grad_input>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                              val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 669dedb..7da6f91 100644 (file)
@@ -71,27 +71,25 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = scale_grad_weights_gpu::create;
-
-        implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                                    val_fw);
-        implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                    val_fw);
-        implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                    val_fw);
-        implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                    val_fw);
-        implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                                    val_fw);
-        implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                                    val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_scale_grad_weights_gpu::attach_scale_grad_weights_gpu() {
+    auto val_fw = scale_grad_weights_gpu::create;
+
+    implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                                val_fw);
+    implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                                val_fw);
+    implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                                val_fw);
+    implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                                val_fw);
+    implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                                val_fw);
+    implementation_map<scale_grad_weights>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                                val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 0fc69e9..c8cb473 100644 (file)
@@ -53,29 +53,26 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<select>::add(
-            {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), select_gpu::create},
+namespace detail {
 
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), select_gpu::create},
+attach_select_gpu::attach_select_gpu() {
+    implementation_map<select>::add(
+        {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), select_gpu::create},
 
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), select_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), select_gpu::create}});
-    }
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), select_gpu::create},
 
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), select_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), select_gpu::create}});
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 3d9df14..5558106 100644 (file)
@@ -60,18 +60,16 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = shuffle_channels_gpu::create;
-        implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                  val_fw);
-        implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                  val_fw);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
+    auto val_fw = shuffle_channels_gpu::create;
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                              val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                              val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index fba4739..6fa7d06 100644 (file)
@@ -88,24 +88,22 @@ struct softmax_gpu : typed_primitive_gpu_impl<softmax> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = softmax_gpu::create;
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    }
-    ~attach() {}
-};
-
-attach attach_impl;
-}  // namespace
-
+namespace detail {
+
+attach_softmax_gpu::attach_softmax_gpu() {
+    auto val_fw = softmax_gpu::create;
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx_f16), val_fw);
+    implementation_map<softmax>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx_f16), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 33dc60a..2dd0364 100644 (file)
@@ -51,28 +51,24 @@ struct softmax_loss_grad_gpu : typed_primitive_gpu_impl<softmax_loss_grad> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = softmax_loss_grad_gpu::create;
-        implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                                   val_fw);
-        implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                   val_fw);
-        implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                   val_fw);
-        implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                   val_fw);
-        implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                                   val_fw);
-        implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                                   val_fw);
-    }
-    ~attach() {}
-};
+namespace detail {
 
-attach attach_impl;
-}  // namespace
+attach_softmax_loss_grad_gpu::attach_softmax_loss_grad_gpu() {
+    auto val_fw = softmax_loss_grad_gpu::create;
+    implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                                               val_fw);
+    implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
+                                               val_fw);
+    implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                               val_fw);
+    implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                               val_fw);
+    implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
+                                               val_fw);
+    implementation_map<softmax_loss_grad>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
+                                               val_fw);
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 8b7b49f..8818d01 100644 (file)
@@ -81,18 +81,16 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = strided_slice_gpu::create;
-        implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                               val_fw);
-        implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                               val_fw);
-    }
-    ~attach() = default;
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_strided_slice_gpu::attach_strided_slice_gpu() {
+    auto val_fw = strided_slice_gpu::create;
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                                           val_fw);
+    implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
+                                           val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 722ff50..43b2f78 100644 (file)
@@ -71,19 +71,17 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        auto val_fw = tile_gpu::create;
+namespace detail {
 
-        implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-        implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-        implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-        implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_tile_gpu::attach_tile_gpu() {
+    auto val_fw = tile_gpu::create;
+
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index e12d758..8b918f6 100644 (file)
@@ -49,7 +49,7 @@ struct upsampling_gpu : typed_primitive_gpu_impl<upsampling> {
 
         const auto& primitive = arg.get_primitive();
         if (primitive->with_activation)
-            convert_activation_func_params(primitive, us_params.activation);
+            convert_activation_func_params(primitive, us_params.activations);
 
         us_params.num_filter = primitive->num_filter;
         us_params.sampleType = convert_to_sample_type(primitive->sample_type);
@@ -68,20 +68,18 @@ struct upsampling_gpu : typed_primitive_gpu_impl<upsampling> {
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<upsampling>::add(
-            {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), upsampling_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), upsampling_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), upsampling_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), upsampling_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), upsampling_gpu::create},
-             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), upsampling_gpu::create}});
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+namespace detail {
+
+attach_upsampling_gpu::attach_upsampling_gpu() {
+    implementation_map<upsampling>::add(
+        {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), upsampling_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), upsampling_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), upsampling_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), upsampling_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), upsampling_gpu::create},
+         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), upsampling_gpu::create}});
+}
+
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 2d563fd..ea725da 100644 (file)
@@ -19,6 +19,7 @@
 #include "prior_box_inst.h"
 #include "input_layout_inst.h"
 #include "implementation_map.h"
+#include "register_gpu.hpp"
 
 #include "network_impl.h"
 #include "events_waiter.h"
@@ -51,19 +52,20 @@ public:
     }
 };
 
-namespace {
-struct attach {
-    attach() {
-        implementation_map<data>::add({{engine_types::ocl, wait_for_events_gpu::create_data}});
+namespace detail {
 
-        implementation_map<input_layout>::add({{engine_types::ocl, wait_for_events_gpu::create_input_layout}});
+attach_data_gpu::attach_data_gpu() {
+    implementation_map<data>::add({ {engine_types::ocl, wait_for_events_gpu::create_data} });
+}
 
-        implementation_map<prior_box>::add({{engine_types::ocl, wait_for_events_gpu::create_prior_box}});
-    }
-    ~attach() {}
-};
-attach attach_impl;
-}  // namespace
+attach_input_layout_gpu::attach_input_layout_gpu() {
+    implementation_map<input_layout>::add({{engine_types::ocl, wait_for_events_gpu::create_input_layout}});
+}
+
+attach_prior_box_gpu::attach_prior_box_gpu() {
+    implementation_map<prior_box>::add({{engine_types::ocl, wait_for_events_gpu::create_prior_box}});
+}
 
+}  // namespace detail
 }  // namespace gpu
 }  // namespace cldnn
index 8d5211b..21cf015 100644 (file)
@@ -25,6 +25,7 @@
 #include "scale_inst.h"
 #include "tensor_type.h"
 #include <memory>
+#include <stdexcept>
 
 /*
 This pass checks if data formats (layouts) of output/input in hidden layers match.
@@ -41,12 +42,11 @@ void add_required_reorders::add_reorder(program_impl& p, program_node* node, pro
     // ToDo: add a method to program_impl class which adds an intermediate node given a node and its user
     auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
     if (it == usr->get_dependencies().end()) {
-        throw error("Inconcistency in topology description: user of a node is not present among its dependecies.",
-                    CLDNN_ERROR);
+        throw std::runtime_error("Inconcistency in topology description: user of a node is not present among its dependecies.");
     }
     auto idx = it - usr->get_dependencies().begin();
     if (idx < 0 || (size_t)idx >= usr->get_dependencies().size()) {
-        throw error("Internal Error: container index out of range exception.", CLDNN_ERROR);
+        throw std::runtime_error("Internal Error: container index out of range exception.");
     }
     p.add_intermediate(new_reorder_node, *usr, idx);
 }
@@ -81,9 +81,8 @@ void add_required_reorders::run(program_impl& p) {
                     correct_layout_selected = true;
                     break;
                 } else {
-                    throw error("Internal Error: no layout format available for " + usr->id() + " comaptible with " +
-                                    node->id(),
-                                CLDNN_ERROR);
+                    throw std::runtime_error("Internal Error: no layout format available for " + usr->id() + " comaptible with " +
+                                    node->id());
                 }
             }
         }
@@ -110,9 +109,8 @@ void add_required_reorders::run(program_impl& p) {
             }
 
             if (!correct_layout_selected) {
-                throw error("Internal Error: no implementation for " + usr->id() +
-                                " kernel which satisfies output format dependecies.",
-                            CLDNN_ERROR);
+                throw std::runtime_error("Internal Error: no implementation for " + usr->id() +
+                                " kernel which satisfies output format dependecies.");
             }
         }
 
index f71e6d2..ac2ace8 100644 (file)
@@ -36,12 +36,10 @@ void calculate_prior_boxes::run(program_impl& p) {
         pb_node.calc_result();
         p.remove_connection(pb_node.input(), pb_node);
 
-        auto& result = pb_node.get_result_buffer();
-        result.add_ref();  // need to inc ref count since we will be assigning this memory as cldnn_memory in next line
-                           // that is not ref_count_obj
-        auto cpp_mem = details::memory_c_to_cpp_converter::convert(api_cast(&result));
+        auto result = pb_node.get_result_buffer();
+        auto cpp_mem = memory(result.detach());
 
         auto& data_node = p.get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", cpp_mem));
         p.replace(pb_node, data_node);
     }
-}
\ No newline at end of file
+}
index 2011533..77df357 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "api/CPP/tensor.hpp"
+#include "api/tensor.hpp"
 
 #include "pass_manager.h"
 
index 4253136..6088270 100644 (file)
@@ -34,6 +34,9 @@ void eltwise_shrinking::run(program_impl& p) {
                 }
             }
 
+            if (node->get_output_layout().format == format::fs_b_yx_fsv32)
+                continue;
+
             const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
             // TODO: support cases which already have stride!
             if (eltw->stride.empty() && !node->get_users().empty()) {
index 4c08450..34b8fb4 100644 (file)
@@ -91,7 +91,7 @@ void graph_initializations::replace_nodes(program_impl& p) {
                 }
 
                 // For all the other dimensions, copy from the split_input
-                for (int dimension = 0; dimension < CLDNN_TENSOR_DIM_MAX; dimension++) {
+                for (int dimension = 0; dimension < tensor_dim_max; dimension++) {
                     reference_input_size.raw[dimension] = (reference_input_size.raw[dimension] == 0)
                                                               ? output_layout_size.raw[dimension]
                                                               : reference_input_size.raw[dimension];
@@ -131,14 +131,19 @@ void graph_initializations::replace_nodes(program_impl& p) {
             auto num_filter = upsampling_prim->num_filter;
 
             // setting deconvolution parameters based on upsampling input
-            auto scale = static_cast<tensor::value_type>(upsampling_prim->scale);
-            tensor stride(1, 1, scale, scale);
-            auto offset = static_cast<tensor::value_type>(std::ceil((scale - 1) / 2.f));
-            tensor input_offset(0, 0, -offset, -offset);
+            auto upsampled_size = node->get_output_layout().size;
+            auto input_size = input_node.get_output_layout().size;
+            auto scale_x = static_cast<tensor::value_type>(upsampled_size.spatial[0] / input_size.spatial[0]);
+            auto scale_y = static_cast<tensor::value_type>(upsampled_size.spatial[1] / input_size.spatial[1]);
+            tensor stride(1, 1, scale_x, scale_y);
+            auto offset_x = static_cast<tensor::value_type>(std::ceil((scale_x - 1) / 2.f));
+            auto offset_y = static_cast<tensor::value_type>(std::ceil((scale_y - 1) / 2.f));
+            tensor input_offset(0, 0, -offset_x, -offset_y);
 
             // setting weights for deconvolution
-            auto kernel_size = static_cast<tensor::value_type>((2 * scale) - (scale % 2));
-            layout weights_layout(data_types::f32, format::bfyx, tensor(1, 1, kernel_size, kernel_size));
+            auto kernel_size_x = static_cast<tensor::value_type>((2 * scale_x) - (static_cast<tensor::value_type>(scale_x) % 2));
+            auto kernel_size_y = static_cast<tensor::value_type>((2 * scale_y) - (static_cast<tensor::value_type>(scale_y) % 2));
+            layout weights_layout(data_types::f32, format::bfyx, tensor(1, 1, kernel_size_x, kernel_size_y));
 
             std::vector<primitive_id> weights_vec;
             for (uint32_t weights_idx = 0; weights_idx < num_filter; weights_idx++) {
@@ -146,14 +151,16 @@ void graph_initializations::replace_nodes(program_impl& p) {
                 mem_lock<float> dst{data_to_allocate};
                 float* dst_data = dst.data();
                 // initialize with bilinear weights data
-                auto f = static_cast<uint32_t>(std::ceil(kernel_size / 2.0f));
-                float c = (2 * f - 1 - f % 2) / (2.f * f);
+                auto f_x = static_cast<uint32_t>(std::ceil(kernel_size_x / 2.0f));
+                auto f_y = static_cast<uint32_t>(std::ceil(kernel_size_y / 2.0f));
+                float c_x = (2 * f_x - 1 - f_x % 2) / (2.f * f_x);
+                float c_y = (2 * f_y - 1 - f_y % 2) / (2.f * f_y);
                 float x = 0.f;
                 float y = 0.f;
                 for (size_t i = 0; i < weights_layout.count(); ++i) {
-                    x = static_cast<float>(i % kernel_size);
-                    y = static_cast<float>((i / kernel_size) % kernel_size);
-                    dst_data[i] = (1 - std::abs(x / f - c)) * (1 - std::abs(y / f - c));
+                    x = static_cast<float>(i % kernel_size_x);
+                    y = static_cast<float>((i / kernel_size_x) % kernel_size_y);
+                    dst_data[i] = (1 - std::abs(x / f_x - c_x)) * (1 - std::abs(y / f_y - c_y));
                 }
 
                 // create weights primitive, with dummy memory which will be replaced in further step
@@ -214,7 +221,8 @@ void graph_initializations::replace_nodes(program_impl& p) {
             auto& input_node = node->get_dependency(0);
 
             // disable for 5D
-            if (input_node.get_output_layout().format == format::bfzyx)
+            if (input_node.get_output_layout().format == format::bfzyx ||
+                input_node.get_output_layout().format == format::bfzyx_f16)
                 continue;
 
             primitive_id input_id = deconv_prim->input[0];
@@ -228,8 +236,6 @@ void graph_initializations::replace_nodes(program_impl& p) {
             std::vector<primitive_id> bias_vec;
             for (auto& bias_id : biases) bias_vec.push_back(bias_id);
             auto input_offset = deconv_prim->input_offset;
-            auto with_activation = deconv_prim->with_activation;
-            auto activation_negative_slope = deconv_prim->activation_negative_slope;
             auto output_padding = deconv_prim->output_padding;
 
             // remove deconvolution node and its connections to weights and biases, rename it and move to the optimized
@@ -267,8 +273,6 @@ void graph_initializations::replace_nodes(program_impl& p) {
                                                                stride,
                                                                input_offset,
                                                                tensor{1, 1, 1, 1},
-                                                               with_activation,
-                                                               activation_negative_slope,
                                                                output_padding);
                 p.get_or_create(conv_prim);
             } else {
@@ -278,8 +282,6 @@ void graph_initializations::replace_nodes(program_impl& p) {
                                                                stride,
                                                                input_offset,
                                                                tensor{1, 1, 1, 1},
-                                                               with_activation,
-                                                               activation_negative_slope,
                                                                output_padding);
                 p.get_or_create(conv_prim);
             }
@@ -456,10 +458,10 @@ void graph_initializations::handle_lstm(program_impl& p) {
                 }
             }
 
-            bool emit_last_cell = lstm_prim->output_selection == cldnn_lstm_output_hidden_cell ||
-                                  lstm_prim->output_selection == cldnn_lstm_output_sequence_cell;
-            bool emit_sequence = lstm_prim->output_selection == cldnn_lstm_output_sequence_cell ||
-                                 lstm_prim->output_selection == cldnn_lstm_output_sequence;
+            bool emit_last_cell = lstm_prim->output_selection == lstm_output_selection::hidden_cell ||
+                                  lstm_prim->output_selection == lstm_output_selection::sequence_cell;
+            bool emit_sequence = lstm_prim->output_selection == lstm_output_selection::sequence_cell ||
+                                 lstm_prim->output_selection == lstm_output_selection::sequence;
 
             std::vector<program_node*> cell_list(directions * sequence_len);
             std::vector<program_node*> hidden_list(directions * sequence_len);
index bbd5708..f8e0ad6 100644 (file)
@@ -42,7 +42,7 @@ void handle_reshape::run(program_impl& p) {
     while (node_itr != p.get_processing_order().end()) {
         auto& node = (*node_itr++);
         program_helpers::do_for_types<reshape>(*node, [&p](reshape_node& node) {
-            if (node.is_output() || node.get_users().size() > 1 || node.get_fused_activation_func() != activation_none)
+            if (node.is_output() || node.get_users().size() > 1 || !node.get_fused_activations_funcs().empty())
                 return;
 
             auto& out_node = node.get_users().front();
index cafa9a0..5e29b05 100644 (file)
@@ -20,6 +20,7 @@
 #include "gpu/primitive_gpu_base.h"
 #include "fully_connected/fully_connected_params.h"
 #include <memory>
+#include <stdexcept>
 
 /*
 This pass checks if primitive's input format matches the implementation's input format
@@ -39,12 +40,11 @@ program_node& post_input_reorder::add_reorder(program_impl& p,
     // ToDo: add a method to program_impl class which adds an intermediate node given a node and its user
     auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
     if (it == usr->get_dependencies().end()) {
-        throw error("Inconcistency in topology description: user of a node is not present among its dependecies.",
-                    CLDNN_ERROR);
+        throw std::runtime_error("Inconsistency in topology description: user of a node is not present among its dependencies.");
     }
     auto idx = it - usr->get_dependencies().begin();
     if (idx < 0 || (size_t)idx >= usr->get_dependencies().size()) {
-        throw error("Internal Error: container index out of range exception.", CLDNN_ERROR);
+        throw std::runtime_error("Internal Error: container index out of range exception.");
     }
     p.add_intermediate(new_reorder_node, *usr, idx);
     return new_reorder_node;
index ea43c18..8e19ddd 100644 (file)
 
 #include "pass_manager.h"
 #include "program_helpers.h"
-#include "api_extension/CPP/fused_conv_eltwise.hpp"
+#include "api_extension/fused_conv_eltwise.hpp"
 #include "include/fused_conv_eltwise_inst.h"
 #include "include/binary_convolution_inst.h"
 #include "include/deformable_convolution_inst.h"
+#include "lstm_dynamic_input_inst.h"
 
 namespace cldnn {
 
-post_optimize_weights::post_optimize_weights(layout_optimizer& lo_ref)
-    : base_pass("post_optimize_weights"), _lo(lo_ref) {}
-
-void post_optimize_weights::run(program_impl& p) { run(p, _lo); }
+post_optimize_weights::post_optimize_weights(reorder_factory& rf_ref)
+    : base_pass("post_optimize_weights"), _rf(rf_ref) {}
 
 // function which prepares given primitive for weights optimization
-template <typename T>
-void post_optimize_weights::optimize_weights(T& node, layout_optimizer& lo, program_impl& p) {
-    auto weights_offset = node.get_primitive()->input.size();
-    auto bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->weights).size();
-    for (auto i = weights_offset; i < bias_offset; i++) {
-        auto& weights = node.get_dependency(i);
-        auto* impl = node.get_selected_impl().get();
-        auto output_layout = node.get_output_layout();
-        auto& weights_node = node.get_dependency(1);
-        auto weights_layout = weights_node.get_output_layout();
-        const auto weights_type = layout_optimizer::data_type::weights;
+template<typename T> post_optimize_weights::weights_bias_offset post_optimize_weights::get_weights_bias_offset(const T& node) {
+    return weights_bias_offset(node.get_primitive()->input.size(), program_helpers::wrap_if_single(node.get_primitive()->weights).size());
+}
 
-        auto reorders = lo.get_generic_layer(impl->_weights_reorder_params, weights.id(), weights_layout, weights_type);
+template <>
+post_optimize_weights::weights_bias_offset post_optimize_weights::get_weights_bias_offset<fused_conv_eltwise_node>(const fused_conv_eltwise_node& node) {
+    return weights_bias_offset(node.get_primitive()->input.size(), program_helpers::wrap_if_single(node.get_primitive()->conv.weights).size());
+}
 
-        for (auto& reorder : reorders) {
-            // insert new generic_layer node to topology
-            p.add_intermediate(reorder.first, node, i, !reorder.second);
-            // set generic_layer's node output layout and implementation
-            auto& g_node = node.get_dependency(i);
-            g_node.get_output_layout(false);
-            g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
-        }
-        // set the old output layout and do not invalidate users as change of weights will not affect output layout
-        node.set_output_layout(output_layout, false);
-    }
+template <>
+post_optimize_weights::weights_bias_offset post_optimize_weights::get_weights_bias_offset<lstm_dynamic_input_node>(const lstm_dynamic_input_node& node) {
+    return weights_bias_offset(node.get_primitive()->input.size() + 1, program_helpers::wrap_if_single(node.get_primitive()->weights).size());
 }
 
 // function which prepares given primitive for weights optimization
-template <>
-void post_optimize_weights::optimize_weights<fused_conv_eltwise_node>(fused_conv_eltwise_node& node,
-                                                                      layout_optimizer& lo,
-                                                                      program_impl& p) {
-    auto weights_offset = node.get_primitive()->input.size();
-    auto bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->conv.weights).size();
-    for (auto i = weights_offset; i < bias_offset; i++) {
-        auto& weights = node.get_dependency(i);
+template<typename T>
+void post_optimize_weights::optimize_weights(T& node, program_impl& p) {
+    auto offsets = get_weights_bias_offset(node);
+    for (auto i = offsets.weights_offset; i < offsets.bias_offset; i++) {
+        auto& weights_node = node.get_dependency(i);
         auto* impl = node.get_selected_impl().get();
         auto output_layout = node.get_output_layout();
-        auto& weights_node = node.get_dependency(1);
         auto weights_layout = weights_node.get_output_layout();
-        const auto weights_type = layout_optimizer::data_type::weights;
 
-        auto reorders = lo.get_generic_layer(impl->_weights_reorder_params, weights.id(), weights_layout, weights_type);
+        auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, impl->_weights_reorder_params);
 
         for (auto& reorder : reorders) {
             // insert new generic_layer node to topology
@@ -88,37 +69,23 @@ void post_optimize_weights::optimize_weights<fused_conv_eltwise_node>(fused_conv
     }
 }
 
-template void post_optimize_weights::optimize_weights<convolution_node>(convolution_node& node,
-                                                                        layout_optimizer& lo,
-                                                                        program_impl& p);
-template void post_optimize_weights::optimize_weights<deconvolution_node>(deconvolution_node& node,
-                                                                          layout_optimizer& lo,
-                                                                          program_impl& p);
-template void post_optimize_weights::optimize_weights<fully_connected_node>(fully_connected_node& node,
-                                                                            layout_optimizer& lo,
-                                                                            program_impl& p);
-template void post_optimize_weights::optimize_weights<binary_convolution_node>(binary_convolution_node& node,
-                                                                               layout_optimizer& lo,
-                                                                               program_impl& p);
-template void post_optimize_weights::optimize_weights<deformable_conv_node>(deformable_conv_node& node,
-                                                                               layout_optimizer& lo,
-                                                                               program_impl& p);
-
-void post_optimize_weights::run(program_impl& p, layout_optimizer& lo) {
+void post_optimize_weights::run(program_impl& p) {
     for (auto& node : p.get_processing_order()) {
         if (node->type() == convolution::type_id()) {
-            optimize_weights(node->as<convolution>(), lo, p);
+            optimize_weights(node->as<convolution>(), p);
         }
         if (node->type() == binary_convolution::type_id()) {
-            optimize_weights(node->as<binary_convolution>(), lo, p);
+            optimize_weights(node->as<binary_convolution>(), p);
         } else if (node->type() == deconvolution::type_id()) {
-            optimize_weights(node->as<deconvolution>(), lo, p);
+            optimize_weights(node->as<deconvolution>(), p);
         } else if (node->type() == deformable_conv::type_id()) {
-            optimize_weights(node->as<deformable_conv>(), lo, p);
+            optimize_weights(node->as<deformable_conv>(), p);
         } else if (node->type() == fully_connected::type_id()) {
-            optimize_weights(node->as<fully_connected>(), lo, p);
+            optimize_weights(node->as<fully_connected>(), p);
         } else if (node->type() == fused_conv_eltwise::type_id()) {
-            optimize_weights(node->as<fused_conv_eltwise>(), lo, p);
+            optimize_weights(node->as<fused_conv_eltwise>(), p);
+        } else if (node->type() == lstm_dynamic_input::type_id()) {
+            optimize_weights(node->as<lstm_dynamic_input>(), p);
         }
     }
 }
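
The rewrite above replaces the per-type copies of optimize_weights with one get_weights_bias_offset helper plus a reorder_factory. A condensed stand-in of that bookkeeping is sketched below; the struct and node type are simplified placeholders rather than the clDNN classes. The point it illustrates: dependencies are ordered inputs, then weights, then biases, so only the [weights_offset, bias_offset) slice gets a weights reorder.

#include <cstddef>
#include <string>
#include <vector>

// Placeholder node: dependency ids are ordered [inputs..., weights..., biases...].
struct node_stub {
    std::size_t input_count;
    std::size_t weights_count;
    std::vector<std::string> dependency_ids;
};

struct weights_bias_offset_stub {
    std::size_t weights_offset;
    std::size_t bias_offset;
};

// Mirrors the default get_weights_bias_offset(); the lstm_dynamic_input
// specialization would add 1 to input_count before computing the range.
weights_bias_offset_stub get_offsets(const node_stub& n) {
    return {n.input_count, n.input_count + n.weights_count};
}

// Collect the weight dependencies that would receive a generic_layer reorder.
std::vector<std::string> weights_to_reorder(const node_stub& n) {
    auto off = get_offsets(n);
    return {n.dependency_ids.begin() + off.weights_offset,
            n.dependency_ids.begin() + off.bias_offset};
}
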
index 2a62c6f..d9d4e1a 100644 (file)
 
 using namespace cldnn;
 
-pre_optimize_bias::pre_optimize_bias(layout_optimizer& lo_ref) : base_pass("pre_optimize_bias"), _lo(lo_ref) {}
+pre_optimize_bias::pre_optimize_bias(reorder_factory& rf_ref) : base_pass("pre_optimize_bias"), _rf(rf_ref) {}
 
-void pre_optimize_bias::run(program_impl& p) { run(p, _lo); }
+void pre_optimize_bias::run(program_impl& p) { run(p, _rf); }
 
 // function which prepares given primitive for bias optimization
 template <typename T>
-void pre_optimize_bias::optimize_bias(T& node, layout_optimizer& lo, program_impl& p) {
+void pre_optimize_bias::optimize_bias(T& node, reorder_factory& rf, program_impl& p) {
     layout output_layout = node.get_output_layout();
 
     size_t weights_offset = node.get_primitive()->input.size();
     size_t bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->weights).size();
-    for (size_t i = bias_offset; i < node.get_dependencies().size(); ++i) {
+    for (size_t i = bias_offset; i < node.get_dependencies().size() - node.get_fused_inputs_count(); ++i) {
         // find weights primitive with given primitive_id and add it to weights_optimizer
         const program_node& bias = node.get_dependency(i);
-        const auto bias_type = layout_optimizer::data_type::bias;
-        auto reorder = lo.get_reorder(bias.get_output_layout(), bias.id(), bias_type, node, output_layout);
+        auto new_layout = layout(output_layout.data_type,
+                                 format::bfyx,
+                                 { 1, static_cast<tensor::value_type>(bias.get_output_layout().count()), 1, 1 });
+        if (new_layout.data_type == data_types::bin) {
+            new_layout.data_type = bias.get_output_layout().data_type;
+        }
+        auto reorder = rf.get_reorder(bias.id(),
+                                      bias.get_output_layout(),
+                                      new_layout);
 
         if (reorder.first)
             p.add_intermediate(reorder.first, node, i, !reorder.second);
     }
 }
 template void pre_optimize_bias::optimize_bias<convolution_node>(convolution_node& node,
-                                                                 layout_optimizer& lo,
+                                                                 reorder_factory& rf,
                                                                  program_impl& p);
 template void pre_optimize_bias::optimize_bias<deconvolution_node>(deconvolution_node& node,
-                                                                   layout_optimizer& lo,
+                                                                   reorder_factory& rf,
                                                                    program_impl& p);
 template void pre_optimize_bias::optimize_bias<fully_connected_node>(fully_connected_node& node,
-                                                                     layout_optimizer& lo,
+                                                                     reorder_factory& rf,
                                                                      program_impl& p);
-template void pre_optimize_bias::optimize_bias<embed_node>(embed_node& node, layout_optimizer& lo, program_impl& p);
+template void pre_optimize_bias::optimize_bias<embed_node>(embed_node& node, reorder_factory& rf, program_impl& p);
 
-void pre_optimize_bias::run(program_impl& p, layout_optimizer& lo) {
+void pre_optimize_bias::run(program_impl& p, reorder_factory& rf) {
     for (auto& prim : p.get_processing_order()) {
         if (prim->type() == convolution::type_id()) {
             if (!prim->as<convolution>().weights_quantization_term())
-                optimize_bias(prim->as<convolution>(), lo, p);
+                optimize_bias(prim->as<convolution>(), rf, p);
         } else if (prim->type() == deconvolution::type_id()) {
-            optimize_bias(prim->as<deconvolution>(), lo, p);
+            optimize_bias(prim->as<deconvolution>(), rf, p);
         } else if (prim->type() == fully_connected::type_id()) {
             if (!prim->as<fully_connected>().weights_quantization_term())
-                optimize_bias(prim->as<fully_connected>(), lo, p);
+                optimize_bias(prim->as<fully_connected>(), rf, p);
         } else if (prim->type() == embed::type_id()) {
-            optimize_bias(prim->as<embed>(), lo, p);
+            optimize_bias(prim->as<embed>(), rf, p);
         }
     }
-}
\ No newline at end of file
+}
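
With layout_optimizer out of the picture, the bias path now computes its own reorder target. A minimal sketch of that rule follows; the enum and struct are simplified stand-ins for cldnn::data_types and cldnn::layout, not the real types. The bias is flattened to a 1 x count x 1 x 1 bfyx layout in the primitive's output precision, except that a binary output precision falls back to the bias's own data type.

#include <cstdint>

enum class dtype_stub { f32, f16, i8, bin };

struct layout_stub {
    dtype_stub data_type;
    int64_t b, f, y, x;                                   // bfyx sizes
    int64_t count() const { return b * f * y * x; }
};

// Target layout for the bias reorder, following the rule in optimize_bias() above.
layout_stub bias_reorder_target(const layout_stub& output_layout, const layout_stub& bias_layout) {
    layout_stub target{output_layout.data_type, 1, bias_layout.count(), 1, 1};
    if (target.data_type == dtype_stub::bin)
        target.data_type = bias_layout.data_type;         // binary outputs keep the bias precision
    return target;
}
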
index 8769477..53f627b 100644 (file)
@@ -50,7 +50,7 @@ void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre(program_impl& p, T&
     if (node.get_primitive()->bias.size() != 0) {
         const auto& bias_layout = node.get_dependency(dependency_offset).get_output_layout();
         auto target_layout =
-            layout(bias_layout.data_type, cldnn::format::bfyx, {1, 1, bias_layout.size.spatial[0] * split, 1});
+            layout(bias_layout.data_type, cldnn::format::bfyx, {1, bias_layout.size.feature[0] * split, 1, 1});
         program_helpers::merge_buffers(p.get_engine(),
                                        node,
                                        target_layout,
@@ -109,4 +109,4 @@ void prep_opt_depthwise_sep_post::run(program_impl& p) {
             optimize_depthwise_sep_pre(p, prim->as<deconvolution>());
         }
     }
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_binarization.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_binarization.cpp
deleted file mode 100644 (file)
index c07ef47..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
-// Copyright (c) 2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <api/CPP/quantize.hpp>
-#include <api/CPP/binary_convolution.hpp>
-#include <api/CPP/scale.hpp>
-#include "quantize_inst.h"
-#include "binary_convolution_inst.h"
-#include "data_inst.h"
-#include "pass_manager.h"
-#include "program_helpers.h"
-#include <algorithm>
-
-void prepare_binarization::prepare_packed_quantize(program_impl&, program_node& node) {
-    auto& quantize_node = node.as<quantize>();
-
-    if (quantize_node.get_primitive()->levels != 2 || quantize_node.get_users().size() > 1 ||
-        quantize_node.is_output() || !(quantize_node.get_users().front()->is_type<binary_convolution>()))
-        return;
-
-    auto& input_low = quantize_node.get_dependency(1).template as<data>();
-    auto& input_high = quantize_node.get_dependency(2).template as<data>();
-
-    auto& mem_input_low = input_low.get_attached_memory();
-    auto& mem_input_high = input_high.get_attached_memory();
-
-    bool is_binarization = true;
-    switch (mem_input_high.get_layout().data_type) {
-        case data_types::f32: {
-            float* data_input_low = static_cast<float*>(mem_input_low.lock());
-            float* data_input_high = static_cast<float*>(mem_input_high.lock());
-
-            for (size_t i = 0; i < mem_input_high.get_layout().count(); i++) {
-                if (data_input_high[i] != data_input_low[i]) {
-                    is_binarization = false;
-                    break;
-                }
-            }
-            break;
-        }
-        case data_types::f16: {
-            uint16_t* data_input_low = static_cast<uint16_t*>(mem_input_low.lock());
-            uint16_t* data_input_high = static_cast<uint16_t*>(mem_input_high.lock());
-
-            for (size_t i = 0; i < mem_input_high.get_layout().count(); i++) {
-                if (data_input_high[i] != data_input_low[i]) {
-                    is_binarization = false;
-                    break;
-                }
-            }
-            break;
-        }
-        default:
-            throw std::runtime_error("PrepareBinarization: Unsupported precision of quantize inputs");
-    }
-
-    mem_input_low.unlock();
-    mem_input_high.unlock();
-
-    if (!is_binarization)
-        return;
-
-    quantize_node.set_packed_binary_output(true);
-}
-
-void prepare_binarization::prepare_fusing(program_impl& p, program_node& node) {
-    auto& binary_conv_node = node.as<binary_convolution>();
-
-    program_node* user;
-
-    // TODO: support more than 1 fused node
-    bool repeat = false;
-    do {
-        if (binary_conv_node.get_users().size() > 1 || binary_conv_node.get_users().empty())
-            return;
-
-        user = binary_conv_node.get_users().front();
-
-        // check all primitive types that can be possibly fused
-        bool fuse_scale = user->is_type<scale>();
-        bool fuse_quantize = user->is_type<quantize>() && user->as<quantize>().get_packed_binary_output() &&
-                             binary_conv_node.get_output_layout().size.feature[0] == user->get_dependency(1).get_output_layout().size.feature[0] &&
-                             binary_conv_node.get_output_layout().size.feature[0] == user->get_dependency(2).get_output_layout().size.feature[0] &&
-                             binary_conv_node.get_primitive()->dilation == tensor{1};
-        if (!fuse_scale && !fuse_quantize)
-            return;
-
-        cldnn::padding needed_padding =
-            padding::max(user->get_output_layout().data_padding, binary_conv_node.get_output_layout().data_padding);
-        binary_conv_node.add_fused_primitive(user);
-
-        while (user->get_dependencies().size() > 1) {
-            auto& dep = user->get_dependency(user->get_dependencies().size() - 1);
-            p.remove_connection(dep, *user);
-        }
-
-        p.add_optimized_primitive_info(user->id(), {binary_conv_node.id()});
-
-        binary_conv_node.merge_output_padding(needed_padding);
-        binary_conv_node.set_output_layout(user->get_output_layout());
-
-        p.extract_and_remove(*user);
-    } while (repeat);
-}
-
-void prepare_binarization::run(program_impl& p) {
-    for (auto& prim : p.get_processing_order()) {
-        if (prim->type() == quantize::type_id()) {
-            prepare_packed_quantize(p, *prim);
-        }
-    }
-
-    for (auto& prim : p.get_processing_order()) {
-        if (prim->type() == binary_convolution::type_id()) {
-            prepare_fusing(p, *prim);
-        }
-    }
-}
index a14e8b9..d4f6947 100644 (file)
@@ -16,9 +16,9 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "api/CPP/eltwise.hpp"
-#include "api/CPP/pooling.hpp"
-#include "api/CPP/upsampling.hpp"
+#include "api/eltwise.hpp"
+#include "api/pooling.hpp"
+#include "api/upsampling.hpp"
 #include "primitive_inst.h"
 #include "activation_inst.h"
 #include "concatenation_inst.h"
@@ -37,7 +37,6 @@
 using namespace cldnn;
 
 // ToDo: remove friendship relation from program_node
-
 void prepare_buffer_fusing::run(program_impl& p) {
     bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
     /*
@@ -49,7 +48,7 @@ void prepare_buffer_fusing::run(program_impl& p) {
     If crop is before concat there can be a padding mismatch, since concat changes padding.
     */
     auto can_optimize = [](const program_node* node) {
-        if (node->is_output() || (node->get_fused_activation_func() != cldnn_activation_func_t::activation_none)) {
+        if (node->is_output() || (!node->get_fused_activations_funcs().empty())) {
             return false;
         }
         return true;
@@ -272,123 +271,10 @@ void prepare_buffer_fusing::run(program_impl& p) {
             continue;
         program_helpers::do_for_types<reshape>(*node, [&p](reshape_node& node) {
             node.get_output_layout();
-            if (node.is_in_place() && node.get_fused_activation_func() == activation_none)
+            if (node.is_in_place() && node.get_fused_activations_funcs().empty())
                 node.can_be_optimized(true);
             else
                 node.can_be_optimized(false);
         });
-        program_helpers::do_for_types<reorder>(*node, [&p](reorder_node& node) {
-            auto& input = node.input();
-
-            auto output_layout = node.get_output_layout();
-            // This is WA for topologies that due to additional reorders added perform worse with conv1x1 optimization
-            auto remove_bf8_xy_opt = ((input.is_type<pooling>() || input.is_type<concatenation>()) &&
-                                      output_layout.format == format::bf8_xy16 && input.get_users().size() == 1);
-            // Remove reorder from convolution 1x1 to bfyx in some conditions
-            auto remove_byxf_opt = (input.is_type<convolution>() && input.get_users().size() == 1 &&
-                                    input.get_output_layout().format == format::byxf);
-
-            // Work-around to propagate blocked formats to first convolution - fs_byx_fsv32, bfyx_f16
-            // Pattern of convolution -> pooling -> reorder
-            auto blocked_conv_pool_reorder =
-                input.is_type<pooling>() &&
-                input.get_dependencies().front()->is_type<convolution>() &&   // Input to pooling is convolution
-                input.get_dependencies().front()->get_users().size() == 1 &&  // Convolution has only one user (pooling)
-                input.get_dependencies().front()->get_output_layout().format ==
-                    format::bfyx;  // Convolution outputs bfyx format
-            // Pattern of convolution -> reorder
-            auto blocked_conv_reorder = input.is_type<convolution>() && input.get_users().size() == 1 &&
-                                        input.get_output_layout().format == format::bfyx;
-            auto remove_bfyx_to_blocked =
-                (output_layout.format == format::fs_b_yx_fsv32 ||
-                 // For bfyx_f16 if the size is large enough it is more optimal to sink reorder into convolution
-                 (output_layout.format == format::bfyx_f16 &&
-                  output_layout.count() > 500000
-                  // bfyx -> bfyx_f16 implementation can only handle 3 input features
-                  && input.get_output_layout().size.feature[0] == 3)) &&
-                (blocked_conv_pool_reorder || blocked_conv_reorder);
-
-            // check if all inputs user have the same format
-            auto all_users_same_format = true;
-            auto input_user_layout_format = input.get_users().front()->get_output_layout().format;
-            for (auto const& user : input.get_users()) {
-                if (user->get_output_layout().format != input_user_layout_format) {
-                    all_users_same_format = false;
-                    break;
-                }
-            }
-            auto same_data_type = input.get_output_layout().data_type == output_layout.data_type;
-            // Optimization only available in case of layers that support different input and output formats.
-            // todo: new api needs to be created to read such caps
-            if (!(input.is_type<pooling>() &&
-                  (output_layout.format == format::bfyx || output_layout.format == format::yxfb ||
-                   output_layout.format == format::byxf) &&
-                  input.get_output_layout().format != format::fs_b_yx_fsv32 &&
-                  input.get_output_layout().format != format::bfyx_f16 && all_users_same_format && same_data_type) &&
-                !remove_bf8_xy_opt &&
-                !(input.is_type<convolution>() && (input.get_output_layout().format == format::bf8_xy16)) &&
-                !(input.is_type<eltwise>() &&
-                  (output_layout.format == format::bfyx || output_layout.format == format::yxfb ||
-                   output_layout.format == format::byxf) &&
-                  input.get_output_layout().format != format::fs_b_yx_fsv32 &&
-                  input.get_output_layout().format != format::bfyx_f16 && all_users_same_format && same_data_type) &&
-                !(remove_byxf_opt &&
-                  (node.get_users().front()->is_type<eltwise>() || node.get_users().front()->is_type<pooling>()) &&
-                  output_layout.format != format::fs_b_yx_fsv32) &&
-                !(remove_bfyx_to_blocked))
-                return;
-
-            if (remove_bf8_xy_opt) {
-                auto users_user_layout = node.get_users().front()->get_users().front()->get_output_layout();
-                // if users_user_layout is still bf8_yx16 (stacked convolutions) then leave the reorder
-                if (users_user_layout.format == format::bf8_xy16)
-                    return;
-                auto input_layout = input.get_output_layout();
-                auto target_layout = layout(input_layout.data_type,
-                                            users_user_layout.format,
-                                            input_layout.size,
-                                            input_layout.data_padding);
-                input.set_output_layout(target_layout, false);
-            } else if (remove_byxf_opt) {
-                for (auto user : node.get_users()) {
-                    auto users_users = user->get_users();
-
-                    for (auto const& users_user : users_users) {
-                        if (users_user->get_output_layout().format != format::byxf && !users_user->is_type<eltwise>()) {
-                            remove_byxf_opt = false;
-                            return;
-                        }
-                    }
-                }
-
-                for (auto user : node.get_users()) {
-                    if (remove_byxf_opt) {
-                        auto input_layout = input.get_output_layout();
-                        user->set_output_layout(input_layout, false);
-                    }
-                }
-            } else if (remove_bfyx_to_blocked) {
-                auto& conv_node = blocked_conv_reorder ? input : *(input.get_dependencies().front());
-                auto original_layout = conv_node.get_output_layout();
-                auto output_format = output_layout.format;
-                // Change convolution output layout since it can handle bfyx -> blocked format change
-                auto target_layout = layout(original_layout.data_type,
-                                            output_format,
-                                            original_layout.size,
-                                            original_layout.data_padding);
-
-                if (blocked_conv_pool_reorder) {
-                    input.set_output_padding(output_layout.data_padding);
-                } else {
-                    target_layout.data_padding = output_layout.data_padding;
-                }
-
-                conv_node.set_output_layout(target_layout);
-            } else {
-                input.set_output_layout(output_layout, false);
-            }
-            node.can_be_optimized(true);
-            p.extract_and_remove(node);  // try to remove redundant reorders
-        });
     }
 }
index dab8ba8..9c6ec77 100644 (file)
@@ -32,7 +32,7 @@ void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre(T& node) {
         // are not reused in other primitives as they will be overridden with concatenated ones
         for (size_t i = 1; i < node.get_dependencies().size(); i++) {
             auto& weights_or_biases = node.get_dependency(i);
-            if (weights_or_biases.get_users().size() > 1 || weights_or_biases.type() != data::type_id())
+            if (weights_or_biases.get_users().size() > 1 || !weights_or_biases.template is_type<data>())
                 return;
         }
     } else {
@@ -52,9 +52,9 @@ template void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre<deconvolutio
 void prepare_depthwise_sep_opt::run(program_impl& p) {
     // depthwise separable convolution/deconvolution optimization
     for (auto& prim : p.get_processing_order()) {
-        if (prim->type() == convolution::type_id()) {
+        if (prim->is_type<convolution>()) {
             optimize_depthwise_sep_pre(prim->as<convolution>());
-        } else if (prim->type() == deconvolution::type_id()) {
+        } else if (prim->is_type<deconvolution>()) {
             optimize_depthwise_sep_pre(prim->as<deconvolution>());
         }
     }
index 6f2505e..458611e 100644 (file)
@@ -36,6 +36,9 @@ void prepare_padding::run(program_impl& p) {
                 if (!prim->with_output_size)
                     continue;
 
+                if (node->get_output_layout().format == format::bfzyx_f16)
+                    continue;
+
                 auto filter_size = prim_node.weights(0).get_output_layout().size;
 
                 auto needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(),
@@ -119,7 +122,8 @@ void prepare_padding::run(program_impl& p) {
             conv_layout.format != cldnn::format::byxf_af32 &&
             conv_layout.format != cldnn::format::fs_bs_yx_bsv4_fsv32 &&
             conv_layout.format != cldnn::format::b_fs_yx_fsv4 &&
-            conv_layout.format != cldnn::format::fs_b_yx_fsv32) {
+            conv_layout.format != cldnn::format::fs_b_yx_fsv32 &&
+            conv_layout.format != cldnn::format::b_fs_yx_32fp) {
             continue;
         }
 
index d6c359c..0e60acd 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "api/CPP/pooling.hpp"
-#include "api/CPP/proposal.hpp"
-#include "api/CPP/roi_pooling.hpp"
+#include "api/pooling.hpp"
+#include "api/proposal.hpp"
+#include "api/roi_pooling.hpp"
 
 #include "program_helpers.h"
 #include "pass_manager.h"
 
+#include "quantize_inst.h"
+#include "binary_convolution_inst.h"
 #include "activation_inst.h"
 #include "batch_norm_inst.h"
 #include "batch_norm_grad_inst.h"
 #include <memory>
 #include <string>
 #include <utility>
+#include "error_handler.h"
 
-void prepare_primitive_fusing::fuse_skip_layers(program_impl& p, program_node* node) {
-    program_helpers::do_for_types<eltwise>(*node, [&p](eltwise_node& node) {
-        if (node.get_primitive()->mode != eltwise_mode::sum || node.inputs_count() != 2)
-            return;
+void prepare_primitive_fusing::run(program_impl& p) {
+    fuse_reorders(p);
+    fuse_simple_primitives(p);
+    fuse_activations(p);
+    fuse_skip_layers(p);
+}
 
-        // both inputs should be deconvolutions
-        if (!(node.input(0).is_type<deconvolution>() && node.input(1).is_type<deconvolution>())) {
-            return;
-        }
+void prepare_primitive_fusing::fuse_reorders(program_impl &p) {
+    // This loop tries fusing several reorders one by one (if present) into one reorder
+    auto itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
 
-        auto& to_fuse_with = node.input(0);
-        int to_fuse_index = 1;
+        if (node->is_output())
+            continue;
 
-        // remove dependencies and users of elwtise that is going to be extracted
-        p.add_connection(node.input(to_fuse_index), to_fuse_with);
-        p.remove_connection(node.input(to_fuse_index), node);
+        program_helpers::do_for_types<reorder>(*node, [&p](reorder_node& node) {
+            auto& input = node.input();
 
-        p.get_processing_order().erase(&to_fuse_with);
-        p.get_processing_order().insert(&node, &to_fuse_with);
+            // Restrictions:
+            // - inputs cannot be padded
+            // - primitive's input cannot be an output
+            // - input must not already be optimized out
+            if (node.has_padded_dependency() || input.is_output() ||
+                node.get_dependencies().size() != 1 || input.can_be_optimized())
+                return;
 
-        if (node.get_fused_activation_func() != activation_none)
-            to_fuse_with.set_fused_activation(node.get_fused_activation_func(), node.get_fused_activation_params());
-        to_fuse_with.set_output_padding(node.get_output_layout().data_padding);
+            // - check if previous node is reorder with 1 user (and if the layouts are the same - remove reorder)
+            // - do not fuse if current node has mean subtract
+            if (input.get_users().size() != 1 || !input.is_type<reorder>() ||
+                input.get_output_layout() != node.get_output_layout() || node.has_mean() ||
+                !node.get_primitive()->subtract_per_feature.empty())
+                return;
 
-        p.extract_and_remove(node);
-    });
-}
+            p.add_optimized_primitive_info(node.id());
 
-template <typename T>
-static bool node_is_type(program_node* n) {
-    return n->is_type<T>();
+            auto output_layout = node.get_output_layout();
+            input.set_output_layout(output_layout, false);
+            p.extract_and_remove(node);
+        });
+    }
 }
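
The checks in fuse_reorders can be condensed into a single predicate. The model below is a simplification (the struct is a stand-in and the layout comparison is reduced to an id; the real pass works on program_node): the second reorder is dropped only when its single producer is itself a reorder with one user, neither node is a network output, the layouts match, and no mean or per-feature subtraction is involved.

// Simplified model of a reorder node, for the fusing condition only.
struct reorder_model {
    bool is_reorder = false;
    bool is_output = false;
    bool has_mean = false;
    bool has_subtract_per_feature = false;
    bool has_padded_dependency = false;
    bool can_be_optimized = false;
    int users = 0;
    int output_layout_id = 0;              // stands in for a full layout comparison
    const reorder_model* input = nullptr;  // single dependency, if any
};

// True when the child reorder can be removed and its layout pushed onto the parent,
// approximating the restrictions listed in fuse_reorders() above.
bool can_collapse_reorder(const reorder_model& node) {
    const reorder_model* in = node.input;
    if (in == nullptr || node.is_output || node.has_mean ||
        node.has_subtract_per_feature || node.has_padded_dependency)
        return false;
    return in->is_reorder && in->users == 1 && !in->is_output && !in->can_be_optimized &&
           in->output_layout_id == node.output_layout_id;
}
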
 
-void prepare_primitive_fusing::fuse_conv_bn_scale(program_impl& p, program_node* node) {
-    program_helpers::do_for_types<convolution>(*node, [&p](convolution_node& node) {
-        if (node.get_users().size() > 2)
-            return;
+void prepare_primitive_fusing::fuse_activations(program_impl &p) {
+    bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
+    auto itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        program_helpers::do_for_types<activation>(*node, [&p, &is_debug](activation_node& node) {
+            auto& input = node.input();
 
-        auto found_bn = std::find_if(node.get_users().begin(), node.get_users().end(), node_is_type<batch_norm>);
-        auto bn_node = found_bn != node.get_users().end() ? *found_bn : nullptr;
-        if (bn_node != nullptr) {
-            if (bn_node->get_users().size() > 2)
+            // Restrictions:
+            // - inputs cannot be padded
+            // - primitive's input cannot be an output
+            // - the activation has no additional inputs
+            // - input must not already be optimized out
+            if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
+                node.get_dependencies().size() != 1 || input.can_be_optimized())
                 return;
 
-            auto found_scale =
-                std::find_if(bn_node->get_users().begin(), bn_node->get_users().end(), node_is_type<scale>);
-            auto sc_node = found_bn != node.get_users().end() ? *found_scale : nullptr;
-            if (sc_node != nullptr) {
-                int bn_index = static_cast<int>(std::distance(node.get_users().begin(), found_bn));
-                int sc_index = static_cast<int>(std::distance(bn_node->get_users().begin(), found_scale));
-                auto scale_prim = std::static_pointer_cast<const scale>(sc_node->get_primitive());
-                auto bn_prim = std::static_pointer_cast<const batch_norm>(bn_node->get_primitive());
-                auto prim = node.get_primitive();
-                bool training = false;
-
-                if (node.get_users().size() == 2) {
-                    training = true;
-                    float zero = 0.0f;
-                    layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
-
-                    auto bn_backw = node.get_users().begin();
-                    std::advance(bn_backw, bn_index == 0 ? 1 : 0);
-                    if (!(*bn_backw)->is_type<batch_norm_grad>())
-                        return;
-                    auto sc_backw = bn_node->get_users().begin();
-                    std::advance(sc_backw, sc_index == 0 ? 1 : 0);
-                    if (!(*sc_backw)->is_type<scale_grad_weights>())
-                        return;
-
-                    auto conv_out_prim = std::make_shared<mutable_data>(prim->id + "_fused_conv_out",
-                                                                        memory::attach(dummy_layout, &zero, 1));
-                    auto& conv_out_node = p.get_or_create(conv_out_prim);
-                    auto conv_out_mem = p.get_engine().allocate_memory(node.get_output_layout(), 0);
-                    conv_out_node.as<mutable_data>().attach_memory(*conv_out_mem, false);
-                    p.add_intermediate(conv_out_node, **bn_backw, 1, true);
-
-                    auto bn_out_prim = std::make_shared<mutable_data>(prim->id + "_fused_bn_out",
-                                                                      memory::attach(dummy_layout, &zero, 1));
-                    auto& bn_out_node = p.get_or_create(bn_out_prim);
-                    auto bn_out_mem = p.get_engine().allocate_memory(bn_node->get_output_layout(), 0);
-                    bn_out_node.as<mutable_data>().attach_memory(*bn_out_mem, false);
-                    p.add_intermediate(bn_out_node, **sc_backw, 0, true);
-                }
+            // - limited to primitives whose implementations support activation fusing
+            if (input.get_users().size() != 1 ||
+                // TODO: new api needs to be created to read such caps
+                // right now use whitelist so no new primitives will be affected in case of lack of fused activation
+                // support
+                (!input.is_type<batch_norm>() && !input.is_type<concatenation>() && !input.is_type<convolution>() &&
+                 !input.is_type<crop>() && !input.is_type<deconvolution>() && !input.is_type<eltwise>() &&
+                 !input.is_type<fully_connected>() && !input.is_type<lrn>() && !input.is_type<normalize>() &&
+                 !input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
+                 !input.is_type<reshape>() && !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
+                 !input.is_type<softmax>() && !input.is_type<upsampling>() && !input.is_type<mvn>() &&
+                 !input.is_type<depth_to_space>() && !input.is_type<gather>() && !input.is_type<reverse_sequence>() &&
+                 !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() &&
+                 !input.is_type<fused_conv_eltwise>() && !input.is_type<activation>()))
+                return;
 
-                auto new_conv = std::make_shared<fused_conv_bn_scale>(prim->id + "_fused",
-                                                                      prim->input[0],
-                                                                      prim->weights.ref(),
-                                                                      prim->bias.ref(),
-                                                                      bn_prim->epsilon,
-                                                                      scale_prim->input[1],
-                                                                      scale_prim->bias,
-                                                                      prim->stride,
-                                                                      prim->dilation,
-                                                                      prim->input_offset,
-                                                                      bn_prim->inv_variance,
-                                                                      prim->with_activation,
-                                                                      prim->activation_negative_slope,
-                                                                      prim->output_padding);
-                auto& new_node = p.get_or_create(new_conv);
-                p.replace(node, new_node);
-
-                while (sc_node->get_dependencies().size() > 1) {
-                    // ToDo: here we modify users and dependencies,
-                    // It should be done through public methods in program_node/program_impl
-                    // to avoid friend declarations
-                    auto& dep = sc_node->get_dependency(sc_node->get_dependencies().size() - 1);
-                    p.remove_connection(dep, *sc_node);
-                    dep.users.push_back(&new_node);
-                    if (sc_node->get_dependencies().size() == 1)
-                        new_node.dependencies.insert(new_node.dependencies.begin() + 1, &dep);
-                    else
-                        new_node.dependencies.push_back(&dep);
-                }
-                p.extract_and_remove(*sc_node);
-                while (bn_node->get_dependencies().size() > 1) {
-                    auto& dep = bn_node->get_dependency(bn_node->get_dependencies().size() - 1);
-                    p.remove_connection(dep, *bn_node);
-                    new_node.dependencies.push_back(&dep);
+            if (input.get_fused_primitives().empty()) {
+                input.add_fused_activation(node.get_primitive()->activation_function, node.get_primitive()->additional_params);
+                for (size_t i = 0; i < node.get_fused_activations_funcs().size(); i++) {
+                    input.add_fused_activation(node.get_fused_activations_funcs()[i],
+                                               node.get_fused_activations_params()[i]);
                 }
-                p.extract_and_remove(*bn_node);
-                auto inv_var_node =
-                    std::find_if(new_node.dependencies.begin(),
-                                 new_node.dependencies.end(),
-                                 [&new_conv](const program_node* node) {
-                                     return node->id().find(new_conv->inv_variance) != std::string::npos;
-                                 });
-                (*inv_var_node)->users.push_back(&new_node);
-
-                if (training) {
-                    auto user = std::find_if(new_node.get_users().begin(),
-                                             new_node.get_users().end(),
-                                             [](const program_node* node) {
-                                                 return node->id().find("_fused_conv_out") != std::string::npos;
-                                             });
-                    p.reverse_connection(new_node, **user);
-                    user = std::find_if(
-                        new_node.get_users().begin(),
-                        new_node.get_users().end(),
-                        [](const program_node* node) { return node->id().find("_fused_bn_out") != std::string::npos; });
-                    p.reverse_connection(new_node, **user);
-                    p.get_processing_order()
-                        .calculate_BFS_processing_order();  // this should be avoided, why do we need recalculation of
-                                                            // processing order?
+                input.set_output_padding(node.get_output_layout().data_padding);
+            } else {
+                // If node already has any fused node using new mechanism,
+                // we can just use the same way and handle any amount of activations
+                p.fuse_nodes(input, node);
+            }
+
+            p.add_optimized_primitive_info(node.id(), {input.id()});
+
+            p.extract_and_remove(node);
+        });
+    }
+}
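
Fused activations are now stored as parallel lists of functions and parameters instead of a single slot. The holder class below only approximates the program_node interface referenced above (the names mirror the diff, but the real class carries far more state); it shows why an activation node's own fused activations can simply be appended onto its producer before the node is removed.

#include <cstddef>
#include <vector>

enum class activation_func_stub { relu, relu_negative_slope, clamp, logistic };
struct activation_params_stub { float a = 0.f; float b = 0.f; };

class fused_activation_holder {
public:
    // Appends rather than overwrites, so several activations can accumulate on one node.
    void add_fused_activation(activation_func_stub func, activation_params_stub params) {
        funcs_.push_back(func);
        params_.push_back(params);
    }
    const std::vector<activation_func_stub>& get_fused_activations_funcs() const { return funcs_; }
    const std::vector<activation_params_stub>& get_fused_activations_params() const { return params_; }

private:
    std::vector<activation_func_stub> funcs_;
    std::vector<activation_params_stub> params_;
};

// Usage mirroring fuse_activations(): move the activation node's list onto its input.
inline void merge_into(fused_activation_holder& input, const fused_activation_holder& act_node) {
    for (std::size_t i = 0; i < act_node.get_fused_activations_funcs().size(); i++)
        input.add_fused_activation(act_node.get_fused_activations_funcs()[i],
                                   act_node.get_fused_activations_params()[i]);
}
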
+
+void prepare_primitive_fusing::fuse_skip_layers(program_impl& p) {
+    // This loop tries fusing eltwise (sum) with deconvolution
+    auto itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        program_helpers::do_for_types<eltwise>(*node, [&p](eltwise_node& node) {
+            if (node.get_primitive()->mode != eltwise_mode::sum || node.inputs_count() != 2)
+                return;
+
+            // both inputs should be deconvolutions
+            if (!(node.input(0).is_type<deconvolution>() && node.input(1).is_type<deconvolution>())) {
+                return;
+            }
+
+            auto& to_fuse_with = node.input(0);
+            int to_fuse_index = 1;
+
+            // remove dependencies and users of the eltwise that is going to be extracted
+            p.add_connection(node.input(to_fuse_index), to_fuse_with);
+            p.remove_connection(node.input(to_fuse_index), node);
+
+            p.get_processing_order().erase(&to_fuse_with);
+            p.get_processing_order().insert(&node, &to_fuse_with);
+
+            if (!node.get_fused_activations_funcs().empty()) {
+                for (size_t i = 0; i < node.get_fused_activations_funcs().size(); i++) {
+                    to_fuse_with.add_fused_activation(node.get_fused_activations_funcs()[i],
+                                                      node.get_fused_activations_params()[i]);
                 }
             }
-        }
-    });
+            to_fuse_with.set_output_padding(node.get_output_layout().data_padding);
+
+            p.extract_and_remove(node);
+        });
+    }
+}
+
+void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
+    bool recalc_processing_order = false;
+
+    auto itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        auto fuse_activation_f = [&p](activation_node& activation_node) {
+            auto& input_data = activation_node.get_dependency(0);
+            if (activation_node.get_dependencies().size() >= 3 ||
+                (!(input_data.is_type<convolution>() && input_data.get_output_layout().format == format::bfyx_f16) &&
+                 !input_data.is_type<binary_convolution>()))
+                return;
+
+            p.fuse_nodes(input_data, activation_node);
+        };
+
+        auto fuse_scale_f = [&p](scale_node& scale_node) {
+            if (scale_node.get_dependencies().empty())
+                CLDNN_ERROR_MESSAGE(scale_node.id(), "scale has invalid count of dependencies");
+
+            auto& input_data = scale_node.get_dependency(0);
+            bool fuse_to_binary_conv = input_data.is_type<binary_convolution>() &&
+                                       input_data.as<binary_convolution>().get_primitive()->dilation == tensor{1};
+            bool fuse_to_conv = input_data.is_type<convolution>() && input_data.get_output_layout().format == format::bfyx_f16;
+
+            bool should_fuse = fuse_to_binary_conv || fuse_to_conv;
+            if (!should_fuse)
+                return;
+
+            p.fuse_nodes(input_data, scale_node);
+        };
+
+        auto fuse_quantize_f = [&p](quantize_node& quantize_node) {
+            auto& input_data = quantize_node.get_dependency(0);
+            auto& input_lo = quantize_node.get_dependency(1);
+            auto& input_hi = quantize_node.get_dependency(2);
+
+            auto out_layout = quantize_node.get_output_layout();
+            auto in_layout = input_data.get_output_layout();
+
+            bool fuse_to_binary_conv = input_data.is_type<binary_convolution>() &&
+                                       ((out_layout.data_type == data_types::bin &&
+                                       quantize_node.get_dependencies().size() == 5 &&
+                                       ((in_layout.size.feature[0] == input_lo.get_output_layout().size.feature[0] &&
+                                         in_layout.size.feature[0] == input_hi.get_output_layout().size.feature[0]) ||
+                                        (input_lo.get_output_layout().size.feature[0] == 1 &&
+                                         input_hi.get_output_layout().size.feature[0] == 1)))) &&
+                                       input_data.as<binary_convolution>().get_primitive()->dilation.spatial[0] == 1 &&
+                                       input_data.as<binary_convolution>().get_primitive()->dilation.spatial[1] == 1;
+
+            bool should_fuse = fuse_to_binary_conv;
+
+            if (!should_fuse)
+                return;
+
+            p.fuse_nodes(input_data, quantize_node);
+        };
+
+        auto fuse_eltwise_f = [&p, &recalc_processing_order, this](eltwise_node& node) {
+            std::shared_ptr<const cldnn::eltwise> prim = node.get_primitive();
+            if (node.is_output() || node.inputs_count() != 2 ||
+                prim->mode != eltwise_mode::sum || !prim->stride.empty())
+                return;
+
+            std::vector<cldnn::program_node*> parents = node.get_dependencies();
+            std::list<cldnn::program_node*> users = node.get_users();
+
+            auto parent1 = parents[0];
+            auto parent2 = parents[1];
+
+            // We should have at least one convolution node
+            if (!parent1->is_type<convolution>() && !parent2->is_type<convolution>())
+                return;
+
+            // Choose a convolution node
+            size_t fused_idx = parent1->is_type<convolution>() ? 0 : 1;
+            size_t peer_idx  = parent1->is_type<convolution>() ? 1 : 0;
+
+            int p1_pnum = p.get_processing_order().get_processing_number(parents[fused_idx]);
+            int p2_pnum = p.get_processing_order().get_processing_number(parents[peer_idx]);
+
+            if (p1_pnum < p2_pnum && parents[peer_idx]->is_type<convolution>()) {
+                std::swap(fused_idx, peer_idx);
+            }
+
+            if (parent1->is_type<convolution>() && !_lo.is_format_optimized(parent1->as<convolution>(), format::bfyx_f16))
+                return;
+            if (parent2->is_type<convolution>() && !_lo.is_format_optimized(parent2->as<convolution>(), format::bfyx_f16))
+                return;
+
+            auto fused_node = parents[fused_idx];
+            auto peer_node = parents[peer_idx];
+
+            // This fusing can be extended to support peer node in any layout and with broadcast
+            // Fusing is allowed only if current layouts are bfyx_f16 (in tests) or bfyx_f16_network attribute is true
+            bool merge_allowed = fused_node->get_users().size() == 1 &&
+                                 (_lo.get_optimization_attributes().bfyx_f16_network ||
+                                 (fused_node->get_output_layout().format == format::bfyx_f16 &&
+                                  peer_node->get_output_layout().format == format::bfyx_f16)) &&
+                                 fused_node->get_output_layout().size == peer_node->get_output_layout().size;
+
+            for (auto& parent : fused_node->get_dependencies())
+                if (parent->id() == peer_node->id())
+                    merge_allowed = false;
+
+            if (!merge_allowed)
+                return;
+
+            if (p.get_processing_order().get_processing_number(fused_node) <
+                p.get_processing_order().get_processing_number(peer_node))
+                recalc_processing_order = true;
+
+            p.fuse_nodes(*fused_node, node);
+        };
+
+        program_helpers::do_for_types<activation, scale, quantize, eltwise>(*node,
+                fuse_activation_f,
+                fuse_scale_f,
+                fuse_quantize_f,
+                fuse_eltwise_f);
+    }
+
+    // Need to update processing order to handle cases when the peer node's processing number is greater
+    // than the fused node's
+    if (recalc_processing_order)
+        p.get_processing_order().calc_processing_order(p);
 }
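
For the eltwise branch above, the choice of which convolution parent absorbs the sum can be isolated as a small helper. The types and the processing-number field below are mocked up, and the real pass additionally checks layouts, user counts, and the bfyx_f16 attributes: a convolution parent is required, and when both parents are convolutions the one scheduled later is fused so the earlier one can serve as the peer input.

#include <cstddef>
#include <utility>

struct parent_model {
    bool is_convolution = false;
    int processing_number = 0;   // position in the processing order
};

// Returns {fused_idx, peer_idx} with the same preference as fuse_eltwise_f above.
std::pair<std::size_t, std::size_t> choose_fused_parent(const parent_model& p0,
                                                        const parent_model& p1) {
    std::size_t fused = p0.is_convolution ? 0 : 1;
    std::size_t peer  = p0.is_convolution ? 1 : 0;
    const parent_model* parents[2] = {&p0, &p1};
    // If the peer is also a convolution and is processed later, fuse into it instead.
    if (parents[fused]->processing_number < parents[peer]->processing_number &&
        parents[peer]->is_convolution) {
        std::swap(fused, peer);
    }
    return {fused, peer};
}
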
 
 void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node* node) {
@@ -243,9 +365,10 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
     }
 
     // TODO Allow to pass arbitrary convolution activation in constructor
-    if (conv_node->get_fused_activation_func() != cldnn_activation_func::activation_none &&
-        conv_node->get_fused_activation_func() != cldnn_activation_func::activation_relu &&
-        conv_node->get_fused_activation_func() != cldnn_activation_func::activation_relu_negative_slope)
+    if (!conv_node->get_fused_activations_funcs().empty() &&
+        !(conv_node->get_fused_activations_funcs().size() == 1 && (conv_node->get_fused_activations_funcs()[0] == activation_func::relu ||
+                                                                   conv_node->get_fused_activations_funcs()[0] == activation_func::relu_negative_slope ||
+                                                                   conv_node->get_fused_activations_funcs()[0] == activation_func::none)))
         return;
 
     // make sure eltwise have only 2 inputs
@@ -298,26 +421,21 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
     if (eltw_node->inputs_calibration_term())
         return;
 
-    // TODO Allow to pass arbitrary convolution activation in constructor
-    if (conv_node->get_fused_activation_func() != cldnn_activation_func::activation_none) {
-        conv.with_activation = true;
-
-        if (conv_node->get_fused_activation_func() == cldnn_activation_func::activation_relu_negative_slope) {
-            conv.activation_negative_slope = conv_node->get_fused_activation_params().a;
-        }
-    }
     auto conv_id = conv_node->id();
     auto eltw_id = eltw_node->id();
 
+    bool conv_with_activation = !conv_node->get_fused_activations_funcs().empty();
+    auto conv_netagive_slope = conv_with_activation ? conv_node->get_fused_activations_params().begin()->a : 0.0f;
+
     auto fused_conv_eltw =
         std::make_shared<fused_conv_eltwise>(conv_id + "_fused_" + eltw_id,
                                              conv_node->input().id(),
                                              eltw_node->input(eltw_second_input_idx).id(),
                                              eltw.mode,
-                                             conv.weights.ref(),
-                                             conv.bias.ref(),
-                                             conv.weights_quantization_factors.ref(),
-                                             conv.output_calibration_factors.ref(),
+                                             conv.weights,
+                                             conv.bias,
+                                             conv.weights_quantization_factors,
+                                             conv.output_calibration_factors,
                                              conv.input_quantization_factor,
                                              eltw_scale,  // eltw_scale
                                              eltw.output_calibration_factors,
@@ -325,8 +443,8 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
                                              new_conv_stride,
                                              conv.input_offset,
                                              conv.dilation,
-                                             conv.with_activation,
-                                             conv.activation_negative_slope,
+                                             conv_with_activation,
+                                             conv_negative_slope,
                                              false,  // eltw.with_activation - use fused activation
                                              0.f);   // eltw.activation_negative_slope - use fused activation
 
@@ -335,7 +453,9 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
 
     auto& new_node = p.get_or_create(fused_conv_eltw);
 
-    new_node.set_fused_activation(eltw_node->get_fused_activation_func(), eltw_node->get_fused_activation_params());
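+    // Copy every fused activation (and its parameters) from the eltwise node onto the new fused node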
+    for (size_t i = 0; i < eltw_node->get_fused_activations_funcs().size(); i++)
+        new_node.add_fused_activation(eltw_node->get_fused_activations_funcs()[i],
+                                      eltw_node->get_fused_activations_params()[i]);
 
     // Copy output calibration factors pointer as replace will remove eltwise node
     program_node* output_calibration_factors = nullptr;
@@ -378,212 +498,26 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
     p.add_optimized_primitive_info(eltw_id, {new_node.id()});
 }
 
-void prepare_primitive_fusing::run(program_impl& p) {
-    bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
-
-    std::list<program_node*> conv_nodes;
-    auto itr = p.get_processing_order()
-                   .begin();  // note we need to use iterators since currently processed element can be removed
-    while (itr != p.get_processing_order().end()) {
-        auto node_itr = itr++;
-        if ((*node_itr)->is_type<convolution>()) {
-            // Don't push nodes that will be executed in bfyx_f16 layout since
-            // these ones supports eltwise fusing inside common convolution kernel
-            if (!_lo.is_format_optimized((*node_itr)->as<convolution>(), format::bfyx_f16))
-                conv_nodes.push_back(*node_itr);
-        }
-    }
-
-    // Disabled due to kernel being not optimized
-    // itr = conv_nodes.begin();
-    // while (itr != conv_nodes.end())
-    //{
-    //    auto node_itr = itr++;
-    //    auto& node = (*node_itr);
-
-    //    fuse_conv_bn_scale(p, node);
-    //}
-
-    // This loop tries fusing several reorders one by one (if present) into one reorder
-    itr = p.get_processing_order().begin();
-    while (itr != p.get_processing_order().end()) {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-
-        if (node->is_output())
-            continue;
-
-        program_helpers::do_for_types<reorder>(*node, [&p, is_debug](reorder_node& node) {
-            auto& input = node.input();
-
-            // Restrictions:
-            // - inputs cannot be padded
-            // - primitives input cannot be output
-            // - input was optimized
-            if (node.has_padded_dependency() || (input.is_output() && !is_debug) ||
-                node.get_dependencies().size() != 1 || input.can_be_optimized())
-                return;
-
-            // - check if previous node is reorder with 1 user (and if the layouts are the same - remove reorder)
-            // - do not fuse if current node has mean subtract
-            if (input.get_users().size() != 1 || !input.is_type<reorder>() ||
-                input.get_output_layout() != node.get_output_layout() || node.has_mean() ||
-                !node.get_primitive()->subtract_per_feature.empty())
-                return;
-
-            p.add_optimized_primitive_info(node.id());
-
-            input.set_output_layout(node.get_output_layout(), false);
-            p.extract_and_remove(node);
-        });
-    }
-
-    itr = p.get_processing_order().begin();
-    while (itr != p.get_processing_order().end()) {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-
-        program_helpers::do_for_types<activation>(*node, [&p, is_debug](activation_node& node) {
-            auto& input = node.input();
-
-            // Restrictions:
-            // - inputs cannot be padded
-            // - primitives input cannot be output
-            // - no activation additional input
-            // - input was optimized
-            if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
-                node.get_dependencies().size() != 1 || input.can_be_optimized())
-                return;
-
-            // - check if there is no activation fused already
-            // - limit to primitives which implementations support activation fusing
-            if (input.get_users().size() != 1 || input.get_fused_activation_func() != activation_none ||
-                // TODO: new api needs to be created to read such caps
-                // right now use whitelist so no new primitives will be affected in case of lack of fused activation
-                // support
-                (!input.is_type<batch_norm>() && !input.is_type<concatenation>() && !input.is_type<convolution>() &&
-                 !input.is_type<crop>() && !input.is_type<deconvolution>() && !input.is_type<eltwise>() &&
-                 !input.is_type<fully_connected>() && !input.is_type<lrn>() && !input.is_type<normalize>() &&
-                 !input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
-                 !input.is_type<reshape>() && !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
-                 !input.is_type<softmax>() && !input.is_type<upsampling>() && !input.is_type<mvn>() &&
-                 !input.is_type<depth_to_space>() && !input.is_type<gather>() && !input.is_type<reverse_sequence>() &&
-                 !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() &&
-                 !input.is_type<fused_conv_eltwise>()))
-                return;
-
-            input.set_fused_activation(node.get_primitive()->activation_func, node.get_primitive()->additional_params);
-            input.set_output_padding(node.get_output_layout().data_padding);
-
-            p.add_optimized_primitive_info(node.id(), {input.id()});
-
-            p.extract_and_remove(node);
-        });
-    }
-
-    // This loop tries fusing eltwise (sum) with deconvolution
-    itr = p.get_processing_order().begin();
-    while (itr != p.get_processing_order().end()) {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-
-        fuse_skip_layers(p, node);
-    }
-}
-
 void prepare_conv_eltw_fusing::run(program_impl& p) {
-    bool recalc_processing_order = false;
-    auto itr = p.get_processing_order().begin();
-
-    if (bfyx_f16_opt) {
-        while (itr != p.get_processing_order().end()) {
-            auto node_itr = itr++;
-            auto& node = (*node_itr);
-            program_helpers::do_for_types<eltwise>(*node, [&p, this, &recalc_processing_order](eltwise_node& node) {
-                std::shared_ptr<const cldnn::eltwise> prim = node.get_primitive();
-                if (node.is_output() || node.inputs_count() != 2 ||
-                    prim->mode != eltwise_mode::sum || !prim->stride.empty())
-                    return;
-
-                std::vector<cldnn::program_node*> parents = node.get_dependencies();
-                std::list<cldnn::program_node*> users = node.get_users();
-
-                auto parent1 = parents[0];
-                auto parent2 = parents[1];
-
-                // We should have at least one convolution node
-                if (!parent1->is_type<convolution>() && !parent2->is_type<convolution>())
-                    return;
-
-                // Choose a convolution node
-                size_t fused_idx = parent1->is_type<convolution>() ? 0 : 1;
-                size_t peer_idx  = parent1->is_type<convolution>() ? 1 : 0;
-
-                int p1_pnum = p.get_processing_order().get_processing_number(parents[fused_idx]);
-                int p2_pnum = p.get_processing_order().get_processing_number(parents[peer_idx]);
-
-                if (p1_pnum < p2_pnum && parents[peer_idx]->is_type<convolution>()) {
-                    std::swap(fused_idx, peer_idx);
-                }
-
-                if (parent1->is_type<convolution>() && !_lo.is_format_optimized(*parent1, format::bfyx_f16))
-                    return;
-                if (parent2->is_type<convolution>() && !_lo.is_format_optimized(*parent2, format::bfyx_f16))
-                    return;
-
-                auto fused_node = parents[fused_idx];
-                auto peer_node = parents[peer_idx];
-
-                // This fusing can be extended to support peer node in any layout and with broadcast
-                bool merge_allowed = fused_node->get_users().size() == 1 &&
-                                     fused_node->get_output_layout().format == format::bfyx_f16 &&
-                                     peer_node->get_output_layout().format == format::bfyx_f16 &&
-                                     fused_node->get_output_layout().size == peer_node->get_output_layout().size;
-
-                for (auto& parent : fused_node->get_dependencies())
-                    if (parent->id() == peer_node->id())
-                        merge_allowed = false;
-
-                if (!merge_allowed)
-                    return;
-
-                if (p.get_processing_order().get_processing_number(fused_node) <
-                    p.get_processing_order().get_processing_number(peer_node))
-                    recalc_processing_order = true;
-
-                fused_node->set_output_padding(node.get_output_layout().data_padding);
-
-                p.add_optimized_primitive_info(node.id(), { fused_node->id() });
-                fused_node->as<convolution>().add_fused_primitive(&node);
-
-                fused_node->users.remove(&node);
-                peer_node->users.remove(&node);
-                p.replace_all_usages(node, *fused_node);
-                node.dependencies.clear();
-                p.remove_if_dangling(node);
-            });
-        }
-        // Need to update processing order to handle cases when peer node processing number is greater
-        // than fused node one
-        if (recalc_processing_order)
-            p.get_processing_order().calc_processing_order(p);
-    }
-
-
     std::list<program_node*> conv_nodes;
     // note we need to use iterators since currently processed element can be removed
-    itr = p.get_processing_order().begin();
+    auto itr = p.get_processing_order().begin();
     while (itr != p.get_processing_order().end()) {
         auto node_itr = itr++;
-        if ((*node_itr)->is_type<convolution>())
+        if (node_itr != p.get_processing_order().end() &&
+            (*node_itr)->is_type<convolution>())
             if (!bfyx_f16_opt || !_lo.is_format_optimized((*node_itr)->as<convolution>(), format::bfyx_f16))
                 conv_nodes.push_back(*node_itr);
     }
 
     // fuse conv + eltwise after activations
-    itr = conv_nodes.begin();
-    while (itr != conv_nodes.end()) {
-        auto node_itr = itr++;
+    auto conv_itr = conv_nodes.begin();
+    while (conv_itr != conv_nodes.end()) {
+        auto node_itr = conv_itr++;
+
+        if (node_itr == conv_nodes.end())
+            break;
+
         auto& node = (*node_itr);
 
         fuse_conv_eltwise(p, node);
@@ -598,6 +532,13 @@ void prepare_conv_eltw_read_write_opt::conv_eltwise_read_write_opt(program_impl&
         return;
     }
 
+    // Look for conflicts: skip the in-place optimization if any other user of the second input is processed
+    // after this node, since writing into the shared buffer would overwrite data that user still needs
+    auto this_node_processing_number = p.get_processing_order().get_processing_number(node);
+    for (auto& user : second_input_node->users) {
+        if (p.get_processing_order().get_processing_number(user) > this_node_processing_number)
+            return;
+    }
+
     // buffer shared between primitives, if second input is mutable data, then we can reuse this memory
     auto shared_buffer_mem = second_input_node->is_type<mutable_data>()
                                  ? second_input_node->as<mutable_data>().get_attached_memory_ptr()
@@ -656,7 +597,8 @@ void prepare_conv_eltw_read_write_opt::run(program_impl& p) {
                    .begin();  // note we need to use iterators since currently processed element can be removed
     while (itr != p.get_processing_order().end()) {
         auto node_itr = itr++;
-        if ((*node_itr)->is_type<fused_conv_eltwise>())
+        if (node_itr != p.get_processing_order().end() &&
+            (*node_itr)->is_type<fused_conv_eltwise>())
             fused_conv_eltw_nodes.push_back(*node_itr);
     }
 
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp
new file mode 100644 (file)
index 0000000..b2518af
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "api/quantize.hpp"
+#include "api/binary_convolution.hpp"
+#include "api/scale.hpp"
+#include "api/pooling.hpp"
+
+#include "quantize_inst.h"
+#include "binary_convolution_inst.h"
+#include "data_inst.h"
+#include "pass_manager.h"
+#include "program_helpers.h"
+#include <algorithm>
+#include "to_string_utils.h"
+#include "error_handler.h"
+
+
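+// Detects 2-level quantize nodes whose per-element input_low and input_high thresholds are equal and
+// switches their output data type to data_types::bin so the result can be bit-packed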
+void prepare_quantization::prepare_packed_quantize(program_impl& p) {
+    auto itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto node_itr = itr++;
+        auto &node = (*node_itr);
+
+        program_helpers::do_for_types<quantize>(*node, [&](quantize_node& quantize_node) {
+            if (quantize_node.is_output())
+                return;
+
+            auto &input_low = quantize_node.get_dependency(1).template as<data>();
+            auto &input_high = quantize_node.get_dependency(2).template as<data>();
+
+            auto &mem_input_low = input_low.get_attached_memory();
+            auto &mem_input_high = input_high.get_attached_memory();
+
+            auto output_dt = quantize_node.get_output_layout().data_type;
+
+            if (quantize_node.get_primitive()->levels == 2) {
+                bool is_binarization = true;
+                switch (mem_input_high.get_layout().data_type) {
+                    case data_types::f32: {
+                        auto data_input_low = static_cast<float*>(mem_input_low.lock());
+                        auto data_input_high = static_cast<float*>(mem_input_high.lock());
+
+                        for (size_t i = 0; i < mem_input_high.get_layout().count(); i++) {
+                            if (data_input_high[i] != data_input_low[i]) {
+                                is_binarization = false;
+                                break;
+                            }
+                        }
+                        break;
+                    }
+                    case data_types::f16: {
+                        auto data_input_low = static_cast<uint16_t*>(mem_input_low.lock());
+                        auto data_input_high = static_cast<uint16_t*>(mem_input_high.lock());
+
+                        for (size_t i = 0; i < mem_input_high.get_layout().count(); i++) {
+                            if (data_input_high[i] != data_input_low[i]) {
+                                is_binarization = false;
+                                break;
+                            }
+                        }
+                        break;
+                    }
+                    default:
+                        CLDNN_ERROR_MESSAGE(node->id(), "prepare_quantization: Unsupported precision of quantize inputs");
+                }
+                mem_input_low.unlock();
+                mem_input_high.unlock();
+
+                if (is_binarization) {
+                    output_dt = data_types::bin;
+                }
+            }
+
+            quantize_node.typed_desc()->output_data_type = optional_data_type{output_dt};
+            quantize_node.recalc_output_layout();
+        });
+    }
+}
+
+void prepare_quantization::run(program_impl& p) {
+    prepare_packed_quantize(p);
+}
index 34eb779..5c0812f 100644 (file)
@@ -79,9 +79,9 @@ void propagate_constants::run(program_impl& p) {
     // with recomputed cldnn::data
     for (auto& cout : to_replace) {
         auto& id_to_replace = cout.first;
+        auto mem_impl = cout.second;
 
-        // TODO: do not use API primitives internally and get rid of this last 'cldnn::memory' internal usage
-        memory api_memory = details::memory_c_to_cpp_converter::convert(api_cast(cout.second.get()));
+        memory api_memory = memory(mem_impl.detach());
         // c-cpp converter does not retain since normally it is done inside API-impl layer (cldnn.cpp) so we need to do
         // it manually
         cout.second->add_ref();
index 07782bc..01f2caa 100644 (file)
 
 #include "pass_manager.h"
 #include "program_helpers.h"
+#include "binary_convolution_inst.h"
 #include <vector>
 #include <list>
+#include <utility>
 
 using namespace cldnn;
 
-remove_redundant_reorders::remove_redundant_reorders(bool bfyx_to_bfyx_f16_opt)
-    : base_pass("remove_redundant_reorders"), bfyx_to_bfyx_f16_opt(bfyx_to_bfyx_f16_opt) {}
+remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations)
+    : base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations) {}
 
 void remove_redundant_reorders::run(program_impl& p) {
-    auto itr = p.get_processing_order()
-                   .begin();  // note we need to use iterators since currently processed element can be removed
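+    // Helper that re-selects a kernel implementation for a node whose layout has changed
+    // (no-op unless update_implementations is enabled)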
+    auto update_implementation = [&](program_node& node) {
+        if (!update_implementations)
+            return;
+
+        auto& eng = p.get_engine();
+        auto new_impl = node.type()->choose_impl(eng, node);
+        node.set_selected_impl(std::move(new_impl));
+    };
+
+    // Fuse reorders into primitives
+    auto itr = p.get_processing_order().begin();
+    if (enable_reorder_fusing) {
+        while (itr != p.get_processing_order().end()) {
+            auto node_ptr = *itr++;
+            if (!node_ptr->is_type<reorder>())  // only care for reorders
+                continue;
+
+            auto& node = node_ptr->as<reorder>();
+
+            auto& input = node.input();
+            auto output_layout = node.get_output_layout();
+
+            if (node.is_output())
+                continue;
+
+            if (node.has_mean() || !node.get_primitive()->subtract_per_feature.empty())
+                continue;
+
+            if (!node.get_fused_activations_funcs().empty())
+                continue;
+
+            auto same_data_type = input.get_output_layout().data_type == output_layout.data_type;
+            if (!same_data_type)
+                continue;
+
+            bool all_users_fuse = true;
+            std::vector<program_node*> recalc_list;
+
+            for (auto usr : node.get_users()) {
+                if (!lo.can_fuse_reorder(input, *usr, input.get_output_layout().format, usr->get_output_layout().format)) {
+                    all_users_fuse = false;
+                    break;
+                }
+
+                if (usr->is_type<fully_connected>())
+                    recalc_list.push_back(usr);
+            }
+
+            if (!all_users_fuse)
+                continue;
+
+            auto output_padded = static_cast<bool>(output_layout.data_padding);
+            auto can_omit_padding = output_layout.format == format::bfyx_f16 && input.get_output_layout().format == format::bfyx;
+
+            if (output_padded && !can_omit_padding) {
+                if (input.get_users().size() != 1)
+                    continue;
+
+                if (input.is_type<input_layout>())
+                    continue;
+
+                input.merge_output_padding(output_layout.data_padding);
+            }
+
+            node.can_be_optimized(true);
+            p.extract_and_remove(node);
+
+            for (auto rl : recalc_list) {
+                rl->recalc_output_layout(true);
+            }
+        }
+    }
+
+    // Shrink reorder chains
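+    // When two reorders are chained, remove the dependency reorder if it is a plain reorder with a single user,
+    // or remove the current one by retargeting the dependency reorder to produce the final layout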
+    itr = p.get_processing_order().begin();
     while (itr != p.get_processing_order().end()) {
-        auto& node = (*itr++);          // post-inc to avoid invalidation due to possible erase
+        auto node = *itr++;
         if (!node->is_type<reorder>())  // only care for reorders
             continue;
 
-        program_node* current_node = node;
-        std::vector<program_node*> r_nodes_to_remove;
-
-        auto optimize = true;
-        while (current_node) {
-            auto& r_node = current_node->as<reorder>();
-            current_node = nullptr;
-
-            if (r_node.has_mean() ||
-                !r_node.get_primitive()->subtract_per_feature.empty() ||  // do not optimize if mean of subtract are present
-                r_node.is_output() ||                   // do not optimize when both reorder and layer before are outputs
-                r_node.get_fused_activation_func() != activation_none) {
-                // TODO Verify whether optimization can be performed at current sub-chain of reorders
-                optimize = false;
-                break;
-            }
+        auto& r_node = node->as<reorder>();
+        auto& dep_node = r_node.get_dependency(0);
+
+        if (!dep_node.is_type<reorder>())
+            continue;
+
+        auto& r_dep_node = dep_node.as<reorder>();
+
+        bool remove_dep = r_dep_node.get_users().size() == 1 &&
+            !r_dep_node.has_mean() &&
+            r_dep_node.get_primitive()->subtract_per_feature.empty() &&
+            !r_dep_node.is_output() &&
+            r_dep_node.get_fused_activations_funcs().empty();
 
-            r_nodes_to_remove.push_back(&r_node);
+        bool remove_current =
+            r_dep_node.get_users().size() == 1 &&
+            !r_dep_node.is_output() &&
+            !r_node.has_mean() &&
+            r_node.get_primitive()->subtract_per_feature.empty() &&
+            r_node.get_fused_activations_funcs().empty();
 
-            if (r_node.get_dependency(0).is_type<reorder>() && r_node.get_dependencies().size() == 1 &&
-                r_node.get_users().size() == 1 && r_node.get_dependency(0).get_users().size() == 1)
-                current_node = &r_node.get_dependency(0);
+        if (remove_dep) {
+            r_dep_node.can_be_optimized(true);
+            p.add_optimized_primitive_info(r_dep_node.id());
+            p.extract_and_remove(r_dep_node);
+            update_implementation(r_node);
+        } else if (remove_current) {
+            auto output_layout = r_node.get_output_layout();
+            auto dep_prim = std::const_pointer_cast<reorder>(r_dep_node.get_primitive());
+            dep_prim->output_format = output_layout.format;
+            dep_prim->output_data_type = output_layout.data_type;
+
+            r_node.can_be_optimized(true);
+            p.add_optimized_primitive_info(r_node.id());
+            p.extract_and_remove(r_node);
+
+            r_dep_node.recalc_output_layout(false);
+            update_implementation(r_dep_node);
         }
-        if (!optimize)
+    }
+
+    // Optimize reorders not changing memory layout
+    itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto node = *itr++;
+        if (!node->is_type<reorder>())  // only care for reorders
             continue;
 
-        assert(node->get_dependencies().size() == 1 &&
-               "reorder without mean should have exactly one dependecy (input)");
-        auto& r_output = r_nodes_to_remove.front();
-        auto& r_input = r_nodes_to_remove.back()->get_dependency(0);
-        auto o_layout = r_output->get_output_layout();
-        auto i_layout = r_input.get_output_layout();
+        auto& r_node = node->as<reorder>();
 
-        auto ident = program_helpers::are_layouts_identical(o_layout, i_layout);
-        if (!ident.second)
+        if (r_node.has_mean() ||
+            !r_node.get_primitive()->subtract_per_feature.empty() ||
+            r_node.is_output() ||
+            !r_node.get_fused_activations_funcs().empty())
             continue;
 
-        for (auto remove_reorder_node : r_nodes_to_remove) {
-            auto& r_node = remove_reorder_node->as<reorder>();
+        auto o_layout = r_node.get_output_layout();
+        auto i_layout = r_node.get_dependency(0).get_output_layout();
 
-            if (ident.first && ident.second && r_node.is_output() &&
-                r_node.get_dependency(0).is_input()) {  // do not optimize when reorder is output and layer before is input
-                optimize = false;
-                break;
-            }
-        }
-        if (!optimize)
+        auto ident = program_helpers::are_layouts_identical(o_layout, i_layout);
+
+        if (!ident.second)
             continue;
 
-        auto rem_itr = r_nodes_to_remove.begin();
-        while (rem_itr != r_nodes_to_remove.end()) {
-            auto remove_reorder_node = *rem_itr++;
-            auto& r_node = remove_reorder_node->as<reorder>();
-            // mark as optimized
-            r_node.can_be_optimized(true);
-            r_node.requires_reinterpret(!ident.first);
-            if (ident.first) {  // no need of reshape
-                p.add_optimized_primitive_info(r_node.get_primitive()->id);
-                p.extract_and_remove(
-                    r_node);  // try to remove if possible (with respect to r_node not being marked as output)
-            }
+        // mark as optimized
+        r_node.can_be_optimized(true);
+        r_node.requires_reinterpret(!ident.first);
+        if (ident.first) {  // no need of reshape
+            p.add_optimized_primitive_info(r_node.get_primitive()->id);
+            p.extract_and_remove(
+                r_node);  // try to remove if possible (with respect to r_node not being marked as output)
         }
     }
 
@@ -116,7 +201,7 @@ void remove_redundant_reorders::run(program_impl& p) {
             if (user->is_type<reorder>() &&
                 user != node &&
                 !user->is_output() &&
-                user->get_fused_activation_func() == cldnn_activation_func_t::activation_none) {
+                user->get_fused_activations_funcs().empty()) {
                 auto l1 = node->get_output_layout();
                 auto l2 = user->get_output_layout();
 
@@ -129,6 +214,9 @@ void remove_redundant_reorders::run(program_impl& p) {
         if (r_nodes_to_remove.empty())
             continue;
 
+        if (itr == p.get_processing_order().end())
+            break;
+
         auto rem_itr = r_nodes_to_remove.begin();
         while (rem_itr != r_nodes_to_remove.end()) {
             auto remove_reorder_node = *rem_itr++;
@@ -144,29 +232,77 @@ void remove_redundant_reorders::run(program_impl& p) {
         }
     }
 
-    if (bfyx_to_bfyx_f16_opt) {
-        // Removes reorder bfyx->bfyx_f16 when ic=3 and oc>=16 in order to enable specific kernel
-        // Needs to be done after passes that can change layouts (like prepare_padding)
-        itr = p.get_processing_order().begin();
-        while (itr != p.get_processing_order().end()) {
-            auto &node = *itr++;
-            if (!node->is_type<reorder>())
-                continue;
-
-            if (node->get_dependencies().size() != 1 || node->get_users().size() != 1)
-                continue;
+    // This pass removes a reorder if the previous node can store its output directly in the required layout
+    itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto& node = *itr++;
+        if (!node->is_type<reorder>() || !node->is_in_data_flow() || node->get_dependencies().size() != 1)
+            continue;
 
-            auto &user = node->get_users().front();
-            auto &dep = node->get_dependency(0);
+        auto& dep = node->get_dependency(0);
+        if (!dep.is_type<binary_convolution>() || node->get_output_layout().format != format::bfyx_f16)
+            continue;
 
-            if (user->is_type<convolution>() &&
-                node->get_fused_activation_func() == cldnn_activation_func_t::activation_none &&
-                dep.get_output_layout().format == format::bfyx &&
-                dep.get_output_layout().size.feature[0] == 3 &&
-                node->get_output_layout().format == format::bfyx_f16 &&
-                user->get_output_layout().size.feature[0] >= 16) {
-                p.extract_and_remove(*node);
-            }
+        auto output_layout = node->get_output_layout();
+        dep.set_output_layout(output_layout, false);
+        if (dep.type()->does_possible_implementation_exist(p.get_engine(), dep)) {
+            p.replace_all_usages(*node, dep);
+            p.get_processing_order().erase(node);
+            p.add_optimized_primitive_info(node->id());
+            p.remove_all_connections(*node);
+            p.remove_if_dangling(*node);
         }
     }
+
+    // This pass removes a reorder if the next node supports the reorder's input format
+    itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end()) {
+        auto& node = *itr++;
+        if (!node->is_type<reorder>() || !node->is_in_data_flow() || node->get_users().size() != 1 || node->get_dependencies().size() != 1)
+            continue;
+
+        auto& usr = node->get_users().front();
+        auto& dep = node->get_dependency(0);
+        if (!usr->is_type<quantize>() || node->get_output_layout().format != format::bfyx ||
+            dep.get_output_layout().format != format::bfyx_f16)
+            continue;
+
+        dep.merge_output_padding(node->get_output_layout().data_padding);
+        p.replace_all_usages(*node, dep);
+        p.get_processing_order().erase(node);
+        p.add_optimized_primitive_info(node->id());
+        p.remove_all_connections(*node);
+        p.remove_if_dangling(*node);
+    }
+
+    // Remove the u8 -> fp conversion reorder if the next layer is scale:
+    // the scale node can load u8 itself, convert it to the fp type, and perform the scaling and shifting
+    // FIXME: the scale layer sometimes works incorrectly for u8 input; once that is fixed, this pass can be re-enabled.
+//    itr = p.get_processing_order().begin();
+//    while (itr != p.get_processing_order().end()) {
+//        auto& node = *itr++;
+//        if (!node->is_type<reorder>() || !node->is_in_data_flow())
+//            continue;
+//
+//        if (node->get_users().size() != 1 || node->get_dependencies().size() != 1)
+//            continue;
+//
+//        auto& usr = node->get_users().front();
+//        auto& dep = node->get_dependency(0);
+//        if (!usr->is_type<scale>() ||
+//            !dep.is_input() ||
+//            dep.get_output_layout().data_type != data_types::u8 ||
+//            (node->get_output_layout().data_type != data_types::f32 && node->get_output_layout().data_type != data_types::f16) ||
+//            dep.get_output_layout().format != node->get_output_layout().format ||
+//            dep.get_output_layout().size != node->get_output_layout().size)
+//            continue;
+//
+//        usr->merge_output_padding(node->get_output_layout().data_padding);
+//
+//        p.replace_all_usages(*node, dep);
+//        p.get_processing_order().erase(node);
+//        p.add_optimized_primitive_info(node->id());
+//        p.remove_all_connections(*node);
+//        p.remove_if_dangling(*node);
+//    }
 }
index 5bb34bd..b5d08dc 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "api/CPP/proposal.hpp"
-#include "api/CPP/roi_pooling.hpp"
-#include "api/CPP/reorg_yolo.hpp"
-#include "api/CPP/eltwise.hpp"
-#include <api/CPP/binary_convolution.hpp>
-#include <api/CPP/prior_box.hpp>
-#include "api/CPP/softmax.hpp"
-#include "api/CPP/permute.hpp"
-#include "api/CPP/reshape.hpp"
-#include "api/CPP/activation.hpp"
-#include "api/CPP/scale.hpp"
-#include "api/CPP/custom_gpu_primitive.hpp"
-#include "upsampling_inst.h"
+#include "api/binary_convolution.hpp"
 #include "pass_manager.h"
 #include "program_node.h"
 #include "layout_optimizer.h"
 #include "program_helpers.h"
 #include <vector>
 #include <memory>
+#include <list>
+#include <map>
+#include <set>
 
 using namespace cldnn;
 
 // ToDo remove friendship relation from program_impl
 
-reorder_inputs::reorder_inputs(layout_optimizer& lo_ref) : base_pass("reorder_inputs"), _lo(lo_ref) {}
+reorder_inputs::reorder_inputs(layout_optimizer& lo_ref, reorder_factory& rf_ref) : base_pass("reorder_inputs"), _lo(lo_ref), _rf(rf_ref) {}
 
-void reorder_inputs::run(program_impl& p) { run(p, _lo); }
+void reorder_inputs::run(program_impl& p) { run(p, _lo, _rf); }
 
-void reorder_inputs::run(program_impl& p, layout_optimizer& lo) {
-    // first pass to set layout optimization_attributes for topology
-    bool can_use_fsv32 = true;
-    bool can_use_f16 = true;
-    size_t total_conv_layers = 0;
-    size_t total_dw_conv_layers = 0;
-    size_t total_grouped_conv_layers = 0;
-    size_t opt_conv_layers_bfyx_f16 = 0;
+namespace {
 
-    for (auto& node : p.get_processing_order()) {
-        auto& prim = *node;
-        if (prim.type() == cldnn::convolution::type_id()) {
-            if (prim.as<convolution>().get_primitive()->split() > 1)
-                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::splitted_convolution, 1);
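+// Collects the layout optimizer's preferred format for every node that participates in the data flow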
+std::map<program_node*, format::type> get_preferred_formats(program_impl& p, layout_optimizer& lo) {
+    std::map<program_node*, format::type> fmt_map;
+    for (auto n : p.get_processing_order()) {
+        if (!n->is_in_data_flow())
+            continue;
 
-            if (prim.as<convolution>().get_primitive()->groups > 1)
-                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::group_convolution, 1);
+        auto ex = lo.get_preferred_format(*n);
+        fmt_map[n] = ex;
+    }
+    return fmt_map;
+}
+
+enum class direction_e {
+    forwards = 0,
+    backwards = 1
+};
 
-            if (prim.as<convolution>().get_primitive()->deformable_mode)
-                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::deformable_convolution, 1);
+inline constexpr direction_e reverse(direction_e dir) {
+    return dir == direction_e::forwards ? direction_e::backwards : direction_e::forwards;
+}
 
-            uint32_t ifm = static_cast<uint32_t>(node->get_dependency(0).get_output_layout().size.feature[0]);
-            if (prim.as<convolution>().get_primitive()->groups == ifm)
-                total_dw_conv_layers++;
-            else if (prim.as<convolution>().get_primitive()->groups > 1 || prim.as<convolution>().get_primitive()->split() > 1)
-                total_grouped_conv_layers++;
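+// Wrapper that abstracts the traversal direction over the graph: when travelling forwards the next nodes are
+// the users, when travelling backwards they are the dependencies; first()/second() keep a (current, next) pair
+// ordered consistently with the data-flow direction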
+template <direction_e dir = direction_e::forwards>
+struct travel_direction_wrapper {
+    static const std::list<program_node*>& next_nodes(program_node* node) {
+        return node->get_users();
+    }
 
-            if (lo.is_format_optimized(prim.as<convolution>(), format::bfyx_f16))
-                opt_conv_layers_bfyx_f16++;
+    template <typename T>
+    static T& first(T& current, T& /*next*/) { return current; }
 
-            total_conv_layers++;
-        }
+    template <typename T>
+    static T& second(T& /*current*/, T& next) { return next; }
+};
 
-        // list of layers that do not support yxfb or perform worse than bfyx
-        if (prim.type() == cldnn::detection_output::type_id() || prim.type() == cldnn::proposal::type_id() ||
-            prim.type() == cldnn::roi_pooling::type_id() || prim.type() == cldnn::deconvolution::type_id() ||
-            prim.type() == cldnn::upsampling::type_id() || prim.type() == cldnn::reorg_yolo::type_id())
-            lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_only_layer, 1);
-
-        // Check if all layers in topology support fs_byx_fsv32 format
-        if (prim.is_in_data_flow() && prim.type() != cldnn::convolution::type_id() &&
-            prim.type() != cldnn::pooling::type_id() && prim.type() != cldnn::eltwise::type_id() &&
-            prim.type() != cldnn::fully_connected::type_id() && prim.type() != cldnn::reorder::type_id() &&
-            prim.type() != cldnn::permute::type_id() && prim.type() != cldnn::reshape::type_id() &&
-            prim.type() != cldnn::input_layout::type_id() && prim.type() != cldnn::softmax::type_id())
-            can_use_fsv32 = false;
-
-        if (prim.is_in_data_flow() &&
-            prim.type() != cldnn::convolution::type_id() &&
-            prim.type() != cldnn::activation::type_id() &&
-            prim.type() != cldnn::pooling::type_id() &&
-            prim.type() != cldnn::eltwise::type_id() &&
-            prim.type() != cldnn::permute::type_id() &&
-            prim.type() != cldnn::reshape::type_id() &&
-            prim.type() != cldnn::detection_output::type_id() &&
-            prim.type() != cldnn::custom_gpu_primitive::type_id() &&
-            prim.type() != cldnn::concatenation::type_id() &&
-            prim.type() != cldnn::fully_connected::type_id() &&
-            prim.type() != cldnn::reorder::type_id() &&
-            prim.type() != cldnn::input_layout::type_id() &&
-            prim.type() != cldnn::softmax::type_id() &&
-            prim.type() != cldnn::prior_box::type_id() &&
-            prim.type() != cldnn::scale::type_id())
-            can_use_f16 = false;
+template <>
+struct travel_direction_wrapper<direction_e::backwards> {
+    static const std::vector<program_node*>& next_nodes(program_node* node) {
+        return node->get_dependencies();
     }
 
+    template <typename T>
+    static T& first(T& /*current*/, T& next) { return next; }
+
+    template <typename T>
+    static T& second(T& current, T& /*next*/) { return current; }
+};
+
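+// Recursively checks whether format `fmt` coming from `prev` can be adopted by `node` and the nodes behind it
+// in the traversal direction without introducing a reorder that cannot be fused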
+template <direction_e dir>
+bool can_propagate_formats_rec(
+    const std::map<program_node*, format::type>& fmt_map,
+    layout_optimizer& lo,
+    program_node* prev,
+    program_node* node,
+    format::type fmt) {
+
+    auto sel_fmt = fmt_map.at(node);
+    if (fmt == sel_fmt)
+        return true;
+
+    auto first_node = travel_direction_wrapper<dir>::first(prev, node);
+    auto second_node = travel_direction_wrapper<dir>::second(prev, node);
+    auto first_fmt = travel_direction_wrapper<dir>::first(fmt, sel_fmt);
+    auto second_fmt = travel_direction_wrapper<dir>::second(fmt, sel_fmt);
+
+    if (lo.can_fuse_reorder(*first_node,
+                            *second_node,
+                            first_fmt,
+                            second_fmt))
+        return true;
+
+    if (sel_fmt != format::any)
+        return false;
+
+    if (!lo.is_format_supported(*node, fmt))
+        return false;
+
+    auto reverse_reorders = std::count_if(
+        travel_direction_wrapper<reverse(dir)>::next_nodes(node).begin(),
+        travel_direction_wrapper<reverse(dir)>::next_nodes(node).end(),
+        [&](program_node* rev) {
+        return rev->is_in_data_flow() && fmt_map.at(rev) != fmt && rev != prev;
+    });
+
+    if (reverse_reorders > 0)
+        return false;
+
+    for (auto next : travel_direction_wrapper<dir>::next_nodes(node)) {
+        if (!next->is_in_data_flow())
+            continue;
+        if (!can_propagate_formats_rec<dir>(fmt_map, lo, node, next, fmt))
+            return false;
+    }
 
-    // Due to fact that single winograd convolution is faster than bfyx_f16 and
-    // using them together leads do redundant reorders, whole topology switch
-    // will be performed if at least half of layers can use bfyx_f16.
-    bool should_use_bfyx_f16_conv = can_use_f16 &&
-                                    ((opt_conv_layers_bfyx_f16 / static_cast<float>(total_conv_layers)) > 0.5f) &&
-                                    total_conv_layers > 11 &&
-                                    total_grouped_conv_layers == 0;  // conv with groups are not supported correctly yet
-
-    if (can_use_fsv32)
-        lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::only_fsv32_layers, 1);
-
-    if (should_use_bfyx_f16_conv)
-        lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_f16_network, 1);
-
-    const auto reorder_input = [&p, &lo, should_use_bfyx_f16_conv](typed_program_node<convolution>& conv_node) {
-        auto conv_prim = conv_node.get_primitive();
-        auto& input_node = conv_node.get_dependency(0);
-        auto&& weights_layout = conv_node.weights(0).get_output_layout();
-        auto&& input_layout = input_node.get_output_layout();
-
-        std::shared_ptr<reorder> new_input = nullptr;
-
-        if (input_node.type() == reorder::type_id()) {  // convolution's input is a reorder
-            auto reorder_prim = input_node.as<reorder>().typed_desc();
-            auto& reorder_input = input_node.get_dependency(0);
-            auto reorder_layout = input_node.get_output_layout();
-            reorder_layout.data_type = *reorder_prim->output_data_type;
-            new_input = lo.get_reorder(reorder_layout,
-                                       reorder_prim->id,
-                                       layout_optimizer::data_type::input,
-                                       conv_node,
-                                       weights_layout)
-                            .first;
-
-            auto reorder_removed = false;
-            if (new_input && new_input->output_format != format::winograd_2x3_s1_data &&
-                new_input->output_format != format::bf8_xy16 && new_input->output_format != format::byxf &&
-                new_input->output_format != format::fs_b_yx_fsv32 &&
-                new_input->output_format != format::bfyx_f16) {  // output format is not optimal
-                auto reorder_input_layout = reorder_input.get_output_layout();
-
-                auto opt_layout =
-                    layout(*new_input->output_data_type, new_input->output_format, reorder_input_layout.size);
-                if (reorder_input_layout == opt_layout) {  // reorder 'breaks' optimal format
-                    if (reorder_prim->subtract_per_feature.empty() && reorder_prim->mean.empty() &&
-                        !reorder_prim->output_padding) {  // just plain reorder
-                        conv_node.replace_dependency(0, reorder_input);
-                        if (input_node.get_users().size() == 0 && !input_node.is_output()) {
-                            reorder_removed = p.extract_and_remove(input_node);
-                        }
-                        new_input = nullptr;
-                    } else {  // change reorder's output layout
-                        reorder_prim->output_format = opt_layout.format;
-                        reorder_prim->output_data_type = opt_layout.data_type;
-                        new_input = nullptr;
-                    }
-                } else {  // current reorder gives bad output, simply change it
-                    reorder_prim->output_format = opt_layout.format;
-                    reorder_prim->output_data_type = opt_layout.data_type;
-                    new_input = nullptr;
-                }
-            }
+    return true;
+}
 
-            if (!reorder_removed)
-                input_node.recalc_output_layout();
-            else
-                conv_node.recalc_output_layout();
-        } else {
-            new_input = lo.get_reorder(input_node.get_output_layout(),
-                                       input_node.id(),
-                                       layout_optimizer::data_type::input,
-                                       conv_node,
-                                       weights_layout)
-                            .first;
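+// Recursively overwrites the selected format of `node` and of the nodes behind it in the traversal direction
+// with `fmt`, stopping where the format already matches or where a reorder can be fused instead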
+template <direction_e dir>
+void propagate_formats_rec(std::map<program_node*, format::type>& fmt_map,
+                           layout_optimizer& lo,
+                           program_node* prev,
+                           program_node* node,
+                           format::type fmt) {
+    auto sel_fmt = fmt_map.at(node);
+    if (sel_fmt == fmt)
+        return;
+
+    auto first_node = travel_direction_wrapper<dir>::first(prev, node);
+    auto second_node = travel_direction_wrapper<dir>::second(prev, node);
+    auto first_fmt = travel_direction_wrapper<dir>::first(fmt, sel_fmt);
+    auto second_fmt = travel_direction_wrapper<dir>::second(fmt, sel_fmt);
+
+    if (lo.can_fuse_reorder(*first_node,
+                            *second_node,
+                            first_fmt,
+                            second_fmt))
+        return;
+
+    fmt_map.at(node) = fmt;
+
+    for (auto next : travel_direction_wrapper<dir>::next_nodes(node)) {
+        if (!next->is_in_data_flow())
+            continue;
+        propagate_formats_rec<dir>(fmt_map, lo, node, next, fmt);
+    }
+}
+
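+// Propagates the format chosen for `node` to its neighbours in one direction, but only when the whole
+// reachable sub-graph can accept it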
+template <direction_e dir>
+void propagate_formats_in_dir(std::map<program_node*, format::type>& fmt_map,
+                         layout_optimizer& lo,
+                         program_node* node) {
+    auto fmt = fmt_map.at(node);
+
+    for (auto next : travel_direction_wrapper<dir>::next_nodes(node)) {
+        if (!next->is_in_data_flow())
+            continue;
+        if (!can_propagate_formats_rec<dir>(fmt_map, lo, node, next, fmt))
+            return;
+    }
+
+    for (auto next : travel_direction_wrapper<dir>::next_nodes(node)) {
+        if (!next->is_in_data_flow())
+            continue;
+        propagate_formats_rec<dir>(fmt_map, lo, node, next, fmt);
+    }
+}
+
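+// Walks the processing order and propagates every explicitly chosen format both forwards and backwards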
+void propagate_formats(program_impl& p, std::map<program_node*, format::type>& fmt_map, layout_optimizer& lo) {
+    auto it = p.get_processing_order().begin();
+    while (it != p.get_processing_order().end()) {
+        auto node = *it++;
+
+        if (fmt_map.count(node) == 0 || fmt_map.at(node) == format::any)
+            continue;
+
+        propagate_formats_in_dir<direction_e::forwards>(fmt_map, lo, node);
+        propagate_formats_in_dir<direction_e::backwards>(fmt_map, lo, node);
+    }
+}
+
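+// Rough reorder cost around a node: the number of reorders required and the total element count of the reordered buffers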
+struct reorder_cnt {
+    size_t number;
+    size_t total_sizes;
+};
+
+template <direction_e dir>
+reorder_cnt count_reorders_in_dir(const std::map<program_node*, format::type>& fmt_map, layout_optimizer& lo, program_node* node) {
+    size_t cnt = 0;
+    size_t size = 0;
+    auto sel_fmt = fmt_map.at(node);
+
+    for (auto next : travel_direction_wrapper<dir>::next_nodes(node)) {
+        if (!next->is_in_data_flow())
+            continue;
+
+        auto next_fmt = fmt_map.at(next);
+
+        if (next_fmt == format::any ||
+            (sel_fmt != next_fmt &&
+             !lo.can_fuse_reorder(*travel_direction_wrapper<dir>::first(node, next),
+                                  *travel_direction_wrapper<dir>::second(node, next),
+                                  travel_direction_wrapper<dir>::first(sel_fmt, next_fmt),
+                                  travel_direction_wrapper<dir>::second(sel_fmt, next_fmt)))) {
+            cnt += 1;
+            size += travel_direction_wrapper<dir>::first(node, next)->get_output_layout().count();
         }
+    }
 
-        if (new_input && new_input->output_format == format::winograd_2x3_s1_data) {
-            auto lower_size = (conv_prim->input_offset.negate() + input_layout.size);
-
-            tensor upper_input_padding = tensor{0};
-            upper_input_padding.spatial[0] =
-                (2 - (lower_size.spatial[0] % 2)) % 2;  // winograd conv requires input's x to be in form 4 + 2n, with
-                                                        // restriction that x >= 3, we can shortage it to x % 2 == 0
-            upper_input_padding.spatial[1] =
-                (8 - ((lower_size.spatial[1] - 2) % 8)) % 8;  // for y, y - 2 % 8 == 0 must hold
-
-            p.apply_needed_padding(conv_node,
-                                   input_node,
-                                   padding{conv_prim->input_offset.negate().sizes(), upper_input_padding.sizes()});
-
-            auto winograd_output = std::make_shared<reorder>("_winograd_" + conv_node.id(),
-                                                             conv_node.id(),
-                                                             input_layout.format,
-                                                             input_layout.data_type,
-                                                             std::vector<float>{},
-                                                             cldnn_reorder_mean_mode::mean_subtract,
-                                                             conv_node.output_layout.data_padding);
-            conv_node.output_layout.data_padding = padding{};
-            program_node& back_node = p.get_or_create(winograd_output);
-            p.get_processing_order().insert_next(&conv_node, &back_node);
-
-            auto bias_term = conv_node.bias_term();
-            // create additional eltwise node after reorder to compute bias
-            if (bias_term) {
-                auto& bias_node = conv_node.get_dependency(2);
-                std::vector<primitive_id> inputs = {back_node.id(), bias_node.id()};
-                auto winograd_output_biases = std::make_shared<eltwise>(back_node.id() + "_bias",
-                                                                        inputs,
-                                                                        cldnn::eltwise_mode::sum,
-                                                                        conv_prim->with_activation,
-                                                                        conv_prim->activation_negative_slope,
-                                                                        back_node.get_output_layout().data_padding);
-                back_node.get_output_layout().data_padding = padding{};
-                auto& back_bias_node = p.get_or_create(winograd_output_biases);
-                p.get_processing_order().insert_next(&back_node, &back_bias_node);
-                p.replace_all_usages(back_node, back_bias_node);
-                p.add_connection(back_node, back_bias_node);
-                p.add_connection(bias_node, back_bias_node);
-                conv_node.invalidate_users();
-                p.replace_all_usages(conv_node, back_bias_node);
-            }
+    return { cnt, size };
+}
 
-            if (conv_prim->with_activation) {
-                conv_node.typed_desc()->with_activation = false;
-                if (!bias_term)
-                    back_node.set_fused_activation(
-                        activation_relu_negative_slope,
-                        cldnn_activation_additional_params_t{conv_prim->activation_negative_slope, 0.0f});
-            }
+reorder_cnt count_reorders(const std::map<program_node*, format::type>& fmt_map, layout_optimizer& lo, program_node* node) {
+    auto fwd = count_reorders_in_dir<direction_e::forwards>(fmt_map, lo, node);
+    auto bwd = count_reorders_in_dir<direction_e::backwards>(fmt_map, lo, node);
 
-            if (!bias_term) {
-                conv_node.invalidate_users();
-                p.replace_all_usages(conv_node, back_node);
-            }
-            p.add_connection(conv_node, back_node);
-
-            auto& r_node = p.get_or_create(new_input);
-            r_node.as<reorder>().set_input_offset(conv_prim->input_offset);
-
-            if (!bias_term) {
-                p.swap_names(conv_node, back_node);
-                if (conv_node.is_output()) {
-                    conv_node.set_output(false);
-                    back_node.set_output(true);
-                    for (auto& output : p.get_outputs()) {
-                        if (output == &conv_node) {
-                            output = &back_node;
-                            break;
-                        }
-                    }
-                }
-            } else {
-                conv_node.remove_dependency(2);
-                auto& back_bias_node = *(p.nodes_map.find(back_node.id() + "_bias")->second);
-                p.swap_names(conv_node, back_bias_node);
-                if (conv_node.is_output()) {
-                    conv_node.set_output(false);
-                    back_bias_node.set_output(true);
-                    for (auto& output : p.get_outputs()) {
-                        if (output == &conv_node) {
-                            output = &back_bias_node;
-                            break;
-                        }
-                    }
-                }
+    return { fwd.number + bwd.number, fwd.total_sizes + bwd.total_sizes };
+}
+
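+// For nodes without a fixed preferred format, gathers candidate formats from neighbouring users and
+// dependencies and tries to choose one that reduces the number and size of local reorders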
+void minimize_local_reorders(program_impl& p, std::map<program_node*, format::type>& fmt_map, layout_optimizer& lo) {
+    for (auto node : p.get_processing_order()) {
+        if (!node->is_in_data_flow())
+            continue;
+
+        if (lo.get_preferred_format(*node) != format::any)
+            continue;
+
+        if (fmt_map.at(node) == format::any) {
+            auto out_fmt = node->get_output_layout().format;
+            if (lo.is_format_supported(*node, out_fmt)) {
+                fmt_map.at(node) = out_fmt;
             }
         }
 
-        if (new_input && (new_input->output_format == format::bf8_xy16 || new_input->output_format == format::byxf)) {
-            auto conv1x1_output = std::make_shared<reorder>("_conv1x1_reorder_back_" + conv_node.id(),
-                                                            conv_node.id(),
-                                                            input_layout.format,
-                                                            input_layout.data_type);
-            auto& back_node = p.get_or_create(conv1x1_output);
-            p.get_processing_order().insert_next(&conv_node, &back_node);
-            conv_node.invalidate_users();
-            p.replace_all_usages(conv_node, back_node);
-            p.add_connection(conv_node, back_node);
-
-            p.mark_if_constant(back_node);
-            p.mark_if_data_flow(back_node);
-            p.mark_if_constant(conv_node);
-            p.mark_if_data_flow(conv_node);
-        }
+        auto sel_fmt = fmt_map.at(node);
+        auto best_reorder_cnt = count_reorders(fmt_map, lo, node);
+        auto best_format = sel_fmt;
+
+        if (best_reorder_cnt.number == 0)
+            continue;
+
+        std::set<format::type> local_formats;
 
-        if (new_input) {
-            auto& r_node = p.get_or_create(new_input);
-            p.add_intermediate(r_node, conv_node, 0, r_node.get_dependencies().empty());
-            conv_node.recalc_output_layout();
+        for (auto user : node->get_users()) {
+            auto user_fmt = fmt_map.at(user);
+
+            if (user_fmt != format::any &&
+                lo.is_format_supported(*node, user_fmt)) {
+                local_formats.insert(user_fmt);
+            }
         }
-    };
 
-    const auto reorder_input_convolution_binary = [&p, &lo](typed_program_node<binary_convolution>& conv_bin_node) {
-        auto conv_bin_prim = conv_bin_node.get_primitive();
-        auto& input_node = conv_bin_node.get_dependency(0);
-        auto&& weights_layout = conv_bin_node.weights(0).get_output_layout();
-
-        std::shared_ptr<reorder> new_input = nullptr;
-
-        if (input_node.type() == reorder::type_id()) {
-            auto reorder_prim = input_node.as<reorder>().typed_desc();
-            auto& reorder_input = input_node.get_dependency(0);
-            auto reorder_layout = input_node.get_output_layout();
-            reorder_layout.data_type = *reorder_prim->output_data_type;
-            new_input = lo.get_reorder(reorder_layout,
-                                       reorder_prim->id,
-                                       layout_optimizer::data_type::input,
-                                       conv_bin_node,
-                                       weights_layout)
-                            .first;
-
-            auto reorder_removed = false;
-            if (new_input && new_input->output_format != format::b_fs_yx_32fp) {
-                auto reorder_input_layout = reorder_input.get_output_layout();
-
-                auto opt_layout =
-                    layout(*new_input->output_data_type, new_input->output_format, reorder_input_layout.size);
-                if (reorder_input_layout == opt_layout) {  // reorder 'breaks' optimal format
-                    if (reorder_prim->subtract_per_feature.empty() && reorder_prim->mean.empty() &&
-                        !reorder_prim->output_padding) {  // just plain reorder
-                        conv_bin_node.replace_dependency(0, reorder_input);
-                        if (input_node.get_users().size() == 0 && !input_node.is_output()) {
-                            reorder_removed = p.extract_and_remove(input_node);
-                        }
-                        new_input = nullptr;
-                    } else {  // change reorder's output layout
-                        reorder_prim->output_format = opt_layout.format;
-                        reorder_prim->output_data_type = opt_layout.data_type;
-                        new_input = nullptr;
-                    }
-                } else {  // current reorder gives bad output, simply change it
-                    reorder_prim->output_format = opt_layout.format;
-                    reorder_prim->output_data_type = opt_layout.data_type;
-                    new_input = nullptr;
-                }
+        for (auto dep : node->get_dependencies()) {
+            if (!dep->is_in_data_flow())
+                continue;
+
+            auto dep_fmt = fmt_map.at(dep);
+
+            if (dep_fmt != format::any &&
+                lo.is_format_supported(*node, dep_fmt)) {
+                local_formats.insert(dep_fmt);
             }
+        }
+
+        if (local_formats.empty())
+            continue;
 
-            if (!reorder_removed)
-                input_node.recalc_output_layout();
-            else
-                conv_bin_node.recalc_output_layout();
-        } else {
-            new_input = lo.get_reorder(input_node.get_output_layout(),
-                                       input_node.id(),
-                                       layout_optimizer::data_type::input,
-                                       conv_bin_node,
-                                       weights_layout)
-                            .first;
+        for (auto new_fmt : local_formats) {
+            fmt_map.at(node) = new_fmt;
+
+            auto reorders_cnt = count_reorders(fmt_map, lo, node);
+
+            if (reorders_cnt.number < best_reorder_cnt.number ||
+                (reorders_cnt.number == best_reorder_cnt.number && reorders_cnt.total_sizes < best_reorder_cnt.total_sizes) ) {
+                best_reorder_cnt = reorders_cnt;
+                best_format = new_fmt;
+            }
         }
 
-        if (new_input) {
-            auto& r_node = p.get_or_create(new_input);
-            p.add_intermediate(r_node, conv_bin_node, 0, r_node.get_dependencies().empty());
-            conv_bin_node.recalc_output_layout();
+        fmt_map.at(node) = best_format;
+    }
+}
+
+template <direction_e dir>
+void insert_reorders_in_dir(program_impl& p, const std::map<program_node*, format::type>& fmt_map, reorder_factory& rf, program_node* node) {
+    auto fmt = fmt_map.at(node);
+
+    auto next_cpy = travel_direction_wrapper<dir>::next_nodes(node);
+    for (auto next : next_cpy) {
+        if (!next->is_in_data_flow())
+            continue;
+
+        if (fmt_map.count(next) > 0 && fmt_map.at(next) == fmt)
+            continue;
+
+        auto next_layout = next->get_output_layout();
+        auto current_layout = node->get_output_layout();
+
+        auto first_layout = travel_direction_wrapper<dir>::first(current_layout, next_layout);
+        auto in_layout = first_layout;
+        auto out_layout = first_layout;
+
+        travel_direction_wrapper<dir>::first(in_layout, out_layout).format = fmt;
+
+        auto reorder_pair = rf.get_reorder(travel_direction_wrapper<dir>::first(node, next)->id(),
+                                           in_layout,
+                                           out_layout);
+        auto reorder = reorder_pair.first;
+
+        if (reorder) {
+            auto& reorder_node = p.get_or_create(reorder);
+            p.add_intermediate(reorder_node,
+                               *travel_direction_wrapper<dir>::second(node, next),
+                               *travel_direction_wrapper<dir>::first(node, next),
+                               !reorder_pair.second);
         }
-    };
+    }
+}
+
+void insert_reorders(program_impl& p, const std::map<program_node*, format::type>& fmt_map, reorder_factory& rf) {
+    auto it = p.get_processing_order().begin();
+    while (it != p.get_processing_order().end()) {
+        auto node = *(it++);
+
+        if (fmt_map.count(node) != 1)
+            continue;
+
+        auto fmt = fmt_map.at(node);
+        if (fmt == format::any)
+            continue;
+
+        insert_reorders_in_dir<direction_e::forwards>(p, fmt_map, rf, node);
+    }
 
-    const auto reorder_input_detection_output = [&p, &lo](typed_program_node<detection_output>& detection_output_node) {
+    it = p.get_processing_order().begin();
+    while (it != p.get_processing_order().end()) {
+        auto node = *(it++);
+
+        if (fmt_map.count(node) != 1)
+            continue;
+
+        auto fmt = fmt_map.at(node);
+        if (fmt == format::any)
+            continue;
+
+        insert_reorders_in_dir<direction_e::backwards>(p, fmt_map, rf, node);
+    }
+}
+
+}  // namespace
+
+void reorder_inputs::run(program_impl& p, layout_optimizer& lo, reorder_factory& rf) {
+    auto fmt_map = get_preferred_formats(p, lo);
+    propagate_formats(p, fmt_map, lo);
+    minimize_local_reorders(p, fmt_map, lo);
+    insert_reorders(p, fmt_map, rf);
+
+    for (auto n : p.get_processing_order()) {
+        n->recalc_output_layout(true);
+    }
+
+    const auto reorder_input_detection_output = [&p, &rf](typed_program_node<detection_output>& detection_output_node) {
         auto detection_output_prim = detection_output_node.get_primitive();
 
         for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++) {
             auto& input = detection_output_node.get_dependency(i);
-            std::shared_ptr<reorder> new_input = lo.get_reorder(input.get_output_layout(),
-                                                                input.id(),
-                                                                layout_optimizer::data_type::input,
-                                                                detection_output_node,
-                                                                layout{data_types::f32, format::bfyx, tensor{}})
-                                                     .first;
-
-            if (new_input) {
-                p.add_intermediate(new_input, detection_output_node, i);
+            auto new_input = rf.get_reorder(input.id(),
+                                            input.get_output_layout(),
+                                            layout{ data_types::f32, format::bfyx, input.get_output_layout().size });
+
+            if (new_input.first) {
+                p.add_intermediate(new_input.first, detection_output_node, i, !new_input.second);
+            }
+        }
+    };
+
+    const auto reorder_input_binary_convolution = [&p, &rf](typed_program_node<binary_convolution>& binary_conv_node) {
+        auto& input = binary_conv_node.input();
+        auto input_layout = input.get_output_layout();
+        auto new_layout = input_layout;
+        new_layout.data_type = data_types::bin;
+
+        auto reorder = rf.get_reorder(input.id(), input_layout, new_layout);
+
+        if (reorder.first) {
+            p.add_intermediate(reorder.first, binary_conv_node, 0, !reorder.second);
+        }
+    };
+
+    const auto reorder_input_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
+        auto& input = deconv_node.input();
+        auto input_layout = input.get_output_layout();
+        auto new_format = lo.get_preferred_format(deconv_node);
+        if (new_format == format::bfzyx_f16) {
+            auto reorder = rf.get_reorder(input.id(), input_layout,
+                layout{ input_layout.data_type, new_format, input_layout.size });
+            if (reorder.first) {
+                p.add_intermediate(reorder.first, deconv_node, 0, !reorder.second);
             }
         }
     };
 
     for (auto& prim : p.get_processing_order()) {
-        // there's an assumption that only convolution will take data/input_layout as input
-        // exception to that rule would be a convolution which takes a reorder as input - see reoder_input above
-        program_helpers::do_for_types<convolution, detection_output, binary_convolution>(
+        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution>(
             *prim,
-            reorder_input,                      // case for convolution
-            reorder_input_detection_output,     // case for detection-output
-            reorder_input_convolution_binary);  // case for binary convolution
+            reorder_input_detection_output,
+            reorder_input_binary_convolution,
+            reorder_input_deconvolution);
     }
 }
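
The insert_reorders_in_dir template above depends on a direction helper, travel_direction_wrapper, whose definition sits earlier in reorder_inputs.cpp and falls outside this excerpt. The sketch below shows what such a helper plausibly looks like, inferred from the call sites above and assuming the clDNN program_node API (get_users() / get_dependencies()); it is an illustration, not the committed code:

    #include <list>
    #include <vector>
    // program_node comes from the surrounding clDNN headers (program_node.h).

    enum class direction_e { forwards, backwards };

    // Forwards: "next" nodes are the users; first/second keep argument order.
    template <direction_e dir>
    struct travel_direction_wrapper {
        static const std::list<program_node*>& next_nodes(program_node* node) {
            return node->get_users();
        }
        template <typename T>
        static T& first(T& current, T& /*next*/) { return current; }
        template <typename T>
        static T& second(T& /*current*/, T& next) { return next; }
    };

    // Backwards: "next" nodes are the dependencies, and first/second are swapped,
    // so the same insert_reorders_in_dir body places the reorder on the correct side.
    template <>
    struct travel_direction_wrapper<direction_e::backwards> {
        static const std::vector<program_node*>& next_nodes(program_node* node) {
            return node->get_dependencies();
        }
        template <typename T>
        static T& first(T& /*current*/, T& next) { return next; }
        template <typename T>
        static T& second(T& current, T& /*next*/) { return current; }
    };

Likewise, the value returned by count_reorders (see the { fwd.number + bwd.number, fwd.total_sizes + bwd.total_sizes } return near the top of this hunk) is evidently a small aggregate holding a reorder count plus summed tensor sizes, which minimize_local_reorders uses as a tie-breaker between candidate formats.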
index 61cbc9a..e70620e 100644 (file)
@@ -77,8 +77,8 @@ void strided_slice_optimize::run(program_impl& p) {
 
             auto& reshape_prim_node = p.get_or_create(reshape_prim);
 
-            reshape_prim_node.set_output_layout(
-                {node_layout.data_type, node_layout.format, reshape_prim->output_shape});
+            layout output_layout = { node_layout.data_type, node_layout.format, reshape_prim->output_shape };
+            reshape_prim_node.set_output_layout(output_layout);
 
             p.add_intermediate(reshape_prim_node, *node, 0, true);
             p.extract_and_remove(*node);
index 6147f68..11a3eca 100644 (file)
 */
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-#include "api_impl.h"
 #include <immintrin.h>
+#include <stdint.h>
 
 namespace cldnn {
+
 float half_to_float(uint16_t value) {
     static const uint32_t FLOAT16_EXP_SHIFT = (23 - 10);
     static const uint32_t FLOAT16_EXP_MASK = 0x7C00;
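
The hunk above shows only the opening constants of cldnn::half_to_float; the body of the function (and the AVX path implied by the immintrin.h include) continues beyond this excerpt. For reference, a self-contained scalar FP16-to-FP32 conversion that those masks and shifts support might look like the following sketch (an assumption for illustration, not the committed kernel; denormal halves are flushed to zero for brevity):

    #include <cstdint>
    #include <cstring>

    // Widen an IEEE 754 binary16 value to binary32: move the sign bit,
    // re-bias the 5-bit exponent (15 -> 127) and shift the 10-bit mantissa
    // into the 23-bit field (hence the "23 - 10" exponent shift above).
    inline float half_to_float_ref(uint16_t h) {
        const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
        const uint32_t exp  = (h & 0x7C00u) >> 10;   // FLOAT16_EXP_MASK field
        const uint32_t mant = (h & 0x03FFu);
        uint32_t bits;
        if (exp == 0) {
            bits = sign;                              // zero (denormals flushed)
        } else if (exp == 0x1F) {
            bits = sign | 0x7F800000u | (mant << 13); // +/-Inf and NaN payloads
        } else {
            bits = sign | ((exp + (127 - 15)) << 23) | (mant << 13);
        }
        float out;
        std::memcpy(&out, &bits, sizeof(out));        // bit-cast without UB
        return out;
    }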
index 993a7f6..adc3490 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/activation_grad.hpp"
+#include "api/activation_grad.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 53e9f34..d5f5e40 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/activation.hpp"
+#include "api/activation.hpp"
 #include "primitive_inst.h"
 #include <memory>
 #include <string>
diff --git a/inference-engine/thirdparty/clDNN/src/include/api_impl.h b/inference-engine/thirdparty/clDNN/src/include/api_impl.h
deleted file mode 100644 (file)
index 456a24b..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-#include "api/C/cldnn.h"
-#include "api/CPP/cldnn_defs.h"
-
-#include <functional>
-#include <stdexcept>
-#include <string>
-
-#define API_CAST(api_type, impl_type)                                                        \
-    inline api_type api_cast(impl_type* value) { return reinterpret_cast<api_type>(value); } \
-    inline impl_type* api_cast(api_type value) { return reinterpret_cast<impl_type*>(value); }
-
-namespace cldnn {
-struct last_err {
-    /// @breif Sets the message of last error
-    void set_last_error_message(const std::string& msg) { _msg = msg; }
-
-    void set_last_exception(const std::exception& ex) { _msg = ex.what(); }
-
-    /// @breif Gets the message of last error
-    const std::string& get_last_error_message() { return _msg; }
-    static last_err& instance();
-
-private:
-    std::string _msg;
-    last_err() : _msg("Operation succeed") {}
-};
-
-// float <--> half convertors
-float half_to_float(uint16_t value);
-uint16_t float_to_half(float value);
-}  // namespace cldnn
-
-template <typename T>
-T exception_handler(cldnn_status default_error,
-                    cldnn_status* status,
-                    const T& default_result,
-                    std::function<T()> func) {
-    // NOTE for implementer: status should not be modified after successful func() call
-    try {
-        if (status)
-            *status = CLDNN_SUCCESS;
-        return func();
-    } catch (const cldnn::error& err) {
-        if (status)
-            *status = err.status();
-        cldnn::last_err::instance().set_last_exception(err);
-
-#ifndef NDEBUG
-        static_cast<void>(default_result);
-        throw;
-#endif
-    } catch (const std::exception& err) {
-        if (status)
-            *status = default_error;
-        cldnn::last_err::instance().set_last_exception(err);
-
-#ifndef NDEBUG
-        static_cast<void>(default_result);
-        throw;
-#endif
-    } catch (...) {
-        if (status)
-            *status = default_error;
-        cldnn::last_err::instance().set_last_error_message("error unknown");
-
-#ifndef NDEBUG
-        static_cast<void>(default_result);
-        throw;
-#endif
-    }
-
-#ifdef NDEBUG
-    return default_result;
-#endif
-}
-
-inline void exception_handler(cldnn_status default_error, cldnn_status* status, std::function<void()> func) {
-    // NOTE for implementer: status should not be modified after successful func() call
-    try {
-        if (status)
-            *status = CLDNN_SUCCESS;
-        func();
-    } catch (const cldnn::error& err) {
-        if (status)
-            *status = err.status();
-        cldnn::last_err::instance().set_last_exception(err);
-#ifndef NDEBUG
-        throw;
-#endif
-    } catch (const std::exception& err) {
-        if (status)
-            *status = default_error;
-        cldnn::last_err::instance().set_last_exception(err);
-
-#ifndef NDEBUG
-        throw;
-#endif
-    } catch (...) {
-        if (status)
-            *status = default_error;
-        cldnn::last_err::instance().set_last_error_message("error unknown");
-#ifndef NDEBUG
-        throw;
-#endif
-    }
-}
index 50bd523..c754b4e 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/apply_adam.hpp"
+#include "api/apply_adam.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index e181bb8..939d221 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/arg_max_min.hpp"
+#include "api/arg_max_min.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
index 4a600b5..be91e85 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/average_unpooling.hpp"
+#include "api/average_unpooling.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 5eb6564..0d2c617 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/batch_norm_grad.hpp"
+#include "api/batch_norm_grad.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index c0d265e..485131e 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/batch_norm.hpp"
+#include "api/batch_norm.hpp"
 #include "primitive_inst.h"
 #include "mutable_data_inst.h"
 #include <string>
index ad12bfd..027499c 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/binary_convolution.hpp"
+#include "api/binary_convolution.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
 
 namespace cldnn {
 
-struct fused_primitive_desc {
-    std::shared_ptr<const primitive> prim;
-    size_t dep_start_idx;
-    std::vector<primitive_id> deps;
-    cldnn_activation_func_t activation;
-    cldnn_activation_additional_params activation_params;
-};
-
 template <>
 struct typed_program_node<binary_convolution> : public typed_program_node_base<binary_convolution> {
     using parent = typed_program_node_base<binary_convolution>;
@@ -63,39 +55,9 @@ public:
 
     bool bias_term() const { return false; }
 
-    void add_fused_primitive(const program_node* p) {
-        fused_primitive_desc local_desc;
-        local_desc.prim = p->get_primitive();
-        local_desc.dep_start_idx = this->get_dependencies().size();
-        local_desc.activation = cldnn_activation_func_t::activation_none;
-        if (p->get_fused_activation_func() != cldnn_activation_func_t::activation_none) {
-            local_desc.activation = p->get_fused_activation_func();
-            local_desc.activation_params = p->get_fused_activation_params();
-        }
-
-        for (size_t i = 1; i < p->get_dependencies().size(); i++) {
-            auto& dep = p->get_dependency(i);
-            this->dependencies.push_back(&dep);
-            local_desc.deps.push_back(dep.id());
-            dep.users.push_back(this);
-        }
-        fused_prims.push_back(local_desc);
-    }
-
-    const std::vector<fused_primitive_desc>& get_fused_primitives() const { return fused_prims; }
-
-    size_t get_fused_inputs_count() const {
-        size_t count = 0;
-        for (auto& fp : get_fused_primitives()) {
-            count += fp.deps.size();
-        }
-        return count;
-    }
-
 private:
     int32_t split;
     bool depthwise_sep_opt;
-    std::vector<fused_primitive_desc> fused_prims;
 };
 
 using binary_convolution_node = typed_program_node<binary_convolution>;
@@ -118,12 +80,6 @@ public:
 
         return dep_memory(1 + index);
     }
-
-    memory_impl& fused_memory(size_t dep_id) const { return dep_memory(1 + node.get_split() + dep_id); }
-
-    bool has_fused_primitives() const { return !node.get_fused_primitives().empty(); }
-
-    size_t get_fused_mem_count() const { return node.get_fused_inputs_count(); }
 };
 
 using binary_convolution_inst = typed_primitive_inst<binary_convolution>;
index 4c39a4a..5d750f6 100644 (file)
@@ -15,7 +15,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <api/CPP/border.hpp>
+#include <api/border.hpp>
 
 #include "primitive_inst.h"
 #include <string>
index c1cfd97..aa9cd1c 100644 (file)
@@ -15,7 +15,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <api/CPP/broadcast.hpp>
+#include <api/broadcast.hpp>
 
 #include "primitive_inst.h"
 #include <string>
index 9d2c544..9899a3e 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/concatenation.hpp"
+#include "api/concatenation.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index 5bff4f7..c03931c 100644 (file)
@@ -15,7 +15,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <api/CPP/condition.hpp>
+#include <api/condition.hpp>
 
 #include "network_impl.h"
 #include "primitive_inst.h"
@@ -49,7 +49,7 @@ private:
         void add_or_change_input_layout(const program_node& node) {
             auto layout = node.get_dependency(0).get_output_layout();
             auto input_id = node.as<condition>().result_id();
-            if (_program == (program_impl::ptr) nullptr) {  // if first run, create input_layout
+            if (_topology.get_primitives().count(input_id) == 0) {
                 _topology.add(std::make_shared<input_layout>(input_id, layout));
                 for (auto& prim : _topology.get_primitives()) {
                     for (auto& inp : prim.second->input) {
@@ -68,8 +68,8 @@ public:
 
     typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog)
         : parent(prim, prog),
-          _branch_true(*api_cast(this->get_primitive()->topology_true.get())),
-          _branch_false(*api_cast(this->get_primitive()->topology_false.get())) {}
+          _branch_true(*this->get_primitive()->topology_true.get()),
+          _branch_false(*this->get_primitive()->topology_false.get()) {}
 
     program_node& input() const { return get_dependency(0); }
     program_node& compare() const { return get_dependency(1); }
index 29e4c79..08ff773 100644 (file)
@@ -15,7 +15,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <api/CPP/contract.hpp>
+#include <api/contract.hpp>
 
 #include "primitive_inst.h"
 #include <string>
index bda5093..698ad1f 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/convolution_grad_weights.hpp"
+#include "api/convolution_grad_weights.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index 6922b90..d71f0a8 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/convolution.hpp"
+#include "api/convolution.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
@@ -30,15 +30,6 @@ struct typed_program_node<convolution> : public typed_program_node_base<convolut
     using parent = typed_program_node_base<convolution>;
 
 public:
-    struct fused_primitive_desc {
-        std::shared_ptr<const primitive> prim;
-        size_t dep_start_idx;
-        std::vector<primitive_id> deps;
-        cldnn_activation_func_t activation;
-        cldnn_activation_additional_params activation_params;
-    };
-
-
     typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog)
         : parent(prim, prog),
           split(this->get_primitive()->split()),
@@ -89,7 +80,6 @@ public:
         if (static_cast<int32_t>(idx) >= this->get_split())
             throw std::range_error("quantization factor offset too big");
 
-
         return get_dependency(1 + (1 + 1 * bias_term()) * this->get_split() + idx + get_trans_dep_offset());
     }
 
@@ -108,57 +98,12 @@ public:
                               get_trans_dep_offset());
     }
 
-    program_node& fused_eltwise(size_t idx = 0) const {
-        if (static_cast<int32_t>(idx) >= this->get_split())
-            throw std::range_error("eltwise offset too big");
-
-        int index = 1 + this->get_split()
-                    + (bias_term() ? this->get_split() : 0)
-                    + (weights_quantization_term() ? this->get_split() : 0)
-                    + (output_calibration_term() ? this->get_split() : 0);
-        return get_dependency(static_cast<size_t>(index));
-    }
-
-    void add_fused_primitive(const program_node *p) {
-        fused_primitive_desc local_desc;
-        local_desc.prim = p->get_primitive();
-        local_desc.dep_start_idx = this->get_dependencies().size();
-        local_desc.activation = cldnn_activation_func_t::activation_none;
-        if (p->get_fused_activation_func() != cldnn_activation_func_t::activation_none) {
-            local_desc.activation = p->get_fused_activation_func();
-            local_desc.activation_params = p->get_fused_activation_params();
-        }
-
-        for (size_t i = 0; i < p->get_dependencies().size(); i++) {
-            auto& dep = p->get_dependency(i);
-            if (dep.id() == this->id())
-                continue;
-
-            this->dependencies.push_back(&dep);
-            local_desc.deps.push_back(dep.id());
-            dep.users.push_back(this);
-        }
-        fused_prims.push_back(local_desc);
-    }
-
-    const std::vector<fused_primitive_desc>& get_fused_primitives() const {
-        return fused_prims;
-    }
-
     bool bias_term() const { return get_primitive()->bias.size() > 0; }
 
     bool weights_quantization_term() const { return get_primitive()->weights_quantization_factors.size() > 0; }
 
     bool output_calibration_term() const { return get_primitive()->output_calibration_factors.size() > 0; }
 
-    size_t get_fused_inputs_count() const {
-        size_t count = 0;
-        for (auto& fp : get_fused_primitives()) {
-            count += fp.deps.size();
-        }
-        return count;
-    }
-
     float get_input_qf() const { return input_qf; }
     float get_output_qf() const { return output_qf; }
 
@@ -171,7 +116,6 @@ private:
     uint32_t groups;
     uint32_t deformable_groups;
     bool deformable_mode;
-    std::vector<fused_primitive_desc> fused_prims;
 };
 
 using convolution_node = typed_program_node<convolution>;
@@ -235,23 +179,11 @@ public:
         }
     }
 
-    memory_impl& fused_memory(size_t dep_id) const {
-        int index = 1 + node.get_split()
-                    + (bias_term() ? node.get_split() : 0)
-                    + (weights_quantization_factors_term() ? node.get_split() : 0)
-                    + (output_calibration_factors_term() ? node.get_split() : 0);
-        return dep_memory(index + dep_id);
-    }
-
     bool bias_term() const { return node.bias_term(); }
 
     bool weights_quantization_factors_term() const { return node.weights_quantization_term(); }
 
     bool output_calibration_factors_term() const { return node.output_calibration_term(); }
-
-    bool has_fused_primitives() const { return !node.get_fused_primitives().empty(); }
-
-    size_t get_fused_mem_count() const { return node.get_fused_inputs_count(); }
 };
 
 using convolution_inst = typed_primitive_inst<convolution>;
index f57320b..75369a8 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/crop.hpp"
+#include "api/crop.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index cb6d7a8..e964281 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/custom_gpu_primitive.hpp"
+#include "api/custom_gpu_primitive.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 2aee4f1..782e41d 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/data.hpp"
+#include "api/data.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index f968654..e06c845 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/deconvolution.hpp"
+#include "api/deconvolution.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index ff74468..a622e51 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/convolution.hpp"
+#include "api/convolution.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
@@ -112,8 +112,6 @@ public:
 
 using deformable_conv_inst = typed_primitive_inst<deformable_conv>;
 
-
-
 template <>
 struct typed_program_node<deformable_interp> : public typed_program_node_base<deformable_interp> {
     using parent = typed_program_node_base<deformable_interp>;
index 297dc03..835222c 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/depth_to_space.hpp"
+#include "api/depth_to_space.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 3503e0e..9e495e0 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/detection_output.hpp"
+#include "api/detection_output.hpp"
 #include "primitive_inst.h"
 #include "topology_impl.h"
 #include <string>
index 773e7e6..c2d0204 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/eltwise.hpp"
+#include "api/eltwise.hpp"
 #include "primitive_inst.h"
 #include <memory>
 #include "topology_impl.h"
index 2878ce2..e5ae3bb 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/embed.hpp"
+#include "api/embed.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 039ad87..52df865 100644 (file)
@@ -16,8 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/memory.hpp"
-#include "api_impl.h"
+#include "api/memory.hpp"
 #include "event_impl.h"
 #include "refcounted_obj.h"
 #include "implementation_map.h"
@@ -53,14 +52,14 @@ public:
     explicit engine_impl(const engine_configuration& conf);
     ~engine_impl();
     engine_types type() const { return engine_types::ocl; }
-    refcounted_obj_ptr<memory_impl> allocate_memory(layout layout, uint16_t stream_id);
-    refcounted_obj_ptr<memory_impl> allocate_memory(layout layout,
+    refcounted_obj_ptr<memory_impl> allocate_memory(const layout& layout, uint16_t stream_id);
+    refcounted_obj_ptr<memory_impl> allocate_memory(const layout& layout,
                                                     primitive_id,
                                                     uint32_t,
                                                     std::set<primitive_id>,
                                                     uint16_t stream_id,
                                                     bool reusable = true);
-    refcounted_obj_ptr<memory_impl> reinterpret_buffer(const memory_impl& memory, layout new_layout);
+    refcounted_obj_ptr<memory_impl> reinterpret_buffer(const memory_impl& memory, const layout& new_layout);
     bool is_the_same_buffer(const memory_impl& mem1, const memory_impl& mem2);
 
     refcounted_obj_ptr<event_impl> create_user_event(uint16_t stream_id, bool set = false);
@@ -134,5 +133,3 @@ private:
     memory_pool _memory_pool;
 };
 }  // namespace cldnn
-
-API_CAST(::cldnn_engine, cldnn::engine_impl)
index e8a0a40..517b9c0 100644 (file)
@@ -21,7 +21,7 @@
 #include <array>
 #include <algorithm>
 #include <type_traits>
-#include "api/CPP/layout.hpp"
+#include "api/layout.hpp"
 #include <string>
 #include <utility>
 
index a340a1a..3bfe644 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api_impl.h"
+#include "api/event.hpp"
 #include "refcounted_obj.h"
 
 #include <list>
@@ -33,18 +33,23 @@ public:
     void wait();
     bool is_set();
     virtual bool is_valid() const { return _attached; }
-    virtual void reset() { _attached = false; }
+    virtual void reset() {
+        _attached = false;
+        _set = false;
+        _profiling_captured = false;
+        _profiling_info.clear();
+    }
     // returns true if handler has been successfully added
-    bool add_event_handler(cldnn_event_handler handler, void* data);
+    bool add_event_handler(event_handler handler, void* data);
 
-    const std::list<cldnn_profiling_interval>& get_profiling_info();
+    const std::list<instrumentation::profiling_interval>& get_profiling_info();
 
 private:
     std::mutex _handlers_mutex;
-    std::list<std::pair<cldnn_event_handler, void*>> _handlers;
+    std::list<std::pair<event_handler, void*>> _handlers;
 
     bool _profiling_captured = false;
-    std::list<cldnn_profiling_interval> _profiling_info;
+    std::list<instrumentation::profiling_interval> _profiling_info;
 
 protected:
     bool _set = false;
@@ -54,11 +59,11 @@ protected:
 
     virtual void wait_impl() = 0;
     virtual bool is_set_impl() = 0;
-    virtual bool add_event_handler_impl(cldnn_event_handler, void*) { return true; }
+    virtual bool add_event_handler_impl(event_handler, void*) { return true; }
 
     // returns whether profiling info has been captured successfully and there's no need to call this impl a second time
     // when the user requests to get profiling info
-    virtual bool get_profiling_info_impl(std::list<cldnn_profiling_interval>&) { return true; }
+    virtual bool get_profiling_info_impl(std::list<instrumentation::profiling_interval>&) { return true; }
 };
 
 struct user_event : virtual public event_impl {
@@ -78,5 +83,3 @@ private:
 };
 
 }  // namespace cldnn
-
-API_CAST(::cldnn_event, cldnn::event_impl)
index 309e7b2..e015f85 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/fully_connected_grad_input.hpp"
+#include "api/fully_connected_grad_input.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index ee4384e..9b63ea6 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/fully_connected_grad_weights.hpp"
+#include "api/fully_connected_grad_weights.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index bfeaaab..8162664 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/fully_connected.hpp"
+#include "api/fully_connected.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index 4737019..460f02c 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api_extension/CPP/fused_conv_bn_scale.hpp"
+#include "api_extension/fused_conv_bn_scale.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
index ede85cb..f7fe3af 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api_extension/CPP/fused_conv_eltwise.hpp"
+#include "api_extension/fused_conv_eltwise.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
@@ -39,9 +39,9 @@ public:
         if (get_primitive()->eltw.with_activation) {
             auto slope = get_primitive()->eltw.activation_negative_slope;
             if (slope == 0.f) {
-                this->set_fused_activation(activation_relu, {});
+                this->add_fused_activation(activation_func::relu, {});
             } else {
-                this->set_fused_activation(activation_relu_negative_slope, { slope, 0.f });
+                this->add_fused_activation(activation_func::relu_negative_slope, { slope, 0.f });
             }
         }
     }
index 8b7c6fb..e54a0a6 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/gather.hpp"
+#include "api/gather.hpp"
 #include "primitive_inst.h"
 #include <string>
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h b/inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h
new file mode 100644 (file)
index 0000000..64caaac
--- /dev/null
@@ -0,0 +1,49 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <api/gather_tree.hpp>
+
+#include "primitive_inst.h"
+#include <string>
+#include <memory>
+
+namespace cldnn {
+template <>
+struct typed_program_node<gather_tree> : typed_program_node_base<gather_tree> {
+private:
+    using parent = typed_program_node_base<gather_tree>;
+public:
+    using parent::parent;
+    typed_program_node(const std::shared_ptr<gather_tree> prim, program_impl& prog) : parent(prim, prog) {
+    }
+    program_node& input() const { return get_dependency(0); }
+};
+
+using gather_tree_node = typed_program_node<gather_tree>;
+
+template <>
+class typed_primitive_inst<gather_tree> : public typed_primitive_inst_base<gather_tree> {
+    using parent = typed_primitive_inst_base<gather_tree>;
+
+public:
+    static layout calc_output_layout(gather_tree_node const& node);
+    static std::string to_string(gather_tree_node const& node);
+    typed_primitive_inst(network_impl& network, gather_tree_node const& node);
+};
+
+using gather_tree_inst = typed_primitive_inst<gather_tree>;
+
+}  // namespace cldnn
index ff2aea2..1db6ee3 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/gemm.hpp"
+#include "api/gemm.hpp"
 #include "primitive_inst.h"
 #include <string>
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/generic_layer.h b/inference-engine/thirdparty/clDNN/src/include/generic_layer.h
deleted file mode 100644 (file)
index cadb479..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-#include "api/C/cldnn.h"
-
-namespace cldnn {
-/// @brief Changes how data is ordered in memory. Value type is not changed & all information is preserved.
-/// @details Corresponding values are bitwise equal before/after reorder.
-/// Also merged with subtraction layer, which can subtract values while doing reordering.
-CLDNN_BEGIN_PRIMITIVE_DESC(generic_layer)
-/// @brief Requested memory layout.
-cldnn_layout output_layout;
-const void* generic_params;
-
-CLDNN_END_PRIMITIVE_DESC(generic_layer)
-
-CLDNN_DECLARE_PRIMITIVE_TYPE_ID(generic_layer);
-
-}  // namespace cldnn
\ No newline at end of file
index dbd92a9..47f8305 100644 (file)
@@ -16,9 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "generic_layer.h"
-#include "api/CPP/primitive.hpp"
-#include "api/CPP/memory.hpp"
+#include "api/primitive.hpp"
+#include "api/memory.hpp"
 #include "kernel_selector_helper.h"
 #include <vector>
 
@@ -35,7 +34,7 @@ namespace cldnn {
 /// @details Corresponding values are bitwise equal before/after reorder.
 /// Also merged with subtraction layer, which can subtract values while doing reordering.
 /// NOTE THAT THIS WILL SUBTRACT THE SAME VALUES FROM EACH BATCH.
-struct generic_layer : public primitive_base<generic_layer, CLDNN_PRIMITIVE_DESC(generic_layer)> {
+struct generic_layer : public primitive_base<generic_layer> {
     CLDNN_DECLARE_PRIMITIVE(generic_layer)
 
     /// @brief Constructs generic_layer primitive which takes mean subtract values from another primitive.
@@ -50,23 +49,12 @@ struct generic_layer : public primitive_base<generic_layer, CLDNN_PRIMITIVE_DESC
                   const padding& output_padding = padding())
         : primitive_base(id, {input}, output_padding), output_layout(output_layout), generic_params(generic_params) {}
 
-    /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{generic_layer}
-    generic_layer(const dto* dto)
-        : primitive_base(dto),
-          output_layout(dto->output_layout),
-          generic_params(*static_cast<const kernel_selector::generic_kernel_params* const>(dto->generic_params)) {}
-
     /// @brief Requested memory layout.
     layout output_layout;
     const kernel_selector::generic_kernel_params generic_params;
 
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {}; }
-
-    void update_dto(dto& dto) const override {
-        dto.output_layout = output_layout;
-        dto.generic_params = &generic_params;
-    }
 };
 /// @}
 /// @}
index 42b796c..dbb2b61 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/index_select.hpp"
+#include "api/index_select.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
index 324a1a6..bf33de6 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/input_layout.hpp"
+#include "api/input_layout.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index cd799ec..605ab5b 100644 (file)
 namespace cldnn {
 
 template <class PType>
-struct internal_primitive_type_base : public ::cldnn_primitive_type {
+struct internal_primitive_type_base : public primitive_type {
     static_assert(meta::is_internal_primitive<PType>::value,
                   "Primitive type passed to internal_primitive_type_base should derive from internal_primitive");
 
-    [[noreturn]] std::shared_ptr<primitive> from_dto(const CLDNN_PRIMITIVE_DESC(primitive) *) const override {
-        throw std::runtime_error(
-            "Trying to create an internal primitive from dto - internal primitives are intransferable by design");
-    }
-
     [[noreturn]] std::shared_ptr<cldnn::program_node> create_node(program_impl&,
                                                                   const std::shared_ptr<primitive>) const override {
         throw std::runtime_error(
index 5dfadf6..785fdce 100644 (file)
 
 #pragma once
 
-#include "api/C/cldnn.h"
-#include "api/CPP/tensor.hpp"
+#include "api/cldnn.hpp"
+#include "api/tensor.hpp"
+#include "api/eltwise.hpp"
+#include "api/scale.hpp"
+#include "api/quantize.hpp"
+#include "api/activation.hpp"
 
 #include "kernel_selector_params.h"
 #include "kernel_selector_common.h"
@@ -23,6 +27,8 @@
 
 #include <cstdint>
 #include <string>
+#include <vector>
+#include <memory>
 
 using namespace cldnn;
 
@@ -101,9 +107,9 @@ std::string to_host_version(const cldnn::version_t& version);
 kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split = 1, const tensor view_offset = tensor {});
 kernel_selector::weights_tensor convert_weights_tensor(const layout& l);
 layout from_weights_tensor(const kernel_selector::weights_tensor& t);
-kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func);
+kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation_func);
 kernel_selector::activation_function get_kernel_selector_activation_grad_param(
-    cldnn_activation_grad_func activation_grad_func);
+    activation_grad_func activation_grad_func);
 
 template <typename T = std::uint32_t>
 kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t) {
@@ -117,28 +123,37 @@ kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t) {
 }
 
 template <typename p_type>
-inline void convert_activation_func_params(const p_type primitive, kernel_selector::base_activation_params& params) {
+inline void convert_activation_func_params(const p_type primitive, std::vector<kernel_selector::base_activation_params>& params) {
     const float negative_slope = primitive->activation_negative_slope;
     if (negative_slope != 0.0f) {
-        params.m = negative_slope;
-        params.function = kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
+        params.emplace_back(kernel_selector::activation_function::RELU_NEGATIVE_SLOPE, negative_slope, 0.0f);
     } else {
-        params.function = kernel_selector::activation_function::RELU;
+        params.emplace_back(kernel_selector::activation_function::RELU, 0.0f, 0.0f);
     }
 }
 
 template <typename arg_t>
-inline void convert_fused_activation_func_params(const arg_t& arg, kernel_selector::base_activation_params& params) {
-    params.m = arg.get_fused_activation_params().a;
-    params.n = arg.get_fused_activation_params().b;
-    params.function = get_kernel_selector_activation_param(arg.get_fused_activation_func());
+inline void convert_fused_activation_func_params(const arg_t& arg, std::vector<kernel_selector::base_activation_params>& params) {
+    for (size_t i = 0; i < arg.get_fused_activations_funcs().size(); i++) {
+        params.emplace_back(get_kernel_selector_activation_param(arg.get_fused_activations_funcs()[i]),
+                            arg.get_fused_activations_params()[i].a,
+                            arg.get_fused_activations_params()[i].b);
+    }
+}
+
+template <typename p_type>
+inline void convert_new_activation_func(const p_type primitive, std::vector<kernel_selector::base_activation_params>& params) {
+    params.insert(params.begin(), {get_kernel_selector_activation_param(primitive->activation_function),
+                                   primitive->additional_params.a,
+                                   primitive->additional_params.b});
 }
 
 template <typename p_type>
-inline void convert_new_activation_func(const p_type primitive, kernel_selector::base_activation_params& params) {
-    params.function = get_kernel_selector_activation_param(primitive->activation_func);
-    params.m = primitive->additional_params.a;
-    params.n = primitive->additional_params.b;
+inline void convert_new_activation_grad_func(const p_type primitive, std::vector<kernel_selector::base_activation_params>& params) {
+    params.insert(params.begin(), {get_kernel_selector_activation_grad_param(primitive->activation_grad_function),
+                                   primitive->additional_params.a,
+                                   primitive->additional_params.b,
+                                   true});
 }
 
 void set_params(const program_node& node, kernel_selector::params& params);
@@ -157,7 +172,44 @@ inline params_t get_default_params(const arg_t& arg, uint32_t split = 1) {
 
     params.layerID = arg.id();
 
-    convert_fused_activation_func_params(arg, params.activation);
+    convert_fused_activation_func_params(arg, params.activations);
+    size_t op_id = 0;
+    for (auto& fused_prim : arg.get_fused_primitives()) {
+        using op_type = kernel_selector::base_params::fused_operation_desc::Type;
+        kernel_selector::base_params::fused_operation_desc desc;
+        if (fused_prim.prim->type == eltwise::type_id()) {
+            desc.type = op_type::ELTWISE;
+        } else if (fused_prim.prim->type == scale::type_id()) {
+            desc.type = op_type::SCALE;
+        } else if (fused_prim.prim->type == quantize::type_id()) {
+            desc.type = op_type::QUANTIZE;
+        } else if (fused_prim.prim->type == activation::type_id()) {
+            desc.type = op_type::ACTIVATION;
+            std::shared_ptr<const primitive> p = fused_prim.prim;
+            auto activation_prim = std::static_pointer_cast<const activation>(p);
+            desc.activation.m = activation_prim->additional_params.a;
+            desc.activation.n = activation_prim->additional_params.b;
+            desc.activation.function = get_kernel_selector_activation_param(activation_prim->activation_function);
+        } else {
+            throw std::runtime_error("Invalid fused primitive type in " + arg.id() + " node");
+        }
+
+        desc.dep_idx_start = fused_prim.dep_start_idx;
+        desc.dep_size = fused_prim.deps.size();
+        desc.op_id = op_id++;
+        desc.output_tensor = convert_data_tensor(fused_prim.output_layout);
+
+        for (size_t i = desc.dep_idx_start; i < desc.dep_idx_start + desc.dep_size; i++) {
+            desc.tensors.push_back(convert_data_tensor(arg.get_dependency(i).get_output_layout()));
+        }
+
+        if (fused_prim.activation != activation_func::none) {
+            desc.activation.m = fused_prim.activation_params.a;
+            desc.activation.n = fused_prim.activation_params.b;
+            desc.activation.function = get_kernel_selector_activation_param(fused_prim.activation);
+        }
+        params.fused_ops.push_back(desc);
+    }
 
     return params;
 }
@@ -186,8 +238,8 @@ inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split
             params.bias.push_back(convert_data_tensor(layout(bias_layout.data_type,
                                                              bias_layout.format,
                                                              {bias_layout.size.batch[0],
-                                                              bias_layout.size.feature[0],
-                                                              bias_layout.size.spatial[0] / static_cast<int>(groups),
+                                                              bias_layout.size.feature[0] / static_cast<int>(groups),
+                                                              bias_layout.size.spatial[0],
                                                               bias_layout.size.spatial[1]}))
                                       .FlattenFeatureAndSpatials());
         }
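
The final hunk above moves the per-group division of the bias tensor from the first spatial dimension to the feature dimension before FlattenFeatureAndSpatials() is applied. A purely illustrative before/after calculation with hypothetical sizes (not taken from the source):

    // Hypothetical grouped case: groups = 4, bias layout sizes
    // { batch = 1, feature = 32, spatial = { 8, 1 } }.
    //
    // Tensor handed to convert_data_tensor(...).FlattenFeatureAndSpatials():
    //   before this hunk: { 1, 32,     8 / 4, 1 }  ->  { 1, 32, 2, 1 }
    //   after this hunk:  { 1, 32 / 4, 8,     1 }  ->  { 1,  8, 8, 1 }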
index 27f8ab5..b381e68 100644 (file)
@@ -53,20 +53,51 @@ class primitive_inst;
 // it is the programmer's responsibility to choose between 'get_reorder', which creates a reorder to the best format
 // for a given primitive (or nullptr if it's already optimal); the user shall insert it into its own topology.
 //  (note: layout_optimizer has an internal caching mechanism, so if there's already a reorder added for a given (mem,format)
-//   pair during 'get_reorder' call, it will be reused);
-// or 'add_weights_for_optimization' which, beside creating the reorder, adds both primitives (data and reorder) to its
-// internal network which allows later to call 'optimize' and get already reordered data to be exchanged in target
-// topology.
+//   pair during 'get_reorder' call, it will be reused).
+
+class reorder_factory {
+public:
+    // pair.first is the reorder (may be nullptr if no reorder is needed); pair.second tells whether the returned reorder was cached
+    // (so there is no need to add it to 'outputs' etc.); for pair.first == nullptr, pair.second == true
+    std::pair<std::shared_ptr<reorder>, bool> get_reorder(primitive_id src_id, layout in_layout, layout out_layout);
+
+    std::vector<std::pair<std::shared_ptr<primitive>, bool>> get_weights_reorder(
+        primitive_id input_id,
+        const layout& old_layout,
+        const kernel_selector::weights_reorder_params& reorder_params);
+
+private:
+    struct cache_key {
+        primitive_id data_source;
+        layout expected_layout;
+
+        friend bool operator==(cache_key const& lhs, cache_key const& rhs) {
+            return lhs.data_source == rhs.data_source && lhs.expected_layout == rhs.expected_layout;
+        }
+
+        friend bool operator!=(cache_key const& lhs, cache_key const& rhs) { return !(lhs == rhs); }
+
+        friend bool operator<(cache_key const& lhs, cache_key const& rhs) {
+            if (lhs.data_source != rhs.data_source)
+                return (lhs.data_source < rhs.data_source);
+            return lhs.expected_layout < rhs.expected_layout;
+        }
+    };
+
+    std::map<cache_key, std::shared_ptr<reorder>> _cached_reorders;
+    std::map<cache_key, std::shared_ptr<generic_layer>> _cached_generic_reorders;
+};
+
 class layout_optimizer {
 public:
-    enum class data_type { weights, bias, input };
     enum class optimization_attributes_type {
         splitted_convolution,
         group_convolution,
         deformable_convolution,
         bfyx_only_layer,
         only_fsv32_layers,
-        bfyx_f16_network
+        bfyx_f16_network,
+        bfzyx_f16_network
     };
 
     struct optimization_attributes {
@@ -76,6 +107,7 @@ public:
         int32_t bfyx_only_layer = 0;
         int32_t only_fsv32_layers = 0;
         int32_t bfyx_f16_network = 0;
+        int32_t bfzyx_f16_network = 0;
     };
 
 private:
@@ -83,142 +115,79 @@ private:
     // TODO: Remove once we will get full support for input/output padding in all primitive implementations.
     bool _output_size_handling_enabled;
 
-    struct cache_key {
-        primitive_id data_source;
-        layout expected_layout;
-
-        friend bool operator==(cache_key const& lhs, cache_key const& rhs) {
-            return lhs.data_source == rhs.data_source && lhs.expected_layout == rhs.expected_layout;
-        }
-
-        friend bool operator!=(cache_key const& lhs, cache_key const& rhs) { return !(lhs == rhs); }
-
-        friend bool operator<(cache_key const& lhs, cache_key const& rhs) {
-            if (lhs.data_source != rhs.data_source)
-                return (lhs.data_source < rhs.data_source);
-            return lhs.expected_layout < rhs.expected_layout;
-        }
-    };
-
-    std::map<cache_key, std::shared_ptr<reorder>> _cached_reorders;
-    std::map<cache_key, std::shared_ptr<generic_layer>> _cached_generic_layers;
-
     layout get_expected_layout(layout const& current_layout,
-                               data_type type,
                                convolution_node const& node,
                                layout const& output_or_weights_layout);
     layout get_expected_layout(layout const& current_layout,
-                               data_type type,
                                deconvolution_node const& node,
                                layout const& output_or_weights_layout);
     layout get_expected_layout(layout const& current_layout,
-                               data_type type,
-                               fully_connected_node const& node,
-                               layout const& output_or_weights_layout);
-    layout get_expected_layout(layout const& current_layout,
-                               data_type type,
                                detection_output_node const& node,
                                layout const& output_or_weights_layout);
     layout get_expected_layout(layout const& current_layout,
-                               data_type type,
-                               embed_node const& node,
-                               layout const& output_or_weights_layout);
-    layout get_expected_layout(layout const& current_layout,
-                               data_type type,
-                               lstm_gemm_node const& node,
-                               layout const& output_or_weights_layout);
-    layout get_expected_layout(layout const& current_layout,
-                               data_type type,
                                binary_convolution_node const& node,
                                layout const& output_or_weights_layout);
 
     bool convolution_bfyx_opt(const layout& output_layout,
                               const layout& weights_layout,
                               std::shared_ptr<const convolution> conv);
-    bool convolution_byxf_opt(const layout& output_layout,
+    bool convolution_byxf_opt(const layout& input_layout,
+                              const layout& output_layout,
                               const layout& weights_layout,
                               std::shared_ptr<const convolution> conv);
     bool convolution_bfyx_f16_opt(const layout& output_layout,
                                   const layout& weights_layout,
                                   std::shared_ptr<const convolution> conv);
+    bool convolution_bfzyx_f16_opt(const layout& output_layout,
+                                   const layout& weights_layout,
+                                   std::shared_ptr<const convolution> conv);
+    bool deconvolution_bfzyx_f16_opt(const layout& output_layout,
+                                     const layout& weights_layout,
+                                     std::shared_ptr<const deconvolution> conv);
     bool users_for_convolution_byxf_opt(program_node const& node, uint32_t depth);
-    bool deps_depth_in_same_format(program_node const& node, const cldnn::format format, uint32_t depth);
-
-    // pair.first is reorder (may be nullptr if reorder is not needed), pair.second tells if returned reorder was cached
-    // (no need to add it to 'ouputs' etc.) for pair.first == nullptr, pair.second == true
-    std::pair<std::shared_ptr<cldnn::reorder>, bool> create_reorder_if_needed(const layout& current_layout,
-                                                                              const cldnn::primitive_id& memid,
-                                                                              layout const& expected_layout);
-
-    std::pair<std::shared_ptr<cldnn::generic_layer>, bool> create_reorder_from_given_source(
-        const cldnn::primitive_id& memid,
-        layout const& expected_layout,
-        const kernel_selector::weights_reorder_params& reorder_params);
+    bool deps_for_convolution_byxf_opt(program_node const& node, uint32_t depth);
 
 public:
     explicit layout_optimizer(bool output_size_handling_enabled = true);
 
-    // this method creates reorder for data, which is currently in 'data_layout' format, to best format in context of
-    // 'user' primitive. data is used by 'user' in a way described by 'type' (i.e. weights/bias/input). id shall be
-    // primitive_id of data's source (used as reorder's input and for cache checks). user_layout is optional parameter
-    // (required for weights and bias, optional for input) which tells what kind of output 'user'
-    //  is supposed to compute - it's used for example to decide if weights shall be converted to fp16.
-    //
-    // if 'data_layout' is already optimal, nullptr is returned
-    // currently optimizations are supported only for convolution and fully-connected.
-    //
-    // returns a pair<reorder,bool> - where pair.first is a pointer to the reorder primitive and pair.second tells if
-    // it's been reused from cache, pair.second == false means this is a newly created primitive and probably needs to be
-    // added to topology etc.
-    template <class T>
-    auto get_reorder(layout const& data_layout,
-                     primitive_id const& id,
-                     data_type type,
-                     T& node,
-                     layout const& user_layout) ->
-        typename std::enable_if<meta::is_any_of<T,
-                                                convolution_node,
-                                                fully_connected_node,
-                                                deconvolution_node,
-                                                detection_output_node,
-                                                embed_node,
-                                                lstm_gemm_node,
-                                                binary_convolution_node>::value,
-                                meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>>::type {
-        auto expected_layout = get_expected_layout(data_layout, type, node, user_layout);
-        return create_reorder_if_needed(data_layout, id, expected_layout);
-    }
+    format get_preferred_format(program_node& node) {
+        format expected = format::any;
+        auto output_layout = node.get_output_layout();
+
+        if (node.is_type<convolution>()) {
+            auto& conv_node = node.as<convolution>();
+            auto weights_layout = conv_node.weights(0).get_output_layout();
+            expected = get_expected_layout(output_layout, conv_node, weights_layout).format;
+        } else if (node.is_type<binary_convolution>()) {
+            auto& bconv_node = node.as<binary_convolution>();
+            auto weights_layout = bconv_node.weights(0).get_output_layout();
+            expected = get_expected_layout(output_layout, bconv_node, weights_layout).format;
+        } else if (node.is_type<detection_output>()) {
+            expected = get_expected_layout(
+                output_layout,
+                node.as<detection_output>(),
+                layout{ data_types::f32, format::bfyx, tensor{} }).format;
+        } else if (node.is_type<reorder>() || node.is_type<input_layout>()) {
+            expected = node.get_output_layout().format;
+        } else if (node.is_type<deconvolution>()) {
+            auto& deconv_node = node.as<deconvolution>();
+            auto weights_layout = deconv_node.weights(0).get_output_layout();
+            expected = get_expected_layout(output_layout, deconv_node, weights_layout).format;
+        }
 
-    // case for unsupported 'user' primitives
-    template <class T>
-    auto get_reorder(layout const& data_layout,
-                     primitive_id const& id,
-                     data_type type,
-                     T& node,
-                     layout const& user_layout) ->
-        typename std::enable_if<!meta::is_any_of<T,
-                                                 convolution_node,
-                                                 fully_connected_node,
-                                                 deconvolution_node,
-                                                 detection_output_node,
-                                                 embed_node,
-                                                 lstm_gemm_node,
-                                                 binary_convolution_node>::value,
-                                meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>>::type {
-        static_assert(meta::always_false<T>::value,
-                      "Layout optimization for given primitive type is currently unsupported!");
-        return meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>();
+        return expected;
     }
 
-    std::vector<std::pair<std::shared_ptr<primitive>, bool>> get_generic_layer(
-        const kernel_selector::weights_reorder_params& reorder_params,
-        primitive_id input_id,
-        const layout& old_layout,
-        data_type type);
+    bool is_format_supported(program_node& node, format::type fmt);
+
+    // Returns whether a reorder between "prev" (format fmt_prev) and "next" (format fmt_next)
+    // can be fused into "next".
+    bool can_fuse_reorder(program_node& prev, program_node& next, format fmt_prev, format fmt_next);
 
     void set_optimization_attribute(optimization_attributes_type attribute, int32_t val);
     optimization_attributes get_optimization_attributes() { return _optimization_attributes; }
 
     bool is_format_optimized(const convolution_node& node, const format& format);
+    bool is_format_optimized(const deconvolution_node& node, const format& format);
 };
 }  // namespace cldnn
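Taken together, get_preferred_format and the optimization attributes replace the old per-data-type get_reorder templates removed above. A minimal sketch of the intended flow follows; the program traversal and the name 'prog' are assumed for illustration, only the layout_optimizer calls come from this header.

    // Sketch: enable the new bfzyx_f16 attribute and query per-node preferred formats.
    layout_optimizer lo(/*output_size_handling_enabled=*/true);
    lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfzyx_f16_network, 1);

    for (auto node_ptr : prog.get_processing_order()) {   // 'prog' is an assumed program_impl
        format preferred = lo.get_preferred_format(*node_ptr);
        if (preferred.value != format::any && lo.is_format_supported(*node_ptr, preferred.value)) {
            // a reorder_factory (declared above) would be asked for the actual reorder here
        }
    }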
index 486fa30..6e07af2 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/lookup_table.hpp"
+#include "api/lookup_table.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
index 35e082b..f433155 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/lrn.hpp"
+#include "api/lrn.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 4ed70cc..6b8cddb 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api_extension/CPP/lstm_dynamic_input.hpp"
+#include "api_extension/lstm_dynamic_input.hpp"
 #include "primitive_inst.h"
 #include "error_handler.h"
 #include <memory>
index 9c2445f..fac24bc 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/lstm_dynamic.hpp"
+#include "api/lstm_dynamic.hpp"
 #include "primitive_inst.h"
 #include "error_handler.h"
 #include <string>
index 22a4ce6..f5d13e3 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api_extension/CPP/lstm_dynamic_timeloop.hpp"
+#include "api_extension/lstm_dynamic_timeloop.hpp"
 #include "primitive_inst.h"
 #include "error_handler.h"
 #include <memory>
index 20aed46..5be0072 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/lstm.hpp"
+#include "api/lstm.hpp"
 #include "primitive_inst.h"
 #include <string>
 
@@ -31,7 +31,7 @@ public:
     program_node& input() const { return get_dependency(0); }
     program_node& cell() const { return get_dependency(1); }
     bool cell_term() const { return !get_primitive()->cell.empty(); }
-    int32_t offset_order() const { return get_primitive()->offset_order; }
+    lstm_weights_order offset_order() const { return get_primitive()->offset_order; }
     float clip() const {
         float clip_val = get_primitive()->clip;
         if (clip_val < 0)
@@ -57,7 +57,7 @@ public:
 
     memory_impl& cell_memory() const { return dep_memory(1); }
     bool cell_term() const { return !argument.cell.empty(); }
-    int32_t offset_order() const { return argument.offset_order; }
+    lstm_weights_order offset_order() const { return argument.offset_order; }
     float clip() const {
         float clip_val = argument.clip;
         if (clip_val < 0)
index 9054262..aff8437 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/lstm.hpp"
+#include "api/lstm.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 36a4bf4..95f9f34 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/lstm.hpp"
+#include "api/lstm.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <vector>
@@ -43,8 +43,8 @@ public:
     bool peepholes_term() const { return !get_primitive()->peepholes.empty(); }
     bool initial_hidden_term() const { return !get_primitive()->initial_hidden.empty(); }
     bool initial_cell_term() const { return !get_primitive()->initial_cell.empty(); }
-    std::vector<cldnn_activation_func> activations() const { return get_primitive()->activations; }
-    std::vector<cldnn_activation_additional_params> activation_params() const {
+    std::vector<activation_func> activations() const { return get_primitive()->activations; }
+    std::vector<activation_additional_params> activation_params() const {
         return get_primitive()->activation_params;
     }
     size_t sequence_len() const { return get_primitive()->input.size(); }
@@ -75,8 +75,8 @@ public:
     bool peepholes_term() const { return !argument.peepholes.empty(); }
     bool initial_hidden_term() const { return !argument.initial_hidden.empty(); }
     bool initial_cell_term() const { return !argument.initial_cell.empty(); }
-    std::vector<cldnn_activation_func> activations() const { return argument.activations; }
-    std::vector<cldnn_activation_additional_params> activation_params() const { return argument.activation_params; }
+    std::vector<activation_func> activations() const { return argument.activations; }
+    std::vector<activation_additional_params> activation_params() const { return argument.activation_params; }
 };
 
 using lstm_inst = typed_primitive_inst<lstm>;
index 5b34910..4e06c0d 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/max_unpooling.hpp"
+#include "api/max_unpooling.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index e202001..5ff6499 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/memory.hpp"
+#include "api/memory.hpp"
 
-#include "api_impl.h"
 #include "engine_impl.h"
 #include "refcounted_obj.h"
 
 namespace cldnn {
 
 struct memory_impl : refcounted_obj<memory_impl> {
-    memory_impl(const engine_impl::ptr& engine, layout layout, uint16_t stream_id, bool reused = false)
-        : _engine(engine), _layout(layout), _stream_id(stream_id), _reused(reused) {}
+    memory_impl(const engine_impl::ptr& engine, const layout& layout, uint16_t stream_id, bool reused = false)
+        : _engine(engine), _layout(layout), _stream_id(stream_id), _reused(reused), _bytes_count(_layout.bytes_count()) {}
 
     virtual ~memory_impl() {
         if (_engine != (engine_impl::ptr) nullptr && !_reused) {
-            _engine->get_memory_pool().subtract_memory_used(_layout.bytes_count());
+            _engine->get_memory_pool().subtract_memory_used(_bytes_count);
         }
     }
     virtual void* lock() = 0;
     virtual void unlock() = 0;
     virtual void fill(unsigned char pattern, event_impl::ptr ev) = 0;
-    size_t size() const { return _layout.bytes_count(); }
+    size_t size() const { return _bytes_count; }
     virtual bool is_allocated_by(const engine_impl& engine) const { return &engine == _engine.get(); }
     const refcounted_obj_ptr<engine_impl>& get_engine() const { return _engine; }
     const layout& get_layout() const { return _layout; }
@@ -49,10 +48,13 @@ protected:
 
 private:
     bool _reused;
+    // cached byte count of the layout; needed because the format traits static map can be destroyed
+    // before the memory_impl destructor runs when the engine is static
+    size_t _bytes_count;
 };
 
 struct simple_attached_memory : memory_impl {
-    simple_attached_memory(layout layout, void* pointer, uint16_t stream_id)
+    simple_attached_memory(const layout& layout, void* pointer, uint16_t stream_id)
         : memory_impl((engine_impl::ptr) nullptr, layout, stream_id), _pointer(pointer) {}
 
     void* lock() override { return _pointer; }
@@ -92,5 +94,3 @@ private:
 };
 
 }  // namespace cldnn
-
-API_CAST(::cldnn_memory, cldnn::memory_impl)
index 890a5a5..d915b37 100644 (file)
@@ -16,9 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/layout.hpp"
-#include "api/CPP/primitive.hpp"
-#include "api_impl.h"
+#include "api/layout.hpp"
+#include "api/primitive.hpp"
 
 #include "refcounted_obj.h"
 
index 3eb0efd..8de3f6f 100644 (file)
@@ -16,7 +16,7 @@
 #pragma once
 
 #include <type_traits>
-#include "api/CPP/meta_utils.hpp"
+#include "api/meta_utils.hpp"
 #include "internal_primitive.h"
 
 namespace cldnn {
index 46f1f93..6b57b1a 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/mutable_data.hpp"
+#include "api/mutable_data.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index c197f56..6c8ec37 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/mvn.hpp"
+#include "api/mvn.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 4206431..0b99503 100644 (file)
@@ -17,9 +17,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "api/CPP/network.hpp"
+#include "api/network.hpp"
 
-#include "api_impl.h"
 #include "engine_impl.h"
 #include "event_impl.h"
 #include "program_impl.h"
@@ -111,5 +110,3 @@ private:
     void check_names();
 };
 }  // namespace cldnn
-
-API_CAST(::cldnn_network, cldnn::network_impl)
index 6135b42..4e64f4b 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/normalize.hpp"
+#include "api/normalize.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 9c4daed..49fc93d 100644 (file)
@@ -15,7 +15,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include <api/CPP/one_hot.hpp>
+#include <api/one_hot.hpp>
 
 #include "primitive_inst.h"
 #include <string>
index e404ec9..2bdf5f1 100644 (file)
@@ -162,14 +162,13 @@ private:
     void run(program_impl& p) override;
 };
 
-class prepare_binarization : public base_pass {
+class prepare_quantization : public base_pass {
 public:
-    prepare_binarization() : base_pass("prepare_binarization") {}
+    prepare_quantization() : base_pass("prepare_quantization") {}
 
 private:
     void run(program_impl& p) override;
-    void prepare_packed_quantize(program_impl& p, program_node& node);
-    void prepare_fusing(program_impl& p, program_node& node);
+    void prepare_packed_quantize(program_impl& p);
 };
 
 class prepare_conv_eltw_fusing : public base_pass {
@@ -220,21 +219,23 @@ public:
 
 private:
     void run(program_impl& p) override;
-    void fuse_skip_layers(program_impl& p, program_node* node);
-    void fuse_conv_bn_scale(program_impl& p, program_node* node);
+    void fuse_reorders(program_impl& p);
+    void fuse_activations(program_impl& p);
+    void fuse_skip_layers(program_impl& p);
+    void fuse_simple_primitives(program_impl &p);
     layout_optimizer& _lo;
 };
 
 class pre_optimize_bias : public base_pass {
 public:
-    explicit pre_optimize_bias(layout_optimizer& lo_ref);
+    explicit pre_optimize_bias(reorder_factory& rf_ref);
 
 private:
     void run(program_impl& p) override;
-    virtual void run(program_impl& p, layout_optimizer& lo);
+    virtual void run(program_impl& p, reorder_factory& rf);
     template <typename T>
-    void optimize_bias(T& node, layout_optimizer& lo, program_impl& p);
-    layout_optimizer& _lo;
+    void optimize_bias(T& node, reorder_factory& rf, program_impl& p);
+    reorder_factory& _rf;
 };
 
 class prepare_padding : public base_pass {
@@ -258,14 +259,26 @@ private:
 
 class post_optimize_weights : public base_pass {
 public:
-    explicit post_optimize_weights(layout_optimizer& lo_ref);
+    explicit post_optimize_weights(reorder_factory& rf_ref);
 
 private:
+    struct weights_bias_offset {
+        size_t weights_offset;
+        size_t bias_offset;
+
+        // When using this ctor, the weights offset is added to the bias_offset
+        weights_bias_offset(const size_t w_offset, const size_t b_offset)
+            : weights_offset(w_offset)
+            , bias_offset(weights_offset + b_offset)
+        {}
+    };
+
     void run(program_impl& p) override;
-    virtual void run(program_impl& p, layout_optimizer& lo);
-    template <typename T>
-    void optimize_weights(T& node, layout_optimizer& lo, program_impl& p);
-    layout_optimizer& _lo;
+    template<typename T>
+    weights_bias_offset get_weights_bias_offset(const T& node);
+    template<typename T>
+    void optimize_weights(T& node, program_impl& p);
+    reorder_factory& _rf;
 };
 
 class propagate_constants : public base_pass {
@@ -288,21 +301,24 @@ private:
 
 class remove_redundant_reorders : public base_pass {
 public:
-    explicit remove_redundant_reorders(bool bfyx_to_bfyx_f16_opt = false);
+    explicit remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing = false, bool update_implementations = false);
     void run(program_impl& p) override;
 
 private:
-    bool bfyx_to_bfyx_f16_opt;
+    layout_optimizer& lo;
+    bool enable_reorder_fusing;
+    bool update_implementations;
 };
 
 class reorder_inputs : public base_pass {
 public:
-    explicit reorder_inputs(layout_optimizer& lo_ref);
+    reorder_inputs(layout_optimizer& lo_ref, reorder_factory& rf_ref);
 
 private:
     void run(program_impl& p) override;
-    virtual void run(program_impl& p, layout_optimizer& lo);
+    virtual void run(program_impl& p, layout_optimizer& lo, reorder_factory& rf);
     layout_optimizer& _lo;
+    reorder_factory& _rf;
 };
 
 class trim_to_outputs : public base_pass {
index 064f4d9..8ad0b37 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/permute.hpp"
+#include "api/permute.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index 7796978..39fb338 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/pooling.hpp"
+#include "api/pooling.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index d91a910..2e51852 100644 (file)
@@ -17,8 +17,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "api/CPP/primitive.hpp"
-#include "api/CPP/concatenation.hpp"
+#include "api/primitive.hpp"
+#include "api/concatenation.hpp"
 
 #include "event_impl.h"
 #include "memory_impl.h"
@@ -86,7 +86,7 @@ public:
 
     memory_impl& dep_memory(size_t index) const { return dependencies().at(index)->output_memory(); }
     memory_impl& output_memory() const { return *_output; }
-    size_t inputs_memory_count() const { return _node.get_primitive()->input.size(); }
+    size_t inputs_memory_count() const { return _node.get_primitive()->input_size(); }
     primitive_type_id type() const { return _node.type(); }
     primitive_id id() const { return _node.id(); }
     primitive_id org_id() const { return _node.get_org_primitive_id(); }
@@ -115,6 +115,14 @@ public:
 
     void build_deps();
 
+    memory_impl& fused_memory(size_t dep_id) const {
+        return dep_memory(get_fused_mem_offset() + dep_id);
+    }
+
+    bool has_fused_primitives() const { return !_node.get_fused_primitives().empty(); }
+    size_t get_fused_mem_count() const { return _node.get_fused_inputs_count(); }
+    size_t get_fused_mem_offset() const { return _node.get_fused_primitives()[0].dep_start_idx; }
+
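The new fused_memory/get_fused_mem_offset accessors suggest a simple convention: the dependencies contributed by fused primitives occupy one contiguous block of the instance's dependency list. A small sketch of how an implementation could walk that block, assuming the convention holds (only the accessors themselves come from the hunk above):

    #include <vector>

    // Collect the extra input buffers that fused primitives add to an instance.
    std::vector<cldnn::memory_impl*> collect_fused_inputs(const cldnn::primitive_inst& instance) {
        std::vector<cldnn::memory_impl*> fused_inputs;
        if (!instance.has_fused_primitives())
            return fused_inputs;

        // fused dependencies start at get_fused_mem_offset() and span get_fused_mem_count() entries
        for (size_t i = 0; i < instance.get_fused_mem_count(); ++i)
            fused_inputs.push_back(&instance.fused_memory(i));
        return fused_inputs;
    }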
 protected:
     primitive_inst(network_impl& network, program_node const& node, bool allocate_memory);
 
index 3089c5c..072b157 100644 (file)
@@ -16,9 +16,9 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/memory.hpp"
-#include "api/CPP/primitive.hpp"
-#include "api/CPP/program.hpp"
+#include "api/memory.hpp"
+#include "api/primitive.hpp"
+#include "api/program.hpp"
 
 #include "topology_impl.h"
 
@@ -32,22 +32,22 @@ struct program_node;
 struct primitive_impl;
 class primitive_inst;
 struct program_impl;
-}  // namespace cldnn
-struct cldnn_primitive_type {
-    virtual ~cldnn_primitive_type() = default;
 
-    virtual std::shared_ptr<cldnn::primitive> from_dto(const CLDNN_PRIMITIVE_DESC(primitive) * dto) const = 0;
-    virtual std::shared_ptr<cldnn::program_node> create_node(cldnn::program_impl& program,
-                                                             const std::shared_ptr<cldnn::primitive> prim) const = 0;
-    virtual std::shared_ptr<cldnn::primitive_inst> create_instance(cldnn::network_impl& network,
-                                                                   const cldnn::program_node& node) const = 0;
-    virtual std::unique_ptr<cldnn::primitive_impl> choose_impl(cldnn::engine_impl& engine,
-                                                               const cldnn::program_node& node) const = 0;
-    virtual bool does_an_implementation_exist(cldnn::engine_impl& engine, const cldnn::program_node& node) const = 0;
-    virtual bool does_possible_implementation_exist(cldnn::engine_impl& engine,
-                                                    const cldnn::program_node& node) const = 0;
-    virtual cldnn::layout calc_output_layout(const cldnn::program_node& node) const = 0;
-    virtual std::string to_string(const cldnn::program_node& node) const = 0;
+struct primitive_type {
+    virtual ~primitive_type() = default;
+
+    virtual std::shared_ptr<program_node> create_node(program_impl& program,
+                                                      const std::shared_ptr<primitive> prim) const = 0;
+    virtual std::shared_ptr<primitive_inst> create_instance(network_impl& network,
+                                                            const program_node& node) const = 0;
+    virtual std::unique_ptr<primitive_impl> choose_impl(engine_impl& engine,
+                                                        const program_node& node) const = 0;
+    virtual bool does_an_implementation_exist(engine_impl& engine, const program_node& node) const = 0;
+    virtual bool does_possible_implementation_exist(engine_impl& engine,
+                                                    const program_node& node) const = 0;
+    virtual layout calc_output_layout(const program_node& node) const = 0;
+    virtual std::string to_string(const program_node& node) const = 0;
 
     virtual bool is_internal_type() const { return false; }
 };
+}  // namespace cldnn
index d7e464d..0b9033a 100644 (file)
 
 namespace cldnn {
 template <class PType>
-struct primitive_type_base : ::cldnn_primitive_type {
+struct primitive_type_base : primitive_type {
     static_assert(meta::is_api_primitive<PType>::value,
                   "Primitive type passed to primitive_type_base should derive from cldnn::primitive");
 
-    std::shared_ptr<primitive> from_dto(const CLDNN_PRIMITIVE_DESC(primitive) * dto) const override {
-        if (dto->type != this)
-            throw std::invalid_argument("primitive_type_base::from_dto: primitive type mismatch");
-
-        return std::make_shared<PType>(as_dto<PType>(dto));
-    }
-
     std::shared_ptr<cldnn::program_node> create_node(program_impl& program,
                                                      const std::shared_ptr<primitive> prim) const override {
         if (prim->type != this)
index 9adf088..d77b11d 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/prior_box.hpp"
+#include "api/prior_box.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
@@ -32,7 +32,7 @@ struct typed_program_node<prior_box> : typed_program_node_base<prior_box> {
     program_node& input() const { return get_dependency(0); }
 
     void calc_result();
-    memory_impl& get_result_buffer() const { return *result; }
+    memory_impl::ptr get_result_buffer() const { return result; }
 
 private:
     memory_impl::ptr result;
index ac38579..9277513 100644 (file)
@@ -65,8 +65,14 @@ struct program_helpers {
 
     // helper function which creates single-element array if it's given anything
     // other than std::vector.
-    // std::vector case -> does not wrap, returns t as-is
-    static const primitive::fixed_size_vector_ref& wrap_if_single(primitive::fixed_size_vector_ref const& t) {
+    // std::vector case -> does not wrap, returns t as-is
+    template <typename T>
+    static std::vector<T>& wrap_if_single(std::vector<T>& t) {
+        return t;
+    }
+
+    template <typename T>
+    static const std::vector<T>& wrap_if_single(const std::vector<T>& t) {
         return t;
     }
 
@@ -103,10 +109,10 @@ struct program_helpers {
     }
     static void merge_buffers(engine_impl& engine,
                               program_node& node,
-                              layout target_layout,
+                              const layout& target_layout,
                               size_t begin_offset,
                               size_t end_offset);
     static layout get_weights_layout(typed_program_node<cldnn::data>& data_node, int32_t split);
     static std::pair<bool, bool> are_layouts_identical(layout const& l1, layout const& l2);
 };
-}  // namespace cldnn
\ No newline at end of file
+}  // namespace cldnn
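The rewritten wrap_if_single keeps the documented behaviour: std::vector arguments pass through untouched, while anything else is (per the header comment) wrapped into a single-element vector by overloads outside this hunk. A tiny usage sketch under that assumption:

    // Vector overload shown above: returns the same object, no copy.
    std::vector<cldnn::primitive_id> ids = {"w0", "w1"};
    auto& same = cldnn::program_helpers::wrap_if_single(ids);   // 'same' refers to 'ids'

    // Scalar case (overload not visible in this hunk, assumed per the header comment):
    // cldnn::primitive_id single = "conv1_weights";
    // auto wrapped = cldnn::program_helpers::wrap_if_single(single);  // -> single-element vector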
index 9af2a2a..e19547d 100644 (file)
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include "api/CPP/program.hpp"
+#include "api/program.hpp"
 
 #include "refcounted_obj.h"
 #include "engine_impl.h"
@@ -49,7 +49,7 @@ struct program_impl : public refcounted_obj<program_impl> {
     friend class prepare_padding;            // to be removed when possible
     friend class propagate_constants;        // to be removed when possible
     friend class prepare_primitive_fusing;   // to be removed when possible
-    friend class prepare_binarization;       // to be removed when possible
+    friend class prepare_quantization;       // to be removed when possible
     friend class prepare_conv_eltw_fusing;   // to be removed when possible
     friend class reorder_inputs;             // to be removed when possible
     friend class remove_redundant_reorders;  // to be removed when possible
@@ -60,9 +60,13 @@ public:
     public:
         typedef std::list<program_node*> list_of_nodes;
         typedef list_of_nodes::const_iterator const_iterator;
+        typedef list_of_nodes::const_reverse_iterator const_reverse_iterator;
         typedef list_of_nodes::iterator node_iterator;
+        typedef list_of_nodes::reverse_iterator node_reverse_iterator;
         const_iterator begin() const { return _processing_order.begin(); }
         const_iterator end() const { return _processing_order.end(); }
+        const_reverse_iterator rbegin() const { return _processing_order.rbegin(); }
+        const_reverse_iterator rend() const { return _processing_order.rend(); }
 
         void calc_processing_order_visit(program_node* node);
         void calc_processing_order(program_impl& p);
@@ -191,6 +195,9 @@ public:
     // returns if 'node' has been extracted and removed successfully
     bool extract_and_remove(program_node& node);
 
+    // Fuses two nodes into fused_node and removes peer_node from graph
+    void fuse_nodes(program_node& fused_node, program_node& peer_node);
+
     // returns if 'node' has been removed
     bool remove_if_dangling(program_node& node);
 
@@ -242,6 +249,7 @@ private:
     void build_program(bool is_internal);
     void init_graph();
     void set_options();
+    void set_layout_optimizer_attributes(layout_optimizer& lo);
 
     void apply_opt_pass(base_pass& pass);
 
@@ -301,5 +309,3 @@ private:
 };
 
 }  // namespace cldnn
-
-API_CAST(::cldnn_program, cldnn::program_impl)
index f87dfa0..43cbb77 100644 (file)
@@ -18,7 +18,8 @@
 #include <set>
 #include <array>
 
-#include "api/CPP/primitive.hpp"
+#include "api/primitive.hpp"
+#include "api/activation.hpp"
 #include "internal_primitive.h"
 
 #include "meta_utils.h"
@@ -32,6 +33,7 @@ namespace cldnn {
 struct program_impl;
 class reorder_inputs;
 class graph_initializations;
+class prepare_quantization;
 
 template <class T>
 struct typed_program_node;
@@ -42,6 +44,16 @@ struct internal_primitive_type_base;
 class json_composite;
 class xml_composite;
 
+
+struct fused_primitive_desc {
+    std::shared_ptr<const primitive> prim;
+    size_t dep_start_idx;
+    std::vector<primitive_id> deps;
+    activation_func activation;
+    activation_additional_params activation_params;
+    layout output_layout = layout(data_types::f32, format::bfyx, tensor());
+};
+
 /*
     Base class for all primitives which wraps API class and extends it to be used
     in graph context.
@@ -58,7 +70,7 @@ struct program_node {
     friend class compile_graph;                     // to be removed when possible
     friend class graph_initializations;             // to be removed when possible
     friend class prepare_primitive_fusing;          // to be removed when possible
-    friend class prepare_binarization;              // to be removed when possible
+    friend class prepare_quantization;              // to be removed when possible
     friend class prepare_conv_eltw_fusing;          // to be removed when possible
     friend class prepare_conv_eltw_read_write_opt;  // to be removed when possible
     friend class propagate_constants;               // to be removed when possible
@@ -153,7 +165,7 @@ public:
 
     // sets cached output layout to an arbitrary value, invalidates users if new layout differs from previous one and @p
     // invalidate_users_if_changed is set to true returns whether output layout has changed
-    bool set_output_layout(layout new_layout, bool invalidate_users_if_changed = true);
+    bool set_output_layout(layout& new_layout, bool invalidate_users_if_changed = true);
 
     // forces recalculation of cached output layout, invalidates users if new layout is different than previous one and
     // @p invalidate_users_if_changed is set to true returns whether output layout has changed
@@ -182,16 +194,31 @@ public:
     bool is_marked(uint8_t val) const { return user_mark == val; }
     uint8_t get_user_mark() const { return user_mark; }
 
-    void set_fused_activation(cldnn_activation_func activation_func,
-                              cldnn_activation_additional_params additional_params) {
-        fused_activation.activation_func = activation_func;
-        fused_activation.additional_params = additional_params;
+    void add_fused_activation(activation_func activation_func,
+                              activation_additional_params additional_params) {
+        fused_activations.emplace_back(activation_func, additional_params);
     }
 
-    cldnn_activation_func get_fused_activation_func() const { return fused_activation.activation_func; }
+    std::vector<activation_func> get_fused_activations_funcs() const {
+        std::vector<activation_func> funcs;
+        std::transform(fused_activations.begin(),
+                       fused_activations.end(),
+                       std::back_inserter(funcs),
+                       [](fused_activation_params const& p) { return p.func; });
+        return funcs;
+    }
 
-    cldnn_activation_additional_params get_fused_activation_params() const {
-        return fused_activation.additional_params;
+    std::vector<activation_additional_params> get_fused_activations_params() const {
+        std::vector<activation_additional_params> params;
+        std::transform(fused_activations.begin(),
+                       fused_activations.end(),
+                       std::back_inserter(params),
+                       [](fused_activation_params const& p) { return p.params; });
+        return params;
+    }
+
+    void copy_fused_activation(const program_node& rhs) {
+        fused_activations = rhs.fused_activations;
     }
 
     // check/set if the node can be optimized out (removed from the network)
@@ -257,6 +284,33 @@ public:
         return reused_memory_color;
     }
 
+    virtual void add_fused_primitive(fused_primitive_desc& desc) {
+        fused_prims.push_back(desc);
+    }
+
+    virtual void add_fused_primitives(std::vector<fused_primitive_desc> descs) {
+        fused_prims.insert(fused_prims.end(), descs.begin(), descs.end());
+    }
+
+    const std::vector<fused_primitive_desc>& get_fused_primitives() const { return fused_prims; }
+
+    size_t get_fused_inputs_count() const {
+        size_t count = 0;
+        for (auto& fp : get_fused_primitives()) {
+            count += fp.deps.size();
+        }
+        return count;
+    }
+
+    bool has_fused_primitives() const { return !get_fused_primitives().empty(); }
+
+    layout get_fused_output_layout() const {
+        auto fused_prims = get_fused_primitives();
+        if (fused_prims.empty())
+            return layout(data_types::f32, format::bfyx, tensor());
+        return fused_prims.back().output_layout;
+    }
+
 protected:
     std::shared_ptr<primitive> desc;
     program_impl& myprog;
@@ -279,7 +333,7 @@ protected:
     uint8_t user_mark = 0;
     bool optimized = false;
     bool share_buffer = true;
-    std::array<bool, CLDNN_TENSOR_DIM_MAX> _support_padding_in_axis = {};  // zero-initialization
+    std::array<bool, tensor_dim_max> _support_padding_in_axis = {};  // zero-initialization
 
     mutable bool has_reused_memory = false;
     mutable uint32_t reused_memory_color = 0;
@@ -287,12 +341,18 @@ protected:
     const primitive_id org_id;
 
     struct fused_activation_params {
-        cldnn_activation_func activation_func = activation_none;
-        cldnn_activation_additional_params additional_params = {0.0f, 0.0f};
-    };
+        activation_func func = activation_func::none;
+        activation_additional_params params = {0.0f, 0.0f};
 
-    fused_activation_params fused_activation;
+        fused_activation_params() {}
+
+        fused_activation_params(activation_func _func, activation_additional_params _params) :
+                func(_func),
+                params(_params) {}
+    };
 
+    std::vector<fused_activation_params> fused_activations;
+    std::vector<fused_primitive_desc> fused_prims;
     void invalidate_users() const;
 };
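The fused-activation storage above changed from a single fused_activation_params to a vector, so several activations can now be attached to one node and are returned in insertion order. A short sketch using only the members shown in this file; 'node' stands for any program_node and the parameter values are illustrative:

    node.add_fused_activation(cldnn::activation_func::relu,  {0.0f, 0.0f});
    node.add_fused_activation(cldnn::activation_func::clamp, {0.0f, 6.0f});

    auto funcs  = node.get_fused_activations_funcs();    // {relu, clamp}, in insertion order
    auto params = node.get_fused_activations_params();   // matching activation_additional_params
    // funcs.size() == params.size() by construction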
 
@@ -303,6 +363,7 @@ struct api_typed_program_node_base : public program_node {
                   "PType should name a non-const, non-volatile type derived from cldnn::primitive but not from "
                   "cldnn::internal_primitive");
     friend class cldnn::graph_initializations;
+    friend class cldnn::prepare_quantization;
     friend struct cldnn::program_impl;
     friend class cldnn::reorder_inputs;
 
index a8c8d3b..0ffb3ac 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/proposal.hpp"
+#include "api/proposal.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <vector>
index 9da44c2..66b6020 100644 (file)
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #pragma once
-#include "api/CPP/pyramid_roi_align.hpp"
+#include "api/pyramid_roi_align.hpp"
 #include "primitive_inst.h"
 
 #include <memory>
index 75522ec..250b6ee 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/quantize.hpp"
+#include "api/quantize.hpp"
 #include "primitive_inst.h"
 #include <string>
 
@@ -31,11 +31,13 @@ public:
 
     program_node& input(size_t index = 0) const { return get_dependency(index); }
     size_t inputs_count() const { return get_dependencies().size(); }
-    void set_packed_binary_output(bool _packed_binary_output) { packed_binary_output = _packed_binary_output; }
-    bool get_packed_binary_output() const { return packed_binary_output; }
+    void set_output_data_type(data_types dt) { out_dt = dt; dt_changed = true; }
+    data_types get_output_data_type() const { return out_dt; }
+    bool has_custom_out_dt() const { return dt_changed; }
 
 private:
-    bool packed_binary_output = false;
+    data_types out_dt;
+    bool dt_changed = false;
 };
 
 using quantize_node = typed_program_node<quantize>;
index 7aa4ad6..9963505 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/reduce.hpp"
+#include "api/reduce.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index ff94cca..0a285b2 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/region_yolo.hpp"
+#include "api/region_yolo.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index fa5caab..e4824d0 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/reorder.hpp"
+#include "api/reorder.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index 1fd9e98..4f85286 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/reorg_yolo.hpp"
+#include "api/reorg_yolo.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 751276d..2ee1e45 100644 (file)
@@ -16,8 +16,9 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/reshape.hpp"
+#include "api/reshape.hpp"
 #include "primitive_inst.h"
+#include "error_handler.h"
 #include <string>
 #include <memory>
 
@@ -33,10 +34,13 @@ struct typed_program_node<reshape> : public typed_program_node_base<reshape> {
 public:
     using parent::parent;
 
-    program_node& input() const { return get_dependency(0); }
+    program_node& input() const {
+        CLDNN_ERROR_LESS_THAN(id(), "the number of dependencies", dependencies.size(), "1", 1, "ERROR: the node has no input");
+        return get_dependency(0);
+    }
 
     bool is_in_place() const {
-        if (this->is_output() || this->get_fused_activation_func() != activation_none)
+        if (this->is_output() || !this->get_fused_activations_funcs().empty())
             return false;
         return (!this->get_output_layout().data_padding && !input().get_output_layout(false).data_padding);
     }
index 0141146..631b591 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/reverse_sequence.hpp"
+#include "api/reverse_sequence.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 70d3d7e..b323d37 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/roi_pooling.hpp"
+#include "api/roi_pooling.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 0d15828..29815de 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/scale_grad_input.hpp"
+#include "api/scale_grad_input.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index b7f22f8..ecef6d1 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/scale_grad_weights.hpp"
+#include "api/scale_grad_weights.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index fa12b18..6b7f28c 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/scale.hpp"
+#include "api/scale.hpp"
 #include "primitive_inst.h"
 #include <string>
 #include <memory>
index 2046c8a..ffddcff 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include <api/CPP/select.hpp>
+#include <api/select.hpp>
 
 #include "primitive_inst.h"
 #include <string>
index 0fd82fa..168c4ec 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/shuffle_channels.hpp"
+#include "api/shuffle_channels.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index bb89fc2..8c93cc9 100644 (file)
@@ -14,8 +14,8 @@
 
 #pragma once
 
-#include <api/CPP/layout.hpp>
-#include <api/CPP/tensor.hpp>
+#include <api/layout.hpp>
+#include <api/tensor.hpp>
 
 #include <algorithm>
 #include <cassert>
index 1b4fc39..1b36293 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/softmax.hpp"
+#include "api/softmax.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index a764efc..88cc8d1 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/softmax_loss_grad.hpp"
+#include "api/softmax_loss_grad.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 0d60123..97add44 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/split.hpp"
+#include "api/split.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 1aedff1..16a2243 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/strided_slice.hpp"
+#include "api/strided_slice.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 7751c23..5bc64ff 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/tile.hpp"
+#include "api/tile.hpp"
 #include "primitive_inst.h"
 #include <string>
 
index 52a5752..f865a0d 100644 (file)
@@ -15,9 +15,9 @@
 */
 #pragma once
 #include <string>
-#include "api/CPP/tensor.hpp"
-#include "api/CPP/layout.hpp"
-#include "api/CPP/primitive.hpp"
+#include "api/tensor.hpp"
+#include "api/layout.hpp"
+#include "api/primitive.hpp"
 #include <memory>
 
 namespace cldnn {
@@ -93,6 +93,8 @@ inline std::string fmt_to_str(format fmt) {
             return "bfwzyx";
         case format::fs_b_yx_fsv32:
             return "fs_b_yx_fsv32";
+        case format::bfzyx_f16:
+            return "bfzyx_f16";
 
         case format::winograd_2x3_s1_weights:
             return "winograd_2x3_s1_weights";
@@ -134,7 +136,10 @@ inline std::string fmt_to_str(format fmt) {
             return "os_is_y_x8_osv8_isv4";
         case format::os_is_yx_osv32_isv32p:
             return "os_is_yx_osv32_isv32p";
-
+        case format::o_i_zyx_i16_o16:
+            return "o_i_zyx_i16_o16";
+        case format::i_o_zyx_o16_i16:
+            return "i_o_zyx_o16_i16";
         default:
             return "unknown (" + std::to_string(fmt.value) + ")";
     }
index 41ca28b..c8b465c 100644 (file)
@@ -16,9 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/primitive.hpp"
-#include "api/CPP/input_layout.hpp"
-#include "api_impl.h"
+#include "api/primitive.hpp"
+#include "api/input_layout.hpp"
 #include "refcounted_obj.h"
 
 #include <map>
@@ -55,7 +54,7 @@ public:
         }
     }
 
-    void change_input_layout(const primitive_id& id, layout new_layout) {
+    void change_input_layout(const primitive_id& id, const layout& new_layout) {
         auto& inp_layout = this->at(id);
         if (inp_layout->type != input_layout::type_id()) {
             throw std::runtime_error("Primitive: " + id + " is not input_layout.");
@@ -76,5 +75,3 @@ private:
     topology_map _primitives;
 };
 }  // namespace cldnn
-
-API_CAST(::cldnn_topology, cldnn::topology_impl)
index 86ea8af..bc19b5c 100644 (file)
@@ -16,7 +16,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/upsampling.hpp"
+#include "api/upsampling.hpp"
 #include "primitive_inst.h"
 #include <memory>
 #include "topology_impl.h"
index a959071..9fc03be 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id index_select_type_id() {
+primitive_type_id index_select::type_id() {
     static primitive_type_base<index_select> instance;
     return &instance;
 }
index b1bcf8a..3e8f2c9 100644 (file)
@@ -25,7 +25,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id input_layout_type_id() {
+primitive_type_id input_layout::type_id() {
     static primitive_type_base<input_layout> instance;
     return &instance;
 }
index a79cfee..72b9881 100644 (file)
@@ -143,6 +143,8 @@ kernel_selector::data_layout to_data_layout(format f) {
             return kernel_selector::data_layout::fs_b_yx_fsv32;
         case format::bfwzyx:
             return kernel_selector::data_layout::bfwzyx;
+        case format::bfzyx_f16:
+            return kernel_selector::data_layout::bfzyx_f16;
         default:
             return kernel_selector::data_layout::bfyx;
     }
@@ -252,6 +254,10 @@ kernel_selector::weights_layout to_weights_layout(format f) {
             return kernel_selector::weights_layout::bf_lyx_yx;
         case format::oiyx_o16:
             return kernel_selector::weights_layout::oiyx_o16;
+        case format::o_i_zyx_i16_o16:
+            return kernel_selector::weights_layout::o_i_zyx_i16_o16;
+        case format::i_o_zyx_o16_i16:
+            return kernel_selector::weights_layout::i_o_zyx_o16_i16;
         default:
             throw std::invalid_argument("Unable to convert tensor layout " + fmt_to_str(f) + " to weights layout");
     }
@@ -319,6 +325,10 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
             return cldnn::format::os_is_yx_osv32_isv32p;
         case kernel_selector::weights_layout::oizyx:
             return cldnn::format::bfzyx;
+        case kernel_selector::weights_layout::o_i_zyx_i16_o16:
+            return cldnn::format::o_i_zyx_i16_o16;
+        case kernel_selector::weights_layout::i_o_zyx_o16_i16:
+            return cldnn::format::i_o_zyx_o16_i16;
         default:
             return cldnn::format::bfyx;
     }
@@ -424,83 +434,83 @@ layout from_weights_tensor(const kernel_selector::weights_tensor& l) {
     return layout(type, format, t);
 }
 
-kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_activation_func activation_func) {
-    switch (activation_func) {
-        case activation_none:
+kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation) {
+    switch (activation) {
+        case cldnn::activation_func::none:
             return kernel_selector::activation_function::NONE;
-        case activation_logistic:
+        case cldnn::activation_func::logistic:
             return kernel_selector::activation_function::LOGISTIC;
-        case activation_hyperbolic_tan:
+        case cldnn::activation_func::hyperbolic_tan:
             return kernel_selector::activation_function::HYPERBOLIC_TAN;
-        case activation_relu:
+        case cldnn::activation_func::relu:
             return kernel_selector::activation_function::RELU;
-        case activation_relu_negative_slope:
+        case cldnn::activation_func::relu_negative_slope:
             return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
-        case activation_clamp:
+        case cldnn::activation_func::clamp:
             return kernel_selector::activation_function::CLAMP;
-        case activation_softrelu:
+        case cldnn::activation_func::softrelu:
             return kernel_selector::activation_function::SOFTRELU;
-        case activation_abs:
+        case cldnn::activation_func::abs:
             return kernel_selector::activation_function::ABS;
-        case activation_linear:
+        case cldnn::activation_func::linear:
             return kernel_selector::activation_function::LINEAR;
-        case activation_square:
+        case cldnn::activation_func::square:
             return kernel_selector::activation_function::SQUARE;
-        case activation_sqrt:
+        case cldnn::activation_func::sqrt:
             return kernel_selector::activation_function::SQRT;
-        case activation_elu:
+        case cldnn::activation_func::elu:
             return kernel_selector::activation_function::ELU;
-        case activation_sin:
+        case cldnn::activation_func::sin:
             return kernel_selector::activation_function::SIN;
-        case activation_asin:
+        case cldnn::activation_func::asin:
             return kernel_selector::activation_function::ASIN;
-        case activation_sinh:
+        case cldnn::activation_func::sinh:
             return kernel_selector::activation_function::SINH;
-        case activation_asinh:
+        case cldnn::activation_func::asinh:
             return kernel_selector::activation_function::ASINH;
-        case activation_cos:
+        case cldnn::activation_func::cos:
             return kernel_selector::activation_function::COS;
-        case activation_acos:
+        case cldnn::activation_func::acos:
             return kernel_selector::activation_function::ACOS;
-        case activation_cosh:
+        case cldnn::activation_func::cosh:
             return kernel_selector::activation_function::COSH;
-        case activation_acosh:
+        case cldnn::activation_func::acosh:
             return kernel_selector::activation_function::ACOSH;
-        case activation_log:
+        case cldnn::activation_func::log:
             return kernel_selector::activation_function::LOG;
-        case activation_log2:
+        case cldnn::activation_func::log2:
             return kernel_selector::activation_function::LOG2;
-        case activation_exp:
+        case cldnn::activation_func::exp:
             return kernel_selector::activation_function::EXP;
-        case activation_tan:
+        case cldnn::activation_func::tan:
             return kernel_selector::activation_function::TAN;
-        case activation_atan:
+        case cldnn::activation_func::atan:
             return kernel_selector::activation_function::ATAN;
-        case activation_atanh:
+        case cldnn::activation_func::atanh:
             return kernel_selector::activation_function::ATANH;
-        case activation_floor:
+        case cldnn::activation_func::floor:
             return kernel_selector::activation_function::FLOOR;
-        case activation_ceil:
+        case cldnn::activation_func::ceil:
             return kernel_selector::activation_function::CEIL;
-        case activation_negative:
+        case cldnn::activation_func::negative:
             return kernel_selector::activation_function::NEGATIVE;
-        case activation_not:
+        case cldnn::activation_func::negation:
             return kernel_selector::activation_function::NOT;
-        case activation_pow:
+        case cldnn::activation_func::pow:
             return kernel_selector::activation_function::POW;
-        case activation_erf:
+        case cldnn::activation_func::erf:
             return kernel_selector::activation_function::ERF;
-        case activation_reciprocal:
+        case cldnn::activation_func::reciprocal:
             return kernel_selector::activation_function::RECIPROCAL;
-        case activation_selu:
+        case cldnn::activation_func::selu:
             return kernel_selector::activation_function::SELU;
-        case activation_sign:
+        case cldnn::activation_func::sign:
             return kernel_selector::activation_function::SIGN;
-        case activation_softplus:
+        case cldnn::activation_func::softplus:
             return kernel_selector::activation_function::SOFTPLUS;
-        case activation_softsign:
+        case cldnn::activation_func::softsign:
             return kernel_selector::activation_function::SOFTSIGN;
-        case activation_hard_sigmoid:
+        case cldnn::activation_func::hard_sigmoid:
             return kernel_selector::activation_function::HARD_SIGMOID;
         default:
             throw std::runtime_error("Unknown activation function");
@@ -509,13 +519,13 @@ kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_
 }
 
 kernel_selector::activation_function get_kernel_selector_activation_grad_param(
-    cldnn_activation_grad_func activation_grad_func) {
+    activation_grad_func activation_grad_func) {
     switch (activation_grad_func) {
-        case activation_grad_none:
+        case cldnn::activation_grad_func::none:
             return kernel_selector::activation_function::NONE_GRAD;
-        case activation_grad_relu:
+        case cldnn::activation_grad_func::relu:
             return kernel_selector::activation_function::RELU_GRAD;
-        case activation_grad_relu_negative_slope:
+        case cldnn::activation_grad_func::relu_negative_slope:
             return kernel_selector::activation_function::RELU_NEGATIVE_SLOPE_GRAD;
         default:
             throw std::runtime_error("Unknown activation_grad function");
index 60f5c54..4136d97 100644 (file)
 
 using namespace cldnn;
 
+std::pair<std::shared_ptr<reorder>, bool> reorder_factory::get_reorder(primitive_id src_id,
+                                                                       layout in_layout,
+                                                                       layout out_layout
+) {
+    if (in_layout == out_layout)
+        return std::make_pair(nullptr, true);
+
+    cache_key ckey{ src_id, out_layout };
+    auto itr = _cached_reorders.find(ckey);
+    if (itr != _cached_reorders.end())
+        return std::make_pair(itr->second, true);
+
+    auto count = _cached_reorders.size();
+    std::stringstream ss;
+    ss << src_id << "_reorder_" << count;
+
+    auto reorder = std::make_shared<cldnn::reorder>(ss.str(), src_id, out_layout);
+    _cached_reorders[ckey] = reorder;
+
+    return std::make_pair(reorder, false);
+}
+
+std::vector<std::pair<std::shared_ptr<primitive>, bool>> reorder_factory::get_weights_reorder(
+    primitive_id input_id,
+    const layout& old_layout,
+    const kernel_selector::weights_reorder_params& reorder_params) {
+
+    if (reorder_params.engine == kernel_selector::weights_reorder_params::Engine::NONE)
+        return {};
+
+    std::vector<std::pair<std::shared_ptr<primitive>, bool>> ret;
+
+    if (reorder_params.engine == kernel_selector::weights_reorder_params::Engine::CPU &&
+        reorder_params.cpuKernel != nullptr) {
+        const auto intermediate_format = from_weights_layout(reorder_params.cpuKernel->GetExpectedInputLayout());
+        const auto intermediate_type = from_weights_type(reorder_params.cpuKernel->GetExpectedInputType());
+        if (intermediate_format != old_layout.format || intermediate_type != old_layout.data_type) {
+            const layout intermediate_layout = { intermediate_type,
+                                                intermediate_format,
+                                                old_layout.size.transform(intermediate_format, 1) };
+
+            auto reorder = get_reorder(input_id, old_layout, intermediate_layout);
+            if (reorder.first) {
+                ret.push_back(reorder);
+                input_id = reorder.first->id;
+            }
+        }
+    }
+
+    // TODO: Add conversion of WeightsTensor to cldnn::tensor to have not flattened shape
+    // layout expected_layout = from_weights_tensor(reorder_params.dest);
+
+    auto new_dtype = from_weights_type(reorder_params.dest.GetDType());
+    const auto bpp = data_type_traits::size_of(new_dtype);
+    tensor expected_size = { 1, 1, 1, (tensor::value_type)(reorder_params.dest.PhysicalSizeInBytes() / bpp) };
+
+    bool toImageType = IsImageType(reorder_params.dest.GetLayout());
+    bool toDynamicLSTMType = IsDynamicLSTMType(reorder_params.dest.GetLayout());
+    if (toImageType || toDynamicLSTMType)
+        expected_size = old_layout.size;
+
+    layout expected_layout = { new_dtype,
+                              toImageType ? from_weights_layout(reorder_params.dest.GetLayout())
+                                          : format::bfyx,  // simple linear format (flatten to x channel)
+                              expected_size };
+
+    cache_key ckey{ input_id, expected_layout };
+    auto itr = _cached_generic_reorders.find(ckey);
+    if (itr != _cached_generic_reorders.end()) {
+        ret.push_back(std::make_pair(itr->second, true));
+    } else {
+        auto count = _cached_generic_reorders.size();
+        std::stringstream ss;
+        ss << input_id << "_generic_layer_" << count;
+
+        auto reorder = std::make_shared<cldnn::generic_layer>(ss.str(), input_id, expected_layout, reorder_params);
+        _cached_generic_reorders[ckey] = reorder;
+        ret.push_back(std::make_pair(reorder, false));
+    }
+
+    return ret;
+}
+
+bool layout_optimizer::is_format_supported(program_node& node, format::type fmt) {
+    if (node.is_type<fully_connected>() && fmt == format::byxf)
+        return false;
+
+    if (node.is_type<input_layout>())
+        return node.get_output_layout().format == fmt;
+
+    auto& engine = node.get_program().get_engine();
+    auto prev_layout = node.get_output_layout();
+    auto new_layout = prev_layout;
+    new_layout.format = fmt;
+    node.set_output_layout(new_layout, false);
+
+    auto supported = node.type()->does_possible_implementation_exist(engine, node);
+
+    node.set_output_layout(prev_layout, false);
+
+    return supported;
+}
+
+bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, format fmt_prev, format fmt_next) {
+    auto prev_simple = fmt_prev == format::bfyx || fmt_prev == format::byxf || fmt_prev == format::yxfb;
+    auto next_simple = fmt_next == format::bfyx || fmt_next == format::byxf || fmt_next == format::yxfb;
+    auto prev_output_layout = prev.get_output_layout();
+    auto next_output_layout = next.get_output_layout();
+
+    if (next.is_type<reorder>())
+        return true;
+
+    if (next.is_type<pooling>() && prev_simple && next_simple)
+        return true;
+
+    if (next.is_type<eltwise>() && prev_simple && next_simple)
+        return true;
+
+    if (next.is_type<fully_connected>() &&
+        (fmt_prev == format::bfyx || fmt_prev == format::yxfb || fmt_prev == format::bfyx_f16 || fmt_prev == format::fs_b_yx_fsv32))
+        return true;
+
+    if (next.is_type<convolution>() &&
+        fmt_prev == format::bfyx &&
+        (fmt_next == format::fs_b_yx_fsv32 ||
+        (fmt_next == format::bfyx_f16 && next_output_layout.size.feature[0] >= 16 && prev_output_layout.size.feature[0] == 3)))
+        return true;
+
+    return false;
+}
+
+
 namespace {
 bool should_use_winograd_2x3_s1(std::shared_ptr<const convolution> const& prim,
                                 layout const& input_layout,
@@ -94,7 +226,8 @@ bool layout_optimizer::convolution_bfyx_opt(layout const& output_layout,
     return false;
 }
 
-bool layout_optimizer::convolution_byxf_opt(layout const& output_layout,
+bool layout_optimizer::convolution_byxf_opt(const layout& input_layout,
+                                            layout const& output_layout,
                                             const layout& weights_layout,
                                             std::shared_ptr<const convolution> conv) {
     // A set of rules that define when byxf mem format has better performance
@@ -103,7 +236,7 @@ bool layout_optimizer::convolution_byxf_opt(layout const& output_layout,
          weights_layout.size.batch[0] % 64 == 0 && conv->stride.spatial[0] == 1 && conv->stride.spatial[1] == 1 &&
          conv->input_offset.spatial[0] == 0 && conv->input_offset.spatial[1] == 0) ||
         // Winograd
-        should_use_winograd_2x3_s1(conv, output_layout, weights_layout, _output_size_handling_enabled))
+        should_use_winograd_2x3_s1(conv, input_layout, weights_layout, _output_size_handling_enabled))
         return true;
 
     return false;
@@ -130,74 +263,97 @@ bool layout_optimizer::convolution_bfyx_f16_opt(layout const& input_layout,
     return false;
 }
 
+bool layout_optimizer::convolution_bfzyx_f16_opt(layout const& input_layout,
+    const layout& weights_layout,
+    std::shared_ptr<const convolution> conv) {
+    // A set of rules that define when bfzyx_f16 mem format can be used
+    if ((input_layout.format == format::bfzyx ||
+        input_layout.format == format::bfzyx_f16) &&
+        (input_layout.data_type == data_types::f32 ||
+            input_layout.data_type == data_types::f16) &&
+        input_layout.size.batch[0] == 1 &&
+        weights_layout.size.batch[0] % 16 == 0 &&
+        (input_layout.size.feature[0] / conv->split()) % 16 == 0 &&
+        conv->dilation == tensor(1) && conv->groups == 1)
+        return true;
+    return false;
+}
+
+bool layout_optimizer::deconvolution_bfzyx_f16_opt(layout const& input_layout,
+    const layout& weights_layout,
+    std::shared_ptr<const deconvolution> deconv) {
+    // A set of rules that define when bfzyx_f16 mem format can be used
+    if ((input_layout.format == format::bfzyx ||
+        input_layout.format == format::bfzyx_f16) &&
+        (input_layout.data_type == data_types::f32 ||
+            input_layout.data_type == data_types::f16) &&
+        input_layout.size.batch[0] == 1 &&
+        weights_layout.size.batch[0] % 16 == 0 && input_layout.size.feature[0] % 16 == 0 &&
+        deconv->split() == 1)
+        return true;
+    return false;
+}
+
 bool layout_optimizer::users_for_convolution_byxf_opt(program_node const& node, uint32_t depth) {
     // This function checks if byxf optimization can be applied to the required depth of node's users.
     // Setting depth to 1 will check only node's users, depth = 2 are user's users etc.
     if (depth == 0)
         return true;
 
-    bool use_byxf = false;
     for (auto& user : node.get_users()) {
         // primitives that support transitions byxf->other format and other format->byxf are valid for byxf opt
         if (user->type() == cldnn::eltwise::type_id() || user->type() == cldnn::pooling::type_id()) {
-            use_byxf = users_for_convolution_byxf_opt(*user, depth - 1);
+            if (!users_for_convolution_byxf_opt(*user, depth - 1))
+                return false;
         // convolution that is capable to use byxf and is performant is also valid for byxf opt
         } else if (user->type() == cldnn::convolution::type_id()) {
             auto conv_prim = user->as<convolution>().get_primitive();
-            if (convolution_byxf_opt(user->calc_output_layout(),
+            if (convolution_byxf_opt(node.get_output_layout(),
+                                     user->calc_output_layout(),
                                      user->get_dependency(1).get_output_layout(),
                                      conv_prim)) {
-                use_byxf = users_for_convolution_byxf_opt(*user, depth - 1);
+                if (!users_for_convolution_byxf_opt(*user, depth - 1))
+                    return false;
             } else {
-                use_byxf = false;
-                break;
+                return false;
             }
         } else {
-            use_byxf = false;
-            break;
+            return false;
         }
     }
-    return use_byxf;
+    return true;
 }
 
-bool layout_optimizer::deps_depth_in_same_format(program_node const& node, const cldnn::format format, uint32_t depth) {
+bool layout_optimizer::deps_for_convolution_byxf_opt(program_node const& node, uint32_t depth) {
     // This function checks if requested format is the same for node's users in the required depth.
     // Setting depth to 1 will check only node's dependencies, depth = 2 are dep's dependencies etc.
     if (depth == 0)
         return true;
 
-    bool same_format = false;
     for (auto& dep : node.get_dependencies()) {
         // skip data and generic_layers
-        if (dep->type() == cldnn::data::type_id() || dep->type() == cldnn::generic_layer::type_id())
+        if (dep->is_type<data>() || dep->is_type<generic_layer>())
             continue;
 
-        // if dependency is of type reorder and format is different then skip it and move to its dependency
-        // further in graph such reorders could be optimized out
-        if (dep->type() == cldnn::reorder::type_id() && dep->get_dependencies().size() == 1 &&
-            dep->get_output_layout().format != format) {
-            same_format = deps_depth_in_same_format(dep->get_dependency(0), format, depth);
-        } else if (dep->get_output_layout().format == format) {
-            // if dependency is of type reorder and format is the same, check if its users are primitives with support
-            // for different input and output formats if that is true then graph optimizer will optimize such reorder and
-            // layout for its dependency will be changed
-            if (dep->type() == cldnn::reorder::type_id() &&
-                (dep->get_dependency(0).type() == cldnn::eltwise::type_id() ||
-                 dep->get_dependency(0).type() == cldnn::pooling::type_id()) &&
-                dep->get_dependencies().size() == 1)
-                same_format = deps_depth_in_same_format(dep->get_dependency(0), format, depth - 1);
-            else
-                same_format = deps_depth_in_same_format(*dep, format, depth - 1);
-        } else {
-            same_format = false;
-            break;
+        if (dep->is_type<convolution>()) {
+            auto& conv_dep = dep->as<convolution>();
+            if (!convolution_byxf_opt(conv_dep.input().get_output_layout(),
+                                      conv_dep.get_output_layout(),
+                                      conv_dep.weights().get_output_layout(),
+                                      conv_dep.get_primitive())) {
+                return false;
+            }
+        } else if (!dep->is_type<pooling>() && !dep->is_type<eltwise>()) {
+            return false;
         }
+
+        if (!deps_for_convolution_byxf_opt(*dep, depth - 1))
+            return false;
     }
-    return same_format;
+    return true;
 }
 
 layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
                                              convolution_node const& node,
                                              layout const& output_or_weights_layout) {
     auto prim = node.get_primitive();
@@ -210,150 +366,85 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
         if (conv_node->get_groups() > 1) dw_prev_check = true;
     }
 
-    if (type == data_type::weights || type == data_type::bias) {
-        expected_data_type = output_or_weights_layout.data_type;
+    if (_optimization_attributes.bfzyx_f16_network &&
+         convolution_bfzyx_f16_opt(node.get_dependency(0).get_output_layout(),
+                                   output_or_weights_layout, prim)) {
+        expected_tensor = current_layout.size;
+        expected_format = cldnn::format::bfzyx_f16;
+    } else if (current_layout.format == format::bfzyx) {
+        expected_tensor = current_layout.size;
+        expected_format = cldnn::format::bfzyx;
+    } else if ((_optimization_attributes.bfyx_f16_network &&
+                convolution_bfyx_f16_opt(node.get_dependency(0).get_output_layout(), output_or_weights_layout, prim)) ||
+                node.get_dependency(0).get_output_layout().format == format::bfyx_f16) {
+        expected_tensor = current_layout.size;
+        expected_format = cldnn::format::bfyx_f16;
+    } else if (current_layout.data_type == data_types::f16 &&
+                layout_optimizer::convolution_byxf_opt(node.input().get_output_layout(), current_layout, output_or_weights_layout, prim) &&
+                (users_for_convolution_byxf_opt(node, 2) ||
+                 deps_for_convolution_byxf_opt(node, 2)) &&
+                // todo: remove this condition when yxfb optimizations will be disabled
+                current_layout.format != cldnn::format::yxfb && current_layout.size.batch[0] == 1 &&
+                prim->dilation == tensor {1} && !node.get_transposed() &&
+                !dw_prev_check && node.get_groups() == 1) {
+        expected_tensor = current_layout.size;
+        expected_format = cldnn::format::byxf;
+    } else if ((current_layout.data_type == data_types::f16 && current_layout.format == format::fs_b_yx_fsv32) ||
+                (current_layout.data_type == data_types::f16 && prim->split() == 1 && prim->groups == 1 &&
+                current_layout.size.feature[0] % 32 == 0 && current_layout.size.batch[0] != 1 &&
+                current_layout.size.batch[0] <= 16 && _optimization_attributes.splitted_convolution == 0 &&
+                _optimization_attributes.group_convolution == 0 &&
+                _optimization_attributes.deformable_convolution == 0 &&
+                _optimization_attributes.only_fsv32_layers == 1)) {
+        // fp16 32 features things
+        if (prim->split() != 1 ||
+            current_layout.size.batch[0] == 1)  // escape to bfyx format for unsupported node
+            expected_format = format::bfyx;
+        else
+            expected_format = format::fs_b_yx_fsv32;
+    } else if (current_layout.format == format::b_fs_yx_fsv4 ||
+                current_layout.format == format::os_is_yx_osv16_isv4) {
+        // imad case
+        // nothing to do, just go out from here.
+    } else if (current_layout.data_type == data_types::i8) {  // mmad case
+        expected_tensor = current_layout.size;
+        expected_format = current_layout.format;  // cldnn::format::byxf_af32;
+    } else if (layout_optimizer::convolution_bfyx_opt(current_layout, output_or_weights_layout, prim) ||
+                (_output_size_handling_enabled && prim->with_output_size) || node.get_transposed()) {
+        // commented out due to performance reasons, maybe enable in future
+        /*if (current_layout.data_type == data_types::f32 &&
+        current_layout.size.batch[0] % 16 == 0 &&
+        current_layout.format == format::bfyx &&
+        output_or_weights_layout.size.spatial[0] == 1 && output_or_weights_layout.size.spatial[1] == 1 &&
+        prim->stride.spatial[0] == 1 && prim->stride.spatial[1] == 1 &&
+        prim->input_offset.spatial[0] == 0 && prim->input_offset.spatial[1] == 0 &&
+        !node.get_transposed())
+    {
+        if (!((current_layout.size.feature[0] % 8) == 0 && (current_layout.size.spatial[0] *
+    current_layout.size.spatial[1]) == 16 && current_layout.data_padding == padding{ { 0,0,0,0 }, 0 }))
+        {
+            expected_tensor = current_layout.size.transform(cldnn::format::bf8_xy16, 1);
+            expected_format = cldnn::format::bf8_xy16;
+        }
     }
-
-    switch (type) {
-        case data_type::bias:  // convolution bias
-            expected_tensor = cldnn::tensor(1, 1, static_cast<tensor::value_type>(current_layout.count()), 1);
-            expected_format = cldnn::format::bfyx;
-            break;
-
-        case data_type::input:  // convolution input
-            if (current_layout.format == format::bfzyx) {
-                expected_tensor = current_layout.size;
+    else*/
+        {
+            expected_tensor = current_layout.size;
+            if (current_layout.format == format::bfzyx_f16)
                 expected_format = cldnn::format::bfzyx;
-            } else if (_optimization_attributes.bfyx_f16_network &&
-                       convolution_bfyx_f16_opt(current_layout, output_or_weights_layout, prim)) {
-                expected_tensor = current_layout.size;
-                expected_format = cldnn::format::bfyx_f16;
-            } else if (current_layout.data_type == data_types::f16 &&
-                       layout_optimizer::convolution_byxf_opt(current_layout, output_or_weights_layout, prim) &&
-                       (users_for_convolution_byxf_opt(node, 2) ||
-                        deps_depth_in_same_format(node, cldnn::format::byxf, 2)) &&
-                       // todo: remove this condition when yxfb optimizations will be disabled
-                       current_layout.format != cldnn::format::yxfb && current_layout.size.batch[0] == 1 &&
-                       prim->dilation == tensor {1} && !node.get_transposed() &&
-                       !dw_prev_check && node.get_groups() == 1) {
-                expected_tensor = current_layout.size;
-                expected_format = cldnn::format::byxf;
-            } else if ((current_layout.data_type == data_types::f16 && current_layout.format == format::fs_b_yx_fsv32) ||
-                     (current_layout.data_type == data_types::f16 && prim->split() == 1 && prim->groups == 1 &&
-                      current_layout.size.feature[0] % 32 == 0 && current_layout.size.batch[0] != 1 &&
-                      current_layout.size.batch[0] <= 16 && _optimization_attributes.splitted_convolution == 0 &&
-                      _optimization_attributes.group_convolution == 0 &&
-                      _optimization_attributes.deformable_convolution == 0 &&
-                      _optimization_attributes.only_fsv32_layers == 1)) {
-                // fp16 32 features things
-                if (output_or_weights_layout.size.feature[0] == 3 ||  // use bfyx -> fs_byx_fsv32 convolution
-                    prim->split() != 1 ||
-                    current_layout.size.batch[0] == 1)  // escape to bfyx format for unsupported node
-                    expected_format = format::bfyx;
-                else
-                    expected_format = format::fs_b_yx_fsv32;
-            } else if (current_layout.format == format::b_fs_yx_fsv4 ||
-                     current_layout.format == format::os_is_yx_osv16_isv4) {
-                // imad case
-                // nothing to do, just go out from here.
-            } else if (current_layout.data_type == data_types::i8) {  // mmad case
-                expected_tensor = current_layout.size;
-                expected_format = current_layout.format;  // cldnn::format::byxf_af32;
-            } else if (layout_optimizer::convolution_bfyx_opt(current_layout, output_or_weights_layout, prim) ||
-                       (_output_size_handling_enabled && prim->with_output_size) || node.get_transposed()) {
-                // commented out due to performance reasons, maybe enable in future
-                /*if (current_layout.data_type == data_types::f32 &&
-                current_layout.size.batch[0] % 16 == 0 &&
-                current_layout.format == format::bfyx &&
-                output_or_weights_layout.size.spatial[0] == 1 && output_or_weights_layout.size.spatial[1] == 1 &&
-                prim->stride.spatial[0] == 1 && prim->stride.spatial[1] == 1 &&
-                prim->input_offset.spatial[0] == 0 && prim->input_offset.spatial[1] == 0 &&
-                !node.get_transposed())
-            {
-                if (!((current_layout.size.feature[0] % 8) == 0 && (current_layout.size.spatial[0] *
-            current_layout.size.spatial[1]) == 16 && current_layout.data_padding == padding{ { 0,0,0,0 }, 0 }))
-                {
-                    expected_tensor = current_layout.size.transform(cldnn::format::bf8_xy16, 1);
-                    expected_format = cldnn::format::bf8_xy16;
-                }
-            }
-            else*/
-                {
-                    expected_tensor = current_layout.size;
-                    expected_format = cldnn::format::bfyx;
-                }
-
-            } else {
-                expected_tensor = current_layout.size;
-                expected_format = cldnn::format::yxfb;
-            }
-
-            break;
-
-        default:
-            throw std::runtime_error(
-                "Unsupported data type in layout_optimizer::get_expected_layout for convolution primitive");
-    }
-
-    return layout(expected_data_type, expected_format, expected_tensor);
-}
-
-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
-                                             fully_connected_node const& node,
-                                             layout const& output_or_weights_layout) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.size;
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-
-    if (type == data_type::weights || type == data_type::bias) {
-        expected_data_type = output_or_weights_layout.data_type;
-    }
-
-    switch (type) {
-        case data_type::bias:  // fc bias
-            expected_tensor = cldnn::tensor(1, 1, static_cast<tensor::value_type>(current_layout.count()), 1);
-            expected_format = cldnn::format::bfyx;
-            break;
-
-        default:
-            throw std::runtime_error(
-                "Unsupported data type in layout_optimizer::get_expected_layout for fully-connected primitive");
-    }
-
-    return layout(expected_data_type, expected_format, expected_tensor);
-}
-
-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
-                                             lstm_gemm_node const& node,
-                                             layout const& output_or_weights_layout) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.size;
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-
-    if (type == data_type::weights || type == data_type::bias) {
-        expected_data_type = output_or_weights_layout.data_type;
-    }
-
-    switch (type) {
-        case data_type::bias:
-            expected_tensor = cldnn::tensor(1, 1, static_cast<tensor::value_type>(current_layout.count()), 1);
-            expected_format = cldnn::format::bfyx;
-            break;
+            else
+                expected_format = cldnn::format::bfyx;
+        }
 
-        default:
-            throw std::runtime_error(
-                "Unsupported data type in layout_optimizer::get_expected_layout for fully-connected primitive");
+    } else {
+        expected_tensor = current_layout.size;
+        expected_format = cldnn::format::yxfb;
     }
 
     return layout(expected_data_type, expected_format, expected_tensor);
 }
 
 layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
                                              deconvolution_node const& node,
                                              layout const& output_or_weights_layout) {
     auto prim = node.get_primitive();
@@ -361,26 +452,15 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
     auto expected_data_type = current_layout.data_type;
     auto expected_format = current_layout.format;
 
-    if (type == data_type::weights || type == data_type::bias) {
-        expected_data_type = output_or_weights_layout.data_type;
-    }
-
-    switch (type) {
-        case data_type::bias:  // convolution bias
-            expected_tensor = cldnn::tensor(1, 1, static_cast<tensor::value_type>(current_layout.count()), 1);
-            expected_format = cldnn::format::bfyx;
-            break;
-
-        default:
-            throw std::runtime_error(
-                "Unsupported data type in layout_optimizer::get_expected_layout for deconvolution primitive");
+    if (_optimization_attributes.bfzyx_f16_network &&
+        deconvolution_bfzyx_f16_opt(current_layout, output_or_weights_layout, prim)) {
+        expected_tensor = current_layout.size;
+        expected_format = cldnn::format::bfzyx_f16;
     }
-
     return layout(expected_data_type, expected_format, expected_tensor);
 }
 
 layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
                                              detection_output_node const& node,
                                              layout const& output_or_weights_layout) {
     auto prim = node.get_primitive();
@@ -388,161 +468,20 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
     auto expected_data_type = data_types::f32;
     auto expected_format = output_or_weights_layout.format;
 
-    if (type != data_type::input)
-        CLDNN_ERROR_MESSAGE(prim->id, "detection_output only supports optimization of its output (no weights/biases)");
-
-    return layout(expected_data_type, expected_format, expected_tensor);
-}
-
-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
-                                             embed_node const& node,
-                                             layout const& output_or_weights_layout) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.size;
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-
-    if (type == data_type::weights || type == data_type::bias) {
-        expected_data_type = output_or_weights_layout.data_type;
-    }
-
-    switch (type) {
-        case data_type::bias:
-            expected_tensor = cldnn::tensor(1, 1, static_cast<tensor::value_type>(current_layout.count()), 1);
-            expected_format = cldnn::format::bfyx;
-            break;
-
-        default:
-            throw std::runtime_error(
-                "Unsupported data type in layout_optimizer::get_expected_layout for embed primitive");
-    }
-
     return layout(expected_data_type, expected_format, expected_tensor);
 }
 
 layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             data_type type,
                                              binary_convolution_node const& node,
                                              layout const& output_or_weights_layout) {
     auto prim = node.get_primitive();
     auto expected_tensor = current_layout.size;
     auto expected_data_type = data_types::bin;
-    auto expected_format = current_layout.format;
-
-    if (type == data_type::weights || type == data_type::bias) {
-        expected_data_type = output_or_weights_layout.data_type;
-    }
-
-    switch (type) {
-        case data_type::bias:  // convolution bias
-            expected_tensor = cldnn::tensor(1, 1, static_cast<tensor::value_type>(current_layout.count()), 1);
-            expected_format = cldnn::format::bfyx;
-            break;
-
-        case data_type::input:  // convolution input
-            expected_tensor = current_layout.size;
-            expected_format = cldnn::format::b_fs_yx_32fp;
-            break;
-
-        default:
-            throw std::runtime_error(
-                "Unsupported data type in layout_optimizer::get_expected_layout for binary_convolution primitive");
-    }
+    auto expected_format = cldnn::format::b_fs_yx_32fp;
 
     return layout(expected_data_type, expected_format, expected_tensor);
 }
 
-std::pair<std::shared_ptr<cldnn::reorder>, bool> layout_optimizer::create_reorder_if_needed(
-    const layout& current_layout,
-    const cldnn::primitive_id& memid,
-    layout const& expected_layout) {
-    if (current_layout != expected_layout) {
-        cache_key ckey{memid, expected_layout};
-        auto itr = _cached_reorders.find(ckey);
-        if (itr != _cached_reorders.end())
-            return std::make_pair(itr->second, true);
-
-        auto count = _cached_reorders.size();
-        std::stringstream ss;
-        ss << "reorder_" << count << "_" << memid;
-
-        auto reorder = std::make_shared<cldnn::reorder>(ss.str(), memid, expected_layout);
-        _cached_reorders[ckey] = reorder;
-        return std::make_pair(reorder, false);
-    }
-
-    return std::make_pair(nullptr, true);
-}
-
-std::pair<std::shared_ptr<cldnn::generic_layer>, bool> layout_optimizer::create_reorder_from_given_source(
-    const cldnn::primitive_id& memid,
-    layout const& expected_layout,
-    const kernel_selector::weights_reorder_params& reorder_params) {
-    cache_key ckey{memid, expected_layout};
-    auto itr = _cached_generic_layers.find(ckey);
-    if (itr != _cached_generic_layers.end())
-        return std::make_pair(itr->second, true);
-
-    auto count = _cached_generic_layers.size();
-    std::stringstream ss;
-    ss << memid << "_generic_layer_" << count;
-
-    auto reorder = std::make_shared<cldnn::generic_layer>(ss.str(), memid, expected_layout, reorder_params);
-    _cached_generic_layers[ckey] = reorder;
-    return std::make_pair(reorder, false);
-}
-
-std::vector<std::pair<std::shared_ptr<primitive>, bool>> layout_optimizer::get_generic_layer(
-    const kernel_selector::weights_reorder_params& reorder_params,
-    primitive_id input_id,
-    const layout& old_layout,
-    data_type type) {
-    if (reorder_params.engine == kernel_selector::weights_reorder_params::Engine::NONE || type != data_type::weights)
-        return {};
-
-    std::vector<std::pair<std::shared_ptr<primitive>, bool>> ret;
-
-    if (reorder_params.engine == kernel_selector::weights_reorder_params::Engine::CPU &&
-        reorder_params.cpuKernel != nullptr) {
-        const auto intermediate_format = from_weights_layout(reorder_params.cpuKernel->GetExpectedInputLayout());
-        const auto intermediate_type = from_weights_type(reorder_params.cpuKernel->GetExpectedInputType());
-        if (intermediate_format != old_layout.format || intermediate_type != old_layout.data_type) {
-            const layout intermediate_layout = {intermediate_type,
-                                                intermediate_format,
-                                                old_layout.size.transform(intermediate_format, 1)};
-
-            auto reorder = create_reorder_if_needed(old_layout, input_id, intermediate_layout);
-            if (reorder.first) {
-                ret.push_back(reorder);
-                input_id = reorder.first->id;
-            }
-        }
-    }
-
-    // TODO: Add conversion of WeightsTensor to cldnn::tensor to have not flattened shape
-    // layout expected_layout = from_weights_tensor(reorder_params.dest);
-
-    auto new_dtype = from_weights_type(reorder_params.dest.GetDType());
-    const auto bpp = data_type_traits::size_of(new_dtype);
-    tensor expected_size = {1, 1, 1, (tensor::value_type)(reorder_params.dest.PhysicalSizeInBytes() / bpp)};
-
-    bool toImageType = IsImageType(reorder_params.dest.GetLayout());
-    if (toImageType)
-        expected_size = old_layout.size;
-
-    layout expected_layout = {new_dtype,
-                              toImageType ? from_weights_layout(reorder_params.dest.GetLayout())
-                                          : format::bfyx,  // simple linear format (flatten to x channel)
-                              expected_size};
-
-    auto reorder = create_reorder_from_given_source(input_id, expected_layout, reorder_params);
-    if (reorder.first)
-        ret.push_back(reorder);
-
-    return ret;
-}
-
 void layout_optimizer::set_optimization_attribute(optimization_attributes_type attribute, int32_t val) {
     switch (attribute) {
         case optimization_attributes_type::splitted_convolution:
@@ -563,6 +502,9 @@ void layout_optimizer::set_optimization_attribute(optimization_attributes_type a
         case optimization_attributes_type::bfyx_f16_network:
             _optimization_attributes.bfyx_f16_network = val;
             break;
+        case optimization_attributes_type::bfzyx_f16_network:
+            _optimization_attributes.bfzyx_f16_network = val;
+            break;
         default:
             throw std::out_of_range("unsupported layout optimization attribute");
     }
@@ -578,8 +520,25 @@ bool layout_optimizer::is_format_optimized(const convolution_node& node, const f
             return convolution_bfyx_f16_opt(input_layout, weights_layout, prim) &&
                    // Work-around for inability to use bfyx_f16 and winograd together
                    !should_use_winograd_2x3_s1(prim, input_layout, weights_layout, _output_size_handling_enabled);
+        case format::bfzyx_f16:
+            return convolution_bfzyx_f16_opt(input_layout, weights_layout, prim);
         default:
             throw std::invalid_argument(
                 "[Layout optimizer] Other formats in is_format_optimized(...) method are not implemented!");
     }
 }
+
+bool layout_optimizer::is_format_optimized(const deconvolution_node& node, const format& format) {
+    auto input_layout = node.input().get_output_layout();
+    auto weights_layout = node.weights().get_output_layout();
+    auto prim = node.get_primitive();
+
+    switch (format) {
+    case format::bfzyx_f16:
+        return deconvolution_bfzyx_f16_opt(input_layout, weights_layout, prim);
+    default:
+        throw std::invalid_argument(
+            "[Layout optimizer] Other formats in is_format_optimized(...) method are not implemented!");
+    }
+}
+
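For reference on the reorder_factory added at the top of this file's diff: get_reorder() memoizes reorders by (source primitive id, target layout) and returns a (primitive, already-known) pair, so an identical conversion is only materialized once per program. A self-contained sketch of that caching shape with stand-in types (not the real cldnn::layout / cldnn::reorder):

    #include <map>
    #include <memory>
    #include <sstream>
    #include <string>
    #include <utility>

    // Just enough of a "layout" to act as a cache key.
    struct fake_layout {
        std::string format;
        bool operator<(const fake_layout& o) const { return format < o.format; }
        bool operator==(const fake_layout& o) const { return format == o.format; }
    };
    struct fake_reorder { std::string id, input; fake_layout out; };

    class reorder_cache {
        using key = std::pair<std::string, fake_layout>;  // (source id, target layout)
        std::map<key, std::shared_ptr<fake_reorder>> cached_;

    public:
        // {nullptr, true}  -> no reorder needed (layouts already match)
        // {reorder, true}  -> reused a cached reorder
        // {reorder, false} -> created a new reorder on this call
        std::pair<std::shared_ptr<fake_reorder>, bool> get_reorder(const std::string& src,
                                                                   const fake_layout& in,
                                                                   const fake_layout& out) {
            if (in == out) return {nullptr, true};
            key k{src, out};
            auto it = cached_.find(k);
            if (it != cached_.end()) return {it->second, true};
            std::stringstream ss;
            ss << src << "_reorder_" << cached_.size();
            auto r = std::make_shared<fake_reorder>(fake_reorder{ss.str(), src, out});
            cached_[k] = r;
            return {r, false};
        }
    };

The second element tells the caller whether the returned reorder is new or already known, so the same reorder node is not added to the program twice.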
index 61a422b..8f2ba63 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lookup_table_type_id() {
+primitive_type_id lookup_table::type_id() {
     static primitive_type_base<lookup_table> instance;
     return &instance;
 }
index 6f9e0d3..dba1fb3 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lrn_type_id() {
+primitive_type_id lrn::type_id() {
     static primitive_type_base<lrn> instance;
     return &instance;
 }
@@ -39,7 +39,7 @@ std::string lrn_inst::to_string(lrn_node const& node) {
     auto k = desc->k;
     auto alpha = desc->alpha;
     auto beta = desc->beta;
-    auto norm_region = desc->norm_region == cldnn_lrn_norm_region::cldnn_lrn_norm_region_across_channel
+    auto norm_region = desc->norm_region == lrn_norm_region::lrn_norm_region_across_channel
                            ? "across channel"
                            : "within channel";
     auto& input = node.input();
index 061ab9b..5eb99df 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lstm_type_id() {
+primitive_type_id lstm::type_id() {
     static primitive_type_base<lstm> instance;
     return &instance;
 }
index 86e0c72..657d1e2 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lstm_dynamic_type_id() {
+primitive_type_id lstm_dynamic::type_id() {
     static primitive_type_base<lstm_dynamic> instance;
     return &instance;
 }
index fc23642..4d11734 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lstm_dynamic_input_type_id() {
+primitive_type_id lstm_dynamic_input::type_id() {
     static primitive_type_base<lstm_dynamic_input> instance;
     return &instance;
 }
index 6dd38d3..32f6276 100644 (file)
@@ -23,7 +23,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lstm_dynamic_timeloop_type_id() {
+primitive_type_id lstm_dynamic_timeloop::type_id() {
     static primitive_type_base<lstm_dynamic_timeloop> instance;
     return &instance;
 }
@@ -79,7 +79,6 @@ void lstm_dynamic_timeloop_node::reverse_optional_outputs_connections() {
     }
 }
 
-
 size_t lstm_dynamic_timeloop_node::get_dependency_idx(std::string val) const {
     auto ret = get_param_list_index(val);
     CLDNN_ERROR_EQUAL(id(),
index 9981e72..2672148 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lstm_elt_type_id() {
+primitive_type_id lstm_elt::type_id() {
     static primitive_type_base<lstm_elt> instance;
     return &instance;
 }
index 15abf73..580ab10 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id lstm_gemm_type_id() {
+primitive_type_id lstm_gemm::type_id() {
     static primitive_type_base<lstm_gemm> instance;
     return &instance;
 }
index f400419..b3a3c9d 100644 (file)
@@ -23,7 +23,7 @@
 #include <memory>
 
 namespace cldnn {
-primitive_type_id max_unpooling_type_id() {
+primitive_type_id max_unpooling::type_id() {
     static primitive_type_base<max_unpooling> instance;
     return &instance;
 }
diff --git a/inference-engine/thirdparty/clDNN/src/memory.cpp b/inference-engine/thirdparty/clDNN/src/memory.cpp
new file mode 100644 (file)
index 0000000..39161c0
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "api/memory.hpp"
+#include "memory_impl.h"
+#include "engine_impl.h"
+
+namespace cldnn {
+
+memory memory::allocate(const engine& engine, const layout& layout, uint16_t stream_id) {
+    size_t size = layout.bytes_count();
+    if (size == 0)
+        throw std::invalid_argument("size should be more than 0");
+
+    return memory(engine.get()->allocate_memory(layout, stream_id).detach());
+}
+
+size_t memory::count() const {
+    return get_layout().count();
+}
+
+size_t memory::size() const {
+    return _impl->size();
+}
+
+const layout& memory::get_layout() const {
+    return _impl->get_layout();
+}
+
+int memory::get_stream_id() const {
+    return _impl->get_stream_id();
+}
+
+bool memory::is_allocated_by(const engine& engine) const {
+    return _impl->is_allocated_by(*engine.get());
+}
+
+bool memory::is_the_same_buffer(const memory& other) const {
+    if (_impl == other.get())
+        return true;
+
+    if (_impl->get_engine() != other.get()->get_engine())
+        return false;
+
+    // User memory, check the pointers
+    if (!_impl->get_engine())
+        return lock_impl() == other.lock_impl();
+
+    // Engine memory, let it decide
+    return _impl->get_engine()->is_the_same_buffer(*_impl, *other.get());
+}
+
+memory memory::attach_impl(const cldnn::layout& layout, void* ptr, uint16_t stream_id) {
+    return memory(new simple_attached_memory(layout, ptr, stream_id));
+}
+
+void* memory::lock_impl() const {
+    return _impl->lock();
+}
+
+void memory::unlock() const {
+    _impl->unlock();
+}
+
+void memory::retain() {
+    _impl->add_ref();
+}
+void memory::release() {
+    _impl->release();
+}
+
+}  // namespace cldnn
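The new api/memory.cpp above is the thin public wrapper over memory_impl. A hedged usage sketch that sticks to the entry points defined in this file (allocate, get_layout, count, size, get_stream_id); the default-constructed engine and the flat api/ header names are assumptions about the rest of the 2019 R3 API, not something this hunk shows:

    #include <api/engine.hpp>   // header names assumed from the new flat api/ layout
    #include <api/memory.hpp>
    #include <iostream>

    int main() {
        cldnn::engine engine;   // assumed: default engine_configuration picks the default GPU

        // 1 batch, 3 features, 224x224 spatial, fp32, bfyx (layout/tensor ctors as used in this diff).
        cldnn::layout in_layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor{1, 3, 224, 224});

        // memory::allocate(engine, layout, stream_id) is implemented in the file above.
        cldnn::memory input = cldnn::memory::allocate(engine, in_layout, 0);

        std::cout << "elements: " << input.count()          // layout element count
                  << ", bytes: "  << input.size()           // buffer size from memory_impl
                  << ", stream: " << input.get_stream_id() << std::endl;
        return 0;
    }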
index 7798437..85d07e8 100644 (file)
@@ -32,6 +32,7 @@
 #include <string>
 #include <utility>
 #include <set>
+#include <stdexcept>
 
 namespace cldnn {
 memory_record::memory_record(memory_set users,
@@ -43,13 +44,13 @@ memory_record::memory_record(memory_set users,
 memory_impl::ptr memory_pool::alloc_memory(const layout& layout, uint16_t stream_id) {
     auto context = _engine->get_context();
     if (layout.bytes_count() > context->get_engine_info().max_alloc_mem_size) {
-        throw error("exceeded max size of memory object allocation", CLDNN_ALLOC_SIZE_EXCEEDED);
+        throw std::runtime_error("exceeded max size of memory object allocation");
     }
 
     add_memory_used(layout.bytes_count());
 
     if (_max_peak_memory_used > context->get_engine_info().max_global_mem_size) {
-        throw error("exceeded global device memory", CLDNN_GLOBAL_SIZE_EXCEEDED);
+        throw std::runtime_error("exceeded global device memory");
     }
 
     try {
@@ -66,9 +67,9 @@ memory_impl::ptr memory_pool::alloc_memory(const layout& layout, uint16_t stream
             case CL_OUT_OF_RESOURCES:
             case CL_OUT_OF_HOST_MEMORY:
             case CL_INVALID_BUFFER_SIZE:
-                throw error("out of GPU resources", CLDNN_OUT_OF_RESOURCES);
+                throw std::runtime_error("out of GPU resources");
             default:
-                throw error("GPU buffer allocation failed", CLDNN_ERROR);
+                throw std::runtime_error("GPU buffer allocation failed");
         }
     }
 }
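The memory_pool hunk above replaces the old cldnn::error / CLDNN_* status codes with plain std::runtime_error, so allocation failures now surface through ordinary C++ exception handling. A minimal stand-alone sketch of the same guard style (the limits and the allocation body are placeholders, not the real engine queries):

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    struct pool_limits {
        std::size_t max_alloc_mem_size;   // stand-in for engine_info.max_alloc_mem_size
        std::size_t max_global_mem_size;  // stand-in for engine_info.max_global_mem_size
    };

    class toy_memory_pool {
        pool_limits limits_;
        std::size_t peak_used_ = 0;

    public:
        explicit toy_memory_pool(pool_limits limits) : limits_(limits) {}

        std::vector<char> alloc(std::size_t bytes) {
            if (bytes > limits_.max_alloc_mem_size)
                throw std::runtime_error("exceeded max size of memory object allocation");
            peak_used_ += bytes;
            if (peak_used_ > limits_.max_global_mem_size)
                throw std::runtime_error("exceeded global device memory");
            return std::vector<char>(bytes);
        }
    };

Code that previously caught the clDNN error type and checked CLDNN_ALLOC_SIZE_EXCEEDED / CLDNN_GLOBAL_SIZE_EXCEEDED now catches std::runtime_error (or std::exception) and inspects what().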
index 53222ff..cfece38 100644 (file)
@@ -26,7 +26,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id mutable_data_type_id() {
+primitive_type_id mutable_data::type_id() {
     static primitive_type_base<mutable_data> instance;
     return &instance;
 }
@@ -46,7 +46,7 @@ memory_impl::ptr attach_or_copy_data(network_impl& network, memory_impl& mem) {
 }  // namespace
 
 mutable_data_node::typed_program_node(const std::shared_ptr<mutable_data> dprim, program_impl& prog)
-    : parent(dprim, prog), mem(api_cast(dprim->mem.get())) {
+    : parent(dprim, prog), mem(dprim->mem.get()) {
     recalc_output_layout(false);
     can_share_buffer(false);
     fill_memory();
index b264a19..cd35de5 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id mvn_type_id() {
+primitive_type_id mvn::type_id() {
     static primitive_type_base<mvn> instance;
     return &instance;
 }
index f1e147b..49ef8aa 100644 (file)
@@ -19,9 +19,9 @@
 #include "engine_impl.h"
 #include "event_impl.h"
 #include "program_impl.h"
-#include "api/CPP/data.hpp"
-#include "api/CPP/mutable_data.hpp"
-#include "api/CPP/input_layout.hpp"
+#include "api/data.hpp"
+#include "api/mutable_data.hpp"
+#include "api/input_layout.hpp"
 
 #include "error_handler.h"
 #include "primitive_inst.h"
@@ -36,6 +36,8 @@
 #include <vector>
 #include <memory>
 #include <set>
+#include <utility>
+#include <map>
 
 // #define DEBUG_DUMP_PATH "cldnn_dump/"
 
 #endif
 
 namespace cldnn {
+
+network::network(program const& program, uint16_t stream_id)
+    : _impl(program.get()->get_engine().allocate_network(*program.get(), stream_id).detach()) {}
+
+engine network::get_engine() const {
+    auto impl = engine_impl::ptr(&_impl->get_engine());
+    return engine(impl.detach());
+}
+
+program network::get_program() const {
+    auto impl = program_impl::cptr(&_impl->get_program());
+    return program(const_cast<program_impl*>(impl.detach()));
+}
+
+void network::set_input_data(const primitive_id& id, const memory& mem) const {
+    _impl->set_input_data(id, *mem.get());
+}
+
+void network::set_learning_rate(const float lr) {
+    _impl->set_learning_rate(lr);
+}
+
+float network::get_learning_rate() {
+    return _impl->get_learning_rate();
+}
+
+uint16_t network::get_stream_id() {
+    return _impl->get_stream_id();
+}
+
+std::string network::get_primitive_info(const primitive_id& id) const {
+    return _impl->get_primitive_info(id);
+}
+
+std::vector<primitive_info> network::get_primitives_info() {
+    return _impl->get_primitives_info();
+}
+
+std::vector<std::pair<std::string, std::vector<primitive_info>>> network::get_optimization_steps_info() {
+    return _impl->get_optimizer_passes_info();
+}
+
+std::vector<primitive_id> network::get_executed_primitive_ids() const {
+    return _impl->get_executed_primitive_ids();
+}
+
+std::vector<primitive_id> network::get_all_primitive_ids() const {
+    return _impl->get_all_primitive_ids();
+}
+
+std::vector<primitive_id> network::get_all_primitive_org_ids() const {
+    return _impl->get_all_primitive_org_ids();
+}
+
+std::vector<primitive_id> network::get_output_ids() const {
+    return _impl->get_output_ids();
+}
+
+memory network::get_output_memory(const primitive_id& output_id) const {
+    auto out_mem = memory_impl::ptr(&_impl->get_primitive(output_id)->output_memory());
+    return memory(out_mem.detach());
+}
+
+event network::get_primitive_event(const primitive_id& output_id) const {
+    auto out_event = _impl->get_primitive_event(output_id);
+    return event(out_event.detach());
+}
+
+std::map<primitive_id, network_output> network::execute(const std::vector<event>& dependencies) const {
+    std::vector<refcounted_obj_ptr<event_impl>> dep_impls(dependencies.size());
+
+    std::transform(
+        dependencies.begin(),
+        dependencies.end(),
+        dep_impls.begin(),
+        [](const event& ev) {
+            return event_impl::ptr(ev.get());
+    });
+
+    _impl->execute(dep_impls);
+
+    auto output_ids = get_output_ids();
+    std::map<primitive_id, network_output> result;
+    for (auto& id : output_ids) {
+        result.emplace(id, get_output(id));
+    }
+    return result;
+}
+
+void network::retain() {
+    _impl->add_ref();
+}
+
+void network::release() {
+    _impl->release();
+}
+
 #ifdef DEBUG_DUMP_PATH
 static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) {
 #if defined HALF_HALF_HPP
@@ -142,11 +241,13 @@ void dump<uint32_t>(memory_impl& mem, std::ofstream& file_stream) {
 
     for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) {
         for (cldnn::tensor::value_type f = 0; f < (cldnn::tensor::value_type)ceil_div(size.feature[0], 32); ++f) {
-            for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) {
-                for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) {
-                    cldnn::tensor t(cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, 0, 0));
-                    size_t input_it = mem.get_layout().get_linear_offset(t);
-                    file_stream << mem_ptr[input_it] << std::endl;
+            for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) {
+                for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) {
+                    for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) {
+                        cldnn::tensor t(cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, 0));
+                        size_t input_it = mem.get_layout().get_linear_offset(t);
+                        file_stream << mem_ptr[input_it] << std::endl;
+                    }
                 }
             }
         }
@@ -364,6 +465,7 @@ void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& ev
         {
             log_memory_to_file(get_primitive(inst->id())->output_memory(), layer_name + "_dst_0");
         }
+
         get_engine().flush_network(_stream_id);
 #endif
     }
index ea3e751..4baf057 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id normalize_type_id() {
+primitive_type_id normalize::type_id() {
     static primitive_type_base<normalize> instance;
     return &instance;
 }
@@ -55,10 +55,10 @@ std::string normalize_inst::to_string(normalize_node const& node) {
 }
 
 normalize_inst::typed_primitive_inst(network_impl& network, normalize_node const& node) : parent(network, node) {
-    /// Scale x dimension should be 1 (if all channels have the same scale) or equal to input feature size (one scale per channel).
+    /// Scale f dimension should be 1 (if all channels have the same scale) or equal to input feature size (one scale per channel).
     auto scale_layout = node.scale().get_output_layout();
     auto scale_size = scale_layout.size;
-    auto scale_feature_size = scale_size.spatial[0];
+    auto scale_feature_size = scale_size.feature[0];
     auto input_layout = node.input().get_output_layout();
     auto input_feature_size = input_layout.size.feature[0];
 
index c779bea..9345b35 100644 (file)
 #include <vector>
 
 namespace cldnn {
-primitive_type_id one_hot_type_id() {
+primitive_type_id one_hot::type_id() {
     static primitive_type_base<one_hot> instance;
     return &instance;
 }
 
-static bool is_output_bfzyx(layout input, int32_t axis) {
+static bool is_output_bfzyx(const layout& input, int32_t axis) {
     if (input.format == format::bfzyx)
         return true;
     if (axis == 4)
index 9a60303..c40a713 100644 (file)
@@ -26,7 +26,7 @@
 
 namespace cldnn {
 
-primitive_type_id permute_type_id() {
+primitive_type_id permute::type_id() {
     static primitive_type_base<permute> instance;
     return &instance;
 }
index 51b01e8..c07b18d 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id pooling_type_id() {
+primitive_type_id pooling::type_id() {
     static primitive_type_base<pooling> instance;
     return &instance;
 }
index c4ceb89..134a1fe 100644 (file)
@@ -26,7 +26,7 @@
 #include <algorithm>
 
 namespace cldnn {
-primitive_type_id prior_box_type_id() {
+primitive_type_id prior_box::type_id() {
     static primitive_type_base<prior_box> instance;
     return &instance;
 }
index 5c349aa..24496dc 100644 (file)
 #include "program_impl.h"
 #include "sliding_window_utils.h"
 
+#include "roi_pooling_inst.h"
+#include "reorg_yolo_inst.h"
+#include "eltwise_inst.h"
+#include "softmax_inst.h"
+#include "permute_inst.h"
+#include "custom_gpu_primitive_inst.h"
+#include "binary_convolution_inst.h"
+#include "upsampling_inst.h"
+#include "reshape_inst.h"
+#include "activation_inst.h"
+#include "scale_inst.h"
 #include "convolution_inst.h"
 #include "concatenation_inst.h"
 #include "crop_inst.h"
 #include <map>
 #include <utility>
 #include <set>
+#include <stdexcept>
+
+program::program(engine const& engine, topology const& topology, build_options const& options)
+    : _impl(engine.get()->build_program(*topology.get(), options).detach()) {}
+
+void program::retain() {
+    _impl->add_ref();
+}
+
+void program::release() {
+    _impl->release();
+}
 
 program_impl::program_impl(engine_impl& engine_ref,
                            topology_impl const& topology,
@@ -228,12 +251,12 @@ void program_impl::prepare_nodes(std::set<std::shared_ptr<program_node>> const&
     for (const auto& node : nodes_map) {
         auto node_ptr = node.second;
         if (node_ptr == nullptr)
-            throw error("NULL pointer in nodes_map.", CLDNN_ERROR);
+            throw std::runtime_error("NULL pointer in nodes_map.");
         // ToDo: avoid O(n^2) run time here (pass map instead of set?)
         bool found = false;
         for (const auto& src_node : nodes) {
             if (src_node == nullptr)
-                throw error("NULL pointer in nodes_map.", CLDNN_ERROR);
+                throw std::runtime_error("NULL pointer in nodes_map.");
             if (node.first == src_node->get_primitive()->id) {
                 copy_node_dependencies(node_ptr.get(), src_node.get());
                 found = true;
@@ -258,7 +281,7 @@ void program_impl::prepare_nodes(topology_impl const& topology) {
     for (const auto& node : nodes_map) {
         auto node_ptr = node.second.get();
         if (node_ptr == nullptr)
-            throw error("NULL pointer in nodes_map.", CLDNN_ERROR);
+            throw std::runtime_error("NULL pointer in nodes_map.");
         add_node_dependencies(node_ptr);
         if (node_ptr->dependencies.size() == 0) {
             inputs.push_back(node_ptr);
@@ -365,13 +388,18 @@ void program_impl::pre_optimize_graph(bool is_internal) {
     }
 
     layout_optimizer lo(output_size_handling_enabled);
+    set_layout_optimizer_attributes(lo);
+
+    reorder_factory rf;
     if (options.get<build_option_type::optimize_data>()->enabled()) {
+        apply_opt_pass<prepare_quantization>();
+
         apply_opt_pass<prepare_primitive_fusing>(lo);
 
-        apply_opt_pass<reorder_inputs>(lo);
+        apply_opt_pass<reorder_inputs>(lo, rf);
 
-        // this code should be moved to post compilation after kernel selector will support handling reorder bias
-        apply_opt_pass<pre_optimize_bias>(lo);
+        // TODO: this code should be moved to post compilation once the kernel selector supports handling reorder bias
+        apply_opt_pass<pre_optimize_bias>(rf);
 
         // passes regarding conv + eltwise optimizations
 
@@ -388,7 +416,7 @@ void program_impl::pre_optimize_graph(bool is_internal) {
 
     apply_opt_pass<prepare_padding>(output_size_handling_enabled);
 
-    apply_opt_pass<remove_redundant_reorders>(lo.get_optimization_attributes().bfyx_f16_network);
+    apply_opt_pass<remove_redundant_reorders>(lo, options.get<build_option_type::optimize_data>()->enabled());
 
     if (options.get<build_option_type::optimize_data>()->enabled()) {
         // Fuse conv + eltw after padding preparations
@@ -404,8 +432,6 @@ void program_impl::pre_optimize_graph(bool is_internal) {
         apply_opt_pass<propagate_constants>();
     }
 
-    apply_opt_pass<prepare_binarization>();
-
     // try to fuse buffers (i.e. depth_concat in bfyx format) after padding calculations
     if (options.get<build_option_type::optimize_data>()->enabled()) {
         apply_opt_pass<prepare_buffer_fusing>();
@@ -419,10 +445,11 @@ void program_impl::post_optimize_graph(bool is_internal) {
     // input reorder for fully connected if necessary
     apply_opt_pass<post_input_reorder>();
 
+    reorder_factory rf;
     layout_optimizer lo;
-    apply_opt_pass<post_optimize_weights>(lo);
+    apply_opt_pass<post_optimize_weights>(rf);
 
-    apply_opt_pass<remove_redundant_reorders>();  // TODO: do we need it at this place also?
+    apply_opt_pass<remove_redundant_reorders>(lo, false, true);  // TODO: do we need it at this place also?
 
     if (!is_internal) {
         // ToDo remove hidden dependencies from propagate_constants pass
@@ -748,9 +775,8 @@ void program_impl::add_intermediate(program_node& node,
         }
     }
     if (!node_found) {
-        throw error("Trying to add intermediate node in between " + next.id() + " and dependecy " + prev.id() +
-                        " but they are not connected in this way.",
-                    CLDNN_ERROR);
+        throw std::runtime_error("Trying to add intermediate node in between " + next.id() + " and dependency " + prev.id() +
+                        " but they are not connected in this way.");
     }
     add_intermediate(node, next, idx, connect_int_node_with_old_dep, move_usrs_of_prev_to_node);
 }
@@ -810,10 +836,10 @@ void program_impl::swap_names(program_node& node1, program_node& node2) {
 
 void program_impl::replace_all_usages(program_node& old_node, program_node& new_node) {
     auto itr = old_node.users.begin();
-    bool end = (itr == old_node.users.end());
-    while (!end) {
+    auto cnt = old_node.users.size();
+    while (cnt != 0) {
+        cnt--;
         auto& usage = (*itr++);
-        end = (itr == old_node.users.end());
         usage->replace_dependency(old_node, new_node);
     }
 }
@@ -927,6 +953,53 @@ bool program_impl::extract_and_remove(program_node& node) {
     return true;
 }
 
+void program_impl::fuse_nodes(program_node &fused_node, program_node &peer_node) {
+    auto peer_layout = peer_node.get_output_layout();
+    fused_primitive_desc local_desc;
+    local_desc.prim = peer_node.get_primitive();
+    local_desc.dep_start_idx = fused_node.get_dependencies().size();
+    local_desc.output_layout = peer_layout;
+    local_desc.activation = activation_func::none;
+    if (!peer_node.get_fused_activations_funcs().empty()) {
+        if (peer_node.get_fused_activations_funcs().size() > 1)
+            CLDNN_ERROR_MESSAGE(peer_node.id(), "Fused primitive descriptor doesn't support > 1 activation functions in a peer node");
+
+        local_desc.activation = peer_node.get_fused_activations_funcs()[0];
+        local_desc.activation_params = peer_node.get_fused_activations_params()[0];
+    }
+
+    cldnn::padding needed_padding = padding::max(peer_layout.data_padding,
+                                                 fused_node.get_output_layout().data_padding);
+
+    // Add new dependencies to the fused_node
+    for (size_t i = 0; i < peer_node.get_dependencies().size(); i++) {
+        auto& dep = peer_node.get_dependency(i);
+        if (dep.id() == fused_node.id())
+            continue;
+        fused_node.dependencies.push_back(&dep);
+        local_desc.deps.push_back(dep.id());
+        dep.users.push_back(&fused_node);
+    }
+    fused_node.add_fused_primitive(local_desc);
+    // The peer node normally has no fused primitives of its own, but propagate them if it does.
+    if (peer_node.has_fused_primitives()) {
+        fused_node.add_fused_primitives(peer_node.get_fused_primitives());
+    }
+    add_optimized_primitive_info(peer_node.id(), { fused_node.id() });
+
+    // Remove all edges connected with peer node
+    while (peer_node.get_dependencies().size() > 0) {
+        auto& dep = peer_node.get_dependency(peer_node.get_dependencies().size() - 1);
+        remove_connection(dep, peer_node);
+    }
+    replace_all_usages(peer_node, fused_node);
+
+    // Update the fused node's padding and output layout.
+    fused_node.merge_output_padding(needed_padding);
+    fused_node.set_output_layout(peer_layout, false);
+    fused_node.recalc_output_layout(true);
+}
+
 void program_impl::remove_nodes(std::list<program_node*>& to_remove) {
     for (auto const& node : to_remove) {
         if (node->is_input()) {
@@ -968,17 +1041,13 @@ void program_impl::dump_program(const char* stage,
                                 bool with_full_info,
                                 std::function<bool(program_node const&)> const& filter) const {
     std::string path = get_dir_path(options);
-    if (path.empty()) {
+    if (path.empty() || !with_full_info) {
         return;
     }
 
     std::ofstream graph(path + "cldnn_program_" + std::to_string(prog_id) + "_" + stage + ".graph");
     dump_graph_init(graph, *this, filter);
 
-    if (!with_full_info) {
-        return;
-    }
-
     graph.open(path + "cldnn_program_" + std::to_string(prog_id) + "_" + stage + ".info");
     dump_graph_info(graph, *this, filter);
 
@@ -1042,3 +1111,105 @@ const program_impl::graph_optimizer_info& program_impl::get_optimizer_passes_inf
 const program_impl::primitives_info& program_impl::get_primitives_info() const { return prim_info; }
 
 void program_impl::apply_opt_pass(base_pass& pass) { pm->run(*this, pass); }
+
+void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
+    // first pass to set layout optimization_attributes for topology
+    bool can_use_fsv32 = true;
+    bool can_use_f16 = true;
+    size_t total_conv_layers = 0;
+    size_t total_dw_conv_layers = 0;
+    size_t total_grouped_conv_layers = 0;
+    size_t opt_conv_layers_bfyx_f16 = 0;
+    size_t opt_conv_layers_bfzyx_f16 = 0;
+    size_t opt_deconv_layers_bfzyx_f16 = 0;
+
+    for (auto& node : get_processing_order()) {
+        auto& prim = *node;
+        if (prim.type() == cldnn::convolution::type_id()) {
+            if (prim.as<convolution>().get_primitive()->split() > 1)
+                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::splitted_convolution, 1);
+
+            if (prim.as<convolution>().get_primitive()->groups > 1)
+                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::group_convolution, 1);
+
+            if (prim.as<convolution>().get_primitive()->deformable_mode)
+                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::deformable_convolution, 1);
+
+            uint32_t ifm = static_cast<uint32_t>(node->get_dependency(0).get_output_layout().size.feature[0]);
+            if (prim.as<convolution>().get_primitive()->groups == ifm)
+                total_dw_conv_layers++;
+            else if (prim.as<convolution>().get_primitive()->groups > 1 || prim.as<convolution>().get_primitive()->split() > 1)
+                total_grouped_conv_layers++;
+
+            if (lo.is_format_optimized(prim.as<convolution>(), format::bfyx_f16))
+                opt_conv_layers_bfyx_f16++;
+
+            if (lo.is_format_optimized(prim.as<convolution>(), format::bfzyx_f16))
+                opt_conv_layers_bfzyx_f16++;
+
+            total_conv_layers++;
+        }
+        if (prim.type() == cldnn::deconvolution::type_id()) {
+            if (lo.is_format_optimized(prim.as<deconvolution>(), format::bfzyx_f16))
+                opt_deconv_layers_bfzyx_f16 += 1;
+        }
+
+        // list of layers that do not support yxfb or perform worse than bfyx
+        if (prim.type() == cldnn::detection_output::type_id() || prim.type() == cldnn::proposal::type_id() ||
+            prim.type() == cldnn::roi_pooling::type_id() || prim.type() == cldnn::deconvolution::type_id() ||
+            prim.type() == cldnn::upsampling::type_id() || prim.type() == cldnn::reorg_yolo::type_id())
+            lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_only_layer, 1);
+
+        // Check if all layers in topology support fs_byx_fsv32 format
+        if (prim.is_in_data_flow() &&
+            prim.type() != cldnn::convolution::type_id() &&
+            prim.type() != cldnn::pooling::type_id() &&
+            prim.type() != cldnn::eltwise::type_id() &&
+            prim.type() != cldnn::fully_connected::type_id() &&
+            prim.type() != cldnn::reorder::type_id() &&
+            prim.type() != cldnn::permute::type_id() &&
+            prim.type() != cldnn::reshape::type_id() &&
+            prim.type() != cldnn::input_layout::type_id() &&
+            prim.type() != cldnn::activation::type_id() &&
+            prim.type() != cldnn::softmax::type_id()) {
+            can_use_fsv32 = false;
+        }
+
+        if (prim.is_in_data_flow() &&
+            prim.type() != cldnn::convolution::type_id() &&
+            prim.type() != cldnn::activation::type_id() &&
+            prim.type() != cldnn::pooling::type_id() &&
+            prim.type() != cldnn::eltwise::type_id() &&
+            prim.type() != cldnn::permute::type_id() &&
+            prim.type() != cldnn::reshape::type_id() &&
+            prim.type() != cldnn::detection_output::type_id() &&
+            prim.type() != cldnn::binary_convolution::type_id() &&
+            prim.type() != cldnn::quantize::type_id() &&
+            prim.type() != cldnn::custom_gpu_primitive::type_id() &&
+            prim.type() != cldnn::concatenation::type_id() &&
+            prim.type() != cldnn::fully_connected::type_id() &&
+            prim.type() != cldnn::reorder::type_id() &&
+            prim.type() != cldnn::input_layout::type_id() &&
+            prim.type() != cldnn::softmax::type_id() &&
+            prim.type() != cldnn::prior_box::type_id() &&
+            prim.type() != cldnn::scale::type_id())
+            can_use_f16 = false;
+    }
+
+    // Due to the fact that a single Winograd convolution is faster than bfyx_f16 and
+    // using them together leads to redundant reorders, the whole-topology switch
+    // is performed only if at least half of the layers can use bfyx_f16.
+    bool should_use_bfyx_f16_conv = can_use_f16 &&
+                                    ((opt_conv_layers_bfyx_f16 / static_cast<float>(total_conv_layers)) > 0.5f) &&
+                                    total_conv_layers > 11 &&
+                                    total_grouped_conv_layers == 0;  // grouped convolutions are not supported correctly yet
+
+    if (can_use_fsv32)
+        lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::only_fsv32_layers, 1);
+
+    if (should_use_bfyx_f16_conv)
+        lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_f16_network, 1);
+
+    if (opt_conv_layers_bfzyx_f16 >= 1 || opt_deconv_layers_bfzyx_f16 >= 1)
+        lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfzyx_f16_network, 1);
+}
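Isolated from the pass above, the bfyx_f16 switching rule reduces to a simple counting heuristic; a standalone sketch with illustrative names (not clDNN code):

    #include <cstddef>

    // Switch the whole topology to bfyx_f16 only when fp16 is usable by every layer,
    // the network has more than 11 convolutions, none of them are grouped, and more
    // than half of the convolutions are already optimal in bfyx_f16.
    static bool should_switch_to_bfyx_f16(bool can_use_f16,
                                          std::size_t opt_conv_layers_bfyx_f16,
                                          std::size_t total_conv_layers,
                                          std::size_t total_grouped_conv_layers) {
        if (!can_use_f16 || total_conv_layers <= 11 || total_grouped_conv_layers != 0)
            return false;
        return opt_conv_layers_bfyx_f16 / static_cast<float>(total_conv_layers) > 0.5f;
    }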
index 07d1bb2..a765fa1 100644 (file)
@@ -26,7 +26,7 @@ namespace cldnn {
 // helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization
 void program_helpers::merge_buffers(engine_impl& engine,
                                     program_node& node,
-                                    layout target_layout,
+                                    const layout& target_layout,
                                     size_t begin_offset,
                                     size_t end_offset) {
     memory_impl::ptr data_to_allocate = engine.allocate_memory(target_layout, 0);
@@ -87,17 +87,19 @@ std::pair<bool, bool> program_helpers::are_layouts_identical(layout const& l1, l
         (l1.format == format::fs_b_yx_fsv32 && l2.format != format::fs_b_yx_fsv32) ||
         (l2.format == format::fs_b_yx_fsv32 && l1.format != format::fs_b_yx_fsv32) ||
         (l1.format == format::bfyx_f16 && l2.format != format::bfyx_f16) ||
-        (l2.format == format::bfyx_f16 && l1.format != format::bfyx_f16))
+        (l2.format == format::bfyx_f16 && l1.format != format::bfyx_f16) ||
+        (l1.format == format::bfzyx_f16 && l2.format != format::bfzyx_f16) ||
+        (l2.format == format::bfzyx_f16 && l1.format != format::bfzyx_f16))
         return {false, false};
 
     auto l1_pitch = l1.get_pitches();
     auto l2_pitch = l2.get_pitches();
 
     // ignore pitches which will never be used (for dims with size == 1)
-    for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
+    for (size_t i = 0; i < tensor_dim_max; ++i)
         if (l1.size.raw[i] == 1)
             l1_pitch.raw[i] = 0;
-    for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
+    for (size_t i = 0; i < tensor_dim_max; ++i)
         if (l2.size.raw[i] == 1)
             l2_pitch.raw[i] = 0;
 
index 1805d83..28e551f 100644 (file)
@@ -103,6 +103,23 @@ std::unique_ptr<json_composite> program_node::desc_to_json() const {
     node_info->add("in data flow", bool_to_str(data_flow));
     node_info->add("output", bool_to_str(output));
 
+
+    json_composite fused_nodes_info;
+    size_t index = 0;
+    for (auto& fused_desc : get_fused_primitives()) {
+        json_composite fused_node_info;
+        fused_node_info.add("id", fused_desc.prim->id);
+        fused_node_info.add("dependencies", fused_desc.deps);
+        fused_node_info.add("dep start_idx", fused_desc.dep_start_idx);
+        json_composite output_layout_info;
+        output_layout_info.add("data type", dt_to_str(fused_desc.output_layout.data_type));
+        output_layout_info.add("format", fmt_to_str(output_layout.format));
+        output_layout_info.add("size", output_layout.size.to_string());
+        fused_node_info.add("output layout", output_layout_info);
+        fused_nodes_info.add("fused primitive idx " + std::to_string(index++), fused_node_info);
+    }
+    node_info->add("fused primitives", fused_nodes_info);
+
     std::vector<std::string> deps_ptrs;
     {
         bool empty = true;
@@ -165,7 +182,9 @@ bool program_node::is_detached(bool whole_branch) {
     return true;
 }
 
-layout program_node::calc_output_layout() const { return type()->calc_output_layout(*this); }
+layout program_node::calc_output_layout() const {
+    return type()->calc_output_layout(*this);
+}
 
 layout program_node::get_output_layout(bool invalidate_users_if_changed) {
     if (valid_output_layout)
@@ -189,7 +208,7 @@ layout program_node::get_non_padded_output_layout(bool invalidate_users_if_chang
     return result;
 }
 
-bool program_node::set_output_layout(layout new_layout, bool invalidate_users_if_changed) {
+bool program_node::set_output_layout(layout& new_layout, bool invalidate_users_if_changed) {
     merge_output_padding(new_layout.data_padding);
     new_layout.data_padding = output_layout.data_padding;
     bool changed = (new_layout != output_layout);
@@ -202,7 +221,8 @@ bool program_node::set_output_layout(layout new_layout, bool invalidate_users_if
 }
 
 bool program_node::recalc_output_layout(bool invalidate_users_if_changed) {
-    return set_output_layout(calc_output_layout(), invalidate_users_if_changed);
+    auto output_layout = calc_output_layout();
+    return set_output_layout(output_layout, invalidate_users_if_changed);
 }
 
 bool program_node::has_padded_dependency() {
index d7cd57a..b30f554 100644 (file)
@@ -32,7 +32,7 @@ static void generate_anchors(unsigned base_size,
                              bool shift_anchors,
                              bool round_ratios);
 
-primitive_type_id proposal_type_id() {
+primitive_type_id proposal::type_id() {
     static primitive_type_base<proposal> instance;
     return &instance;
 }
index 471128a..fe4fdd8 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id pyramid_roi_align_type_id() {
+primitive_type_id pyramid_roi_align::type_id() {
     static primitive_type_base<pyramid_roi_align> instance;
     return &instance;
 }
index 47d5ba2..c320168 100644 (file)
@@ -24,7 +24,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id quantize_type_id() {
+primitive_type_id quantize::type_id() {
     static primitive_type_base<quantize> instance;
     return &instance;
 }
@@ -33,16 +33,16 @@ layout quantize_inst::calc_output_layout(quantize_node const& node) {
     auto desc = node.get_primitive();
 
     auto input_layout = node.input().get_output_layout();
-    auto input_format = input_layout.format;
+    auto output_format = input_layout.format;
+    auto out_dt = input_layout.data_type;
+    if (node.get_primitive()->output_data_type)
+        out_dt = *node.get_primitive()->output_data_type;
 
-    bool is_packed_binarization = desc->levels == 2 &&
-                                  node.get_users().size() == 1 &&
-                                  node.get_users().front()->is_type<binary_convolution>();
+    if (out_dt == data_types::bin) {
+        output_format = format::b_fs_yx_32fp;
+    }
 
-    if (is_packed_binarization)
-        return layout{data_types::bin, format::b_fs_yx_32fp, input_layout.size};
-    else
-        return layout{input_layout.data_type, input_format, input_layout.size};
+    return layout{out_dt, output_format, input_layout.size};
 }
 
 std::string quantize_inst::to_string(quantize_node const& node) {
index 640f227..d650948 100644 (file)
@@ -24,7 +24,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id reduce_type_id() {
+primitive_type_id reduce::type_id() {
     static primitive_type_base<reduce> instance;
     return &instance;
 }
index d6e2735..c8b347d 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id region_yolo_type_id() {
+primitive_type_id region_yolo::type_id() {
     static primitive_type_base<region_yolo> instance;
     return &instance;
 }
index cbdee7d..3c9e9ec 100644 (file)
@@ -25,7 +25,7 @@
 
 namespace cldnn {
 
-primitive_type_id reorder_type_id() {
+primitive_type_id reorder::type_id() {
     static primitive_type_base<reorder> instance;
     return &instance;
 }
@@ -38,6 +38,10 @@ layout reorder_inst::calc_output_layout(reorder_node const& node) {
     auto ofmt = node.get_primitive()->output_format;
     auto op = node.get_primitive()->output_padding;
 
+    if (ofmt == format::any) {
+        ofmt = ifmt;
+    }
+
     if (ofmt.is_winograd() && ifmt.is_winograd()) {
         if (ofmt == ifmt)
             return layout(odt, ofmt, input_layout.size, op);
@@ -151,7 +155,7 @@ layout reorder_inst::calc_output_layout(reorder_node const& node) {
     }
 
     if (ofmt == format::bs_xs_xsv8_bsv8 || ofmt == format::bs_xs_xsv8_bsv16 || ofmt == format::bs_x_bsv16 ||
-        ofmt == format::bfzyx || ifmt == format::bfzyx) {
+        ofmt == format::bfzyx || ifmt == format::bfzyx || ofmt == format::bfzyx_f16 || ifmt == format::bfzyx_f16) {
         return layout(odt, ofmt, input_layout.size.transform(ofmt, 1), op);
     } else if (ofmt != ifmt && (ofmt == format::bfwzyx || ifmt == format::bfwzyx)) {
         // TODO Shouldn't transform be called every time ifmt != ofmt?
index 9c07234..bd28aba 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id reorg_yolo_type_id() {
+primitive_type_id reorg_yolo::type_id() {
     static primitive_type_base<reorg_yolo> instance;
     return &instance;
 }
index ea43f42..a7e07f4 100644 (file)
@@ -24,7 +24,7 @@
 
 namespace cldnn {
 
-primitive_type_id reshape_type_id() {
+primitive_type_id reshape::type_id() {
     static primitive_type_base<reshape> instance;
     return &instance;
 }
index afa6bdf..df6c68f 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id reverse_sequence_type_id() {
+primitive_type_id reverse_sequence::type_id() {
     static primitive_type_base<reverse_sequence> instance;
     return &instance;
 }
index 3967e0f..6cbaf40 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id roi_pooling_type_id() {
+primitive_type_id roi_pooling::type_id() {
     static primitive_type_base<roi_pooling> instance;
     return &instance;
 }
index eef446d..ab62bf8 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id scale_type_id() {
+primitive_type_id scale::type_id() {
     static primitive_type_base<scale> instance;
     return &instance;
 }
index 3620b55..471e526 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id scale_grad_input_type_id() {
+primitive_type_id scale_grad_input::type_id() {
     static primitive_type_base<scale_grad_input> instance;
     return &instance;
 }
index 4a65ba5..e906335 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id scale_grad_weights_type_id() {
+primitive_type_id scale_grad_weights::type_id() {
     static primitive_type_base<scale_grad_weights> instance;
     return &instance;
 }
index 14032c0..4264eef 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id select_type_id() {
+primitive_type_id select::type_id() {
     static primitive_type_base<select> instance;
     return &instance;
 }
index 9d42563..e01b730 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id shuffle_channels_type_id() {
+primitive_type_id shuffle_channels::type_id() {
     static primitive_type_base<shuffle_channels> instance;
     return &instance;
 }
index eccebd2..36e8205 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id softmax_type_id() {
+primitive_type_id softmax::type_id() {
     static primitive_type_base<softmax> instance;
     return &instance;
 }
index d19b0cc..13e6b1a 100644 (file)
@@ -20,7 +20,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id softmax_loss_grad_type_id() {
+primitive_type_id softmax_loss_grad::type_id() {
     static primitive_type_base<softmax_loss_grad> instance;
     return &instance;
 }
index 1562d97..5ffd7bd 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id split_type_id() {
+primitive_type_id split::type_id() {
     static primitive_type_base<split> instance;
     return &instance;
 }
index d63d794..339e0fa 100644 (file)
@@ -23,7 +23,7 @@
 #include <vector>
 
 namespace cldnn {
-primitive_type_id strided_slice_type_id() {
+primitive_type_id strided_slice::type_id() {
     static primitive_type_base<strided_slice> instance;
     return &instance;
 }
index 5f9772f..48c15e3 100644 (file)
@@ -22,7 +22,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id tile_type_id() {
+primitive_type_id tile::type_id() {
     static primitive_type_base<tile> instance;
     return &instance;
 }
diff --git a/inference-engine/thirdparty/clDNN/src/topology.cpp b/inference-engine/thirdparty/clDNN/src/topology.cpp
new file mode 100644 (file)
index 0000000..3c1d6e3
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "api/topology.hpp"
+#include "topology_impl.h"
+#include <vector>
+#include <memory>
+
+namespace cldnn {
+
+topology::topology() : _impl(new topology_impl()) {}
+
+const std::vector<primitive_id> topology::get_primitive_ids() const {
+    return _impl->get_primitives_id();
+}
+
+void topology::change_input_layout(primitive_id id, const layout& new_layout) {
+    if (new_layout.format < format::any || new_layout.format >= format::format_num)
+        throw std::invalid_argument("Unknown format of layout.");
+
+    if (new_layout.data_type != data_types::f16 && new_layout.data_type != data_types::f32 &&
+        new_layout.data_type != data_types::i8 && new_layout.data_type != data_types::bin &&
+        new_layout.data_type != data_types::u8 && new_layout.data_type != data_types::i32 &&
+        new_layout.data_type != data_types::i64)
+        throw std::invalid_argument("Unknown data_type of layout.");
+
+    _impl->change_input_layout(id, new_layout);
+}
+
+void topology::add_primitive(std::shared_ptr<primitive> desc) {
+    _impl->add(desc);
+}
+
+void topology::retain() {
+    _impl->add_ref();
+}
+
+void topology::release() {
+    _impl->release();
+}
+
+}  // namespace cldnn
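The new translation unit above only forwards to topology_impl and adds basic validation in change_input_layout; typical client code stays the same. An illustrative sketch (identifiers assumed from the public clDNN API rather than taken from this commit):

    #include <memory>
    #include "api/topology.hpp"
    #include "api/input_layout.hpp"

    void build_example_topology() {
        cldnn::layout in_layout(cldnn::data_types::f32, cldnn::format::bfyx,
                                cldnn::tensor(1, 3, 224, 224));
        cldnn::topology topo;
        // add_primitive() forwards to topology_impl::add, as shown above.
        topo.add_primitive(std::make_shared<cldnn::input_layout>("input", in_layout));
        // change_input_layout() rejects unknown formats and data types before forwarding.
        topo.change_input_layout("input",
            cldnn::layout(cldnn::data_types::f16, cldnn::format::bfyx,
                          cldnn::tensor(1, 3, 224, 224)));
    }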
index 405b0c6..2521f8e 100644 (file)
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace cldnn {
-primitive_type_id upsampling_type_id() {
+primitive_type_id upsampling::type_id() {
     static primitive_type_base<upsampling> instance;
     return &instance;
 }
@@ -31,14 +31,13 @@ layout upsampling_inst::calc_output_layout(upsampling_node const& node) {
            "Output data type forcing is not supported for upsampling_node!");
     auto desc = node.get_primitive();
     auto input_layout = node.input().get_output_layout();
-    auto scale = desc->scale;
 
-    auto result_sizes = tensor(input_layout.size.batch[0],
-                               input_layout.size.feature[0],
-                               static_cast<size_t>(input_layout.size.spatial[0] * scale),
-                               static_cast<size_t>(input_layout.size.spatial[1] * scale));
-    auto result = layout({input_layout.data_type, input_layout.format, result_sizes});
+    auto result_sizes = desc->output_size;
+
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input batch size", input_layout.size.batch[0], "output batch size", result_sizes.batch[0], "");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input feature size", input_layout.size.feature[0], "output feature size", result_sizes.feature[0], "");
 
+    auto result = layout({input_layout.data_type, input_layout.format, result_sizes});
     return result;
 }
 
@@ -62,7 +61,7 @@ std::string upsampling_inst::to_string(upsampling_node const& node) {
 
     primitive_description << "id: " << desc->id << ", type: upsampling"
                           << "\n\tinput_1: " << input_1.id() << ", count: " << input_1.get_output_layout().count()
-                          << ",  size: " << input_1.get_output_layout().size << "\n\tscale: " << desc->scale
+                          << ",  size: " << input_1.get_output_layout().size
                           << "\n\tnum_filter: " << desc->num_filter << "\n\tsample_type: " << str_type
                           << "\n\twith activation: " << activation << ", slope: " << desc->activation_negative_slope
                           << "\n\toutput padding lower size: " << desc->output_padding.lower_size()
index 7f906cd..63ee610 100644 (file)
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # ========================================= Name / Output settings =====================================
 
 set(CLDNN_BUILD__PROJ             "clDNN_unit_tests")
@@ -126,7 +125,6 @@ add_executable("${CLDNN_BUILD__PROJ}"
 set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}")
 set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME   "${CLDNN_BUILD__PROJ_OUTPUT_NAME}")
 
-
 # Set library dependencies
 target_link_libraries("${CLDNN_BUILD__PROJ}"
     "${CLDNN_BUILD__PROJ__clDNN}"
@@ -137,7 +135,7 @@ if(WIN32)
 elseif((NOT ANDROID) AND (UNIX))
   target_link_libraries("${CLDNN_BUILD__PROJ}" pthread)
 endif()
-target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES} OpenCL)
+target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES})
 
 # =================================== Custom pre- and post-steps =======================================
 
index c7509ff..06a7ece 100644 (file)
 // limitations under the License.
 */
 
-
-
 #include <gtest/gtest.h>
-#include "api/CPP/engine.hpp"
+#include "api/engine.hpp"
 #include "test_utils/test_utils.h"
-#include "api/CPP/input_layout.hpp"
-#include "api/CPP/network.hpp"
+#include "api/input_layout.hpp"
+#include "api/network.hpp"
 
 using namespace tests;
 using namespace cldnn;
@@ -37,12 +35,12 @@ TEST(events_pool, DISABLED_basic_test)
 
     topology topology;
     topology.add(input_layout("input", { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}));
-    topology.add(activation("relu", "input", activation_relu));
-    topology.add(activation("relu1", "relu", activation_relu));
-    topology.add(activation("relu2", "relu1", activation_relu));
-    topology.add(activation("relu3", "relu2", activation_relu));
-    topology.add(activation("relu4", "relu3", activation_relu));
-    topology.add(activation("relu5", "relu4", activation_relu));
+    topology.add(activation("relu", "input", activation_func::relu));
+    topology.add(activation("relu1", "relu", activation_func::relu));
+    topology.add(activation("relu2", "relu1", activation_func::relu));
+    topology.add(activation("relu3", "relu2", activation_func::relu));
+    topology.add(activation("relu4", "relu3", activation_func::relu));
+    topology.add(activation("relu5", "relu4", activation_func::relu));
 
     build_options bo;
     bo.set_option(build_option::optimize_data(true));
index 1793d9d..2b6d9d6 100644 (file)
 */
 
 #include <gtest/gtest.h>
-#include "api/CPP/engine.hpp"
+#include "api/engine.hpp"
 #include "test_utils/test_utils.h"
-#include "api/CPP/network.hpp"
-#include "api/CPP/topology.hpp"
-#include "api/CPP/input_layout.hpp"
-#include "api/CPP/activation.hpp"
-#include "api/C/input_layout.h"
-#include "api/C/activation.h"
-#include "api/C/cldnn.h"
+#include "api/network.hpp"
+#include "api/topology.hpp"
+#include "api/input_layout.hpp"
+#include "api/activation.hpp"
+#include "api/cldnn.hpp"
 
 #include "test_utils.h"
 
@@ -123,7 +121,7 @@ TEST(gpu_engine, user_context)
     auto input_mem = cldnn::memory::allocate(engine, inp_lay);
     tests::set_values<float>(input_mem, { 1.0f, 2.0f, 3.0f, 4.0f });
     auto inp = input_layout("input", inp_lay);
-    auto activ = activation("this_needs_queue", "input", cldnn_activation_func::activation_abs);
+    auto activ = activation("this_needs_queue", "input", activation_func::abs);
     topo.add(inp, activ);
     network net(engine, topo);
 
index 200936c..0867a79 100644 (file)
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #include <gtest/gtest.h>
 
 #include "test_utils/uniform_quantized_real_distribution.hpp"
@@ -28,7 +27,6 @@
 #include <type_traits>
 #include <utility>
 
-
 namespace cldnn { namespace tests {
 
 template <typename RealType>
@@ -47,28 +45,23 @@ protected:
     /// @brief Expected result_type of uniform_quantized_real_distribution.
     using expected_uqr_dist_rt = typename std::conditional<!std::is_same<RealType, void>::value, RealType, float>::type;
 
-
     void SetUp() override {}
 
     void TearDown() override {}
 };
 
-
 using uniform_quantized_real_distribution_test_types = ::testing::Types<void, float, double, long double>;
 TYPED_TEST_CASE(uniform_quantized_real_distribution_test, uniform_quantized_real_distribution_test_types);
 
-
 TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_default)
 {
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(0);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(1);
     const unsigned expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 1U;
 
-
     uqr_dist_param dist_param_instance1;
     using actual_uqr_dist_rt = typename decltype(dist_param_instance1)::distribution_type::result_type;
 
@@ -83,7 +76,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     // Any
     auto expected_a       = static_cast<expected_uqr_dist_rt>(-130);
     auto expected_b       = static_cast<expected_uqr_dist_rt>(244);
@@ -95,7 +87,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
     EXPECT_EQ(dist_param_instance1.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance1.significand_rand_bits(), expected_srb);
 
-
     // Zero
     expected_a   = static_cast<expected_uqr_dist_rt>(57);
     expected_b   = static_cast<expected_uqr_dist_rt>(73);
@@ -107,7 +98,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
     EXPECT_EQ(dist_param_instance2.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance2.significand_rand_bits(), expected_srb);
 
-
     // Almost Maximum
     expected_a   = static_cast<expected_uqr_dist_rt>(-65);
     expected_b   = static_cast<expected_uqr_dist_rt>(-45);
@@ -119,7 +109,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
     EXPECT_EQ(dist_param_instance3.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance3.significand_rand_bits(), expected_srb);
 
-
     // Maximum
     expected_a   = static_cast<expected_uqr_dist_rt>(0);
     expected_b   = static_cast<expected_uqr_dist_rt>(0);
@@ -131,7 +120,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
     EXPECT_EQ(dist_param_instance4.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance4.significand_rand_bits(), expected_srb);
 
-
     // Over Maximum
     expected_a   = static_cast<expected_uqr_dist_rt>(-4);
     expected_b   = static_cast<expected_uqr_dist_rt>(-1);
@@ -145,7 +133,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
     EXPECT_EQ(dist_param_instance5.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance5.significand_rand_bits(), expected_srb);
 
-
     // Throw std::invalid_argument (a > b)
     expected_a   = static_cast<expected_uqr_dist_rt>(40);
     expected_b   = static_cast<expected_uqr_dist_rt>(39);
@@ -155,7 +142,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
         uqr_dist_param dist_param_instance6(expected_a, expected_b, test_srb);
     }, std::invalid_argument);
 
-
     // Throw std::invalid_argument (a is infinite)
     expected_a   = -std::numeric_limits<expected_uqr_dist_rt>::infinity();
     expected_b   = static_cast<expected_uqr_dist_rt>(39);
@@ -165,7 +151,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_a_b_srb)
         uqr_dist_param dist_param_instance7(expected_a, expected_b, test_srb);
     }, std::invalid_argument);
 
-
     // Throw std::invalid_argument (b is infinite)
     expected_a   = static_cast<expected_uqr_dist_rt>(40);
     expected_b   = std::numeric_limits<expected_uqr_dist_rt>::infinity();
@@ -181,11 +166,9 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_srb)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a = static_cast<expected_uqr_dist_rt>(0);
     const auto expected_b = static_cast<expected_uqr_dist_rt>(1);
 
-
     // Any
     unsigned expected_srb = 4U;
 
@@ -195,7 +178,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_srb)
     EXPECT_EQ(dist_param_instance1.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance1.significand_rand_bits(), expected_srb);
 
-
     // Zero
     expected_srb = 0U;
 
@@ -205,7 +187,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_srb)
     EXPECT_EQ(dist_param_instance2.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance2.significand_rand_bits(), expected_srb);
 
-
     // Almost Maximum
     expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 4U;
 
@@ -215,7 +196,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_srb)
     EXPECT_EQ(dist_param_instance3.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance3.significand_rand_bits(), expected_srb);
 
-
     // Maximum
     expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 1U;
 
@@ -225,7 +205,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_srb)
     EXPECT_EQ(dist_param_instance4.b(),                     expected_b);
     EXPECT_EQ(dist_param_instance4.significand_rand_bits(), expected_srb);
 
-
     // Over Maximum
     expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 1U;
 
@@ -243,12 +222,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_copy)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-102);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(73);
     const unsigned expected_srb = 3U;
 
-
     uqr_dist_param dist_param_instance1(expected_a, expected_b, expected_srb);
     uqr_dist_param dist_param_instance2(dist_param_instance1);
 
@@ -262,12 +239,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_construct_move)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-101);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(75);
     const unsigned expected_srb = 2U;
 
-
     uqr_dist_param dist_param_instance1(expected_a, expected_b, expected_srb);
     uqr_dist_param dist_param_instance2(std::move(dist_param_instance1));
 
@@ -281,12 +256,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_assign_copy)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-112);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(70);
     const unsigned expected_srb = 4U;
 
-
     uqr_dist_param dist_param_instance1(expected_a, expected_b, expected_srb);
     uqr_dist_param dist_param_instance2(2U);
     dist_param_instance2 = dist_param_instance1;
@@ -301,12 +274,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_assign_move)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-102);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(35);
     const unsigned expected_srb = 1U;
 
-
     uqr_dist_param dist_param_instance1(expected_a, expected_b, expected_srb);
     uqr_dist_param dist_param_instance2(2U);
     dist_param_instance2 = std::move(dist_param_instance1);
@@ -325,7 +296,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_equality_compare)
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(35);
     const unsigned expected_srb = 1U;
 
-
     uqr_dist_param dist_param_instance1(expected_a, expected_b, expected_srb);
     uqr_dist_param dist_param_instance2(2U);
     uqr_dist_param dist_param_instance3 = dist_param_instance1;
@@ -351,18 +321,15 @@ TYPED_TEST(uniform_quantized_real_distribution_test, param_equality_compare)
     EXPECT_FALSE(dist_param_instance3 != dist_param_instance3);
 }
 
-
 TYPED_TEST(uniform_quantized_real_distribution_test, construct_default)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(0);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(1);
     const unsigned expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 1U;
 
-
     uqr_dist dist_instance1;
     using actual_uqr_dist_rt = typename decltype(dist_instance1)::result_type;
 
@@ -377,7 +344,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     // Any
     auto expected_a       = static_cast<expected_uqr_dist_rt>(-137);
     auto expected_b       = static_cast<expected_uqr_dist_rt>(271);
@@ -389,7 +355,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
     EXPECT_EQ(dist_instance1.b(),                     expected_b);
     EXPECT_EQ(dist_instance1.significand_rand_bits(), expected_srb);
 
-
     // Zero
     expected_a   = static_cast<expected_uqr_dist_rt>(47);
     expected_b   = static_cast<expected_uqr_dist_rt>(63);
@@ -401,7 +366,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
     EXPECT_EQ(dist_instance2.b(),                     expected_b);
     EXPECT_EQ(dist_instance2.significand_rand_bits(), expected_srb);
 
-
     // Almost Maximum
     expected_a   = static_cast<expected_uqr_dist_rt>(-55);
     expected_b   = static_cast<expected_uqr_dist_rt>(-15);
@@ -413,7 +377,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
     EXPECT_EQ(dist_instance3.b(),                     expected_b);
     EXPECT_EQ(dist_instance3.significand_rand_bits(), expected_srb);
 
-
     // Maximum
     expected_a   = static_cast<expected_uqr_dist_rt>(2);
     expected_b   = static_cast<expected_uqr_dist_rt>(2);
@@ -425,7 +388,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
     EXPECT_EQ(dist_instance4.b(),                     expected_b);
     EXPECT_EQ(dist_instance4.significand_rand_bits(), expected_srb);
 
-
     // Over Maximum
     expected_a   = static_cast<expected_uqr_dist_rt>(-3);
     expected_b   = static_cast<expected_uqr_dist_rt>(0);
@@ -439,7 +401,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
     EXPECT_EQ(dist_instance5.b(),                     expected_b);
     EXPECT_EQ(dist_instance5.significand_rand_bits(), expected_srb);
 
-
     // Throw std::invalid_argument (a > b)
     expected_a   = static_cast<expected_uqr_dist_rt>(-40);
     expected_b   = static_cast<expected_uqr_dist_rt>(-80);
@@ -449,7 +410,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
         uqr_dist dist_instance6(expected_a, expected_b, test_srb);
     }, std::invalid_argument);
 
-
     // Throw std::invalid_argument (a is infinite)
     expected_a   = -std::numeric_limits<expected_uqr_dist_rt>::infinity();
     expected_b   = static_cast<expected_uqr_dist_rt>(-80);
@@ -459,7 +419,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_a_b_srb)
         uqr_dist dist_instance7(expected_a, expected_b, test_srb);
     }, std::invalid_argument);
 
-
     // Throw std::invalid_argument (b is infinite)
     expected_a   = static_cast<expected_uqr_dist_rt>(-40);
     expected_b   = std::numeric_limits<expected_uqr_dist_rt>::infinity();
@@ -476,12 +435,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_param)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(2);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(17);
     const unsigned expected_srb = 3U;
 
-
     uqr_dist_param dist_param_instance1(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance1(dist_param_instance1);
 
@@ -499,11 +456,9 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_srb)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a = static_cast<expected_uqr_dist_rt>(0);
     const auto expected_b = static_cast<expected_uqr_dist_rt>(1);
 
-
     // Any
     unsigned expected_srb = 3U;
 
@@ -513,7 +468,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_srb)
     EXPECT_EQ(dist_instance1.b(),                     expected_b);
     EXPECT_EQ(dist_instance1.significand_rand_bits(), expected_srb);
 
-
     // Zero
     expected_srb = 0U;
 
@@ -523,7 +477,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_srb)
     EXPECT_EQ(dist_instance2.b(),                     expected_b);
     EXPECT_EQ(dist_instance2.significand_rand_bits(), expected_srb);
 
-
     // Almost Maximum
     expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 2U;
 
@@ -533,7 +486,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_srb)
     EXPECT_EQ(dist_instance3.b(),                     expected_b);
     EXPECT_EQ(dist_instance3.significand_rand_bits(), expected_srb);
 
-
     // Maximum
     expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 1U;
 
@@ -543,7 +495,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_srb)
     EXPECT_EQ(dist_instance4.b(),                     expected_b);
     EXPECT_EQ(dist_instance4.significand_rand_bits(), expected_srb);
 
-
     // Over Maximum
     expected_srb = std::numeric_limits<expected_uqr_dist_rt>::digits - 1U;
 
@@ -561,12 +512,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_copy)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-122);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(33);
     const unsigned expected_srb = 5U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance2(dist_instance1);
 
@@ -580,12 +529,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, construct_move)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(0);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(15);
     const unsigned expected_srb = 1U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance2(std::move(dist_instance1));
 
@@ -599,12 +546,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, assign_copy)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-1);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(1);
     const unsigned expected_srb = 3U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance2(2U);
     dist_instance2 = dist_instance1;
@@ -619,12 +564,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, assign_move)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-107);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-36);
     const unsigned expected_srb = 2U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance2(3U);
     dist_instance2 = std::move(dist_instance1);
@@ -639,12 +582,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, get_param)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-22);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-17);
     const unsigned expected_srb = 2U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
 
     EXPECT_EQ(dist_instance1.param().a(),                     expected_a);
@@ -658,14 +599,12 @@ TYPED_TEST(uniform_quantized_real_distribution_test, set_param)
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-122);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-67);
     const unsigned expected_srb = 3U;
 
     uqr_dist dist_instance_ref(expected_a, expected_b, expected_srb);
 
-
     // Custom Parameters
     uqr_dist dist_instance1(1U);
     dist_instance1.param(uqr_dist_param(expected_a, expected_b, expected_srb));
@@ -676,7 +615,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, set_param)
 
     EXPECT_TRUE(dist_instance1 == dist_instance_ref);
 
-
     // From Other Distribution
     uqr_dist dist_instance2(2U);
     dist_instance2.param(dist_instance1.param());
@@ -694,12 +632,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, get_member_param_equivalenc
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(22);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(27);
     const unsigned expected_srb = 4U;
 
-
     // Default Constructor
     uqr_dist_param dist_param_instance1;
     uqr_dist dist_instance1;
@@ -712,7 +648,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, get_member_param_equivalenc
     EXPECT_EQ(dist_instance1.b(),                     dist_param_instance1.b());
     EXPECT_EQ(dist_instance1.significand_rand_bits(), dist_param_instance1.significand_rand_bits());
 
-
     // Constructor (a, b, srb)
     uqr_dist_param dist_param_instance2(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance2(expected_a, expected_b, expected_srb);
@@ -725,7 +660,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, get_member_param_equivalenc
     EXPECT_EQ(dist_instance2.b(),                     dist_param_instance2.b());
     EXPECT_EQ(dist_instance2.significand_rand_bits(), dist_param_instance2.significand_rand_bits());
 
-
     // Constructor (srb)
     uqr_dist_param dist_param_instance3(expected_srb);
     uqr_dist dist_instance3(expected_srb);
@@ -744,12 +678,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, get_min)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-99);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-97);
     const unsigned expected_srb = 3U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
 
     EXPECT_EQ(dist_instance1.min(), expected_a);
@@ -760,12 +692,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, get_max)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-99);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-97);
     const unsigned expected_srb = 3U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
 
     EXPECT_EQ(dist_instance1.max(), expected_b);
@@ -776,12 +706,10 @@ TYPED_TEST(uniform_quantized_real_distribution_test, equality_compare)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(102);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(105);
     const unsigned expected_srb = 4U;
 
-
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
     uqr_dist dist_instance2(2U);
     uqr_dist dist_instance3(dist_instance1);
@@ -812,14 +740,12 @@ TYPED_TEST(uniform_quantized_real_distribution_test, serialize)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(-77);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(17);
     const unsigned expected_srb = 4U;
 
     uqr_dist dist_instance1(expected_a, expected_b, expected_srb);
 
-
     // Preserve Stream Formatting #1
     const auto before_flags1 = std::cout.flags();
     const auto before_fill1  = std::cout.fill();
@@ -834,7 +760,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, serialize)
     EXPECT_EQ(before_fill1,  after_fill1);
     EXPECT_EQ(before_prec1,  after_prec1);
 
-
     // Preserve Stream Formatting #2
     std::wstringstream ss2;
     ss2 << std::oct << std::setprecision(5) << std::boolalpha << std::setfill(ss2.widen('#'));
@@ -852,7 +777,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, serialize)
     EXPECT_EQ(before_fill2,  after_fill2);
     EXPECT_EQ(before_prec2,  after_prec2);
 
-
     // Preserve Stream Formatting #3
     std::wstringstream ss3;
     ss3 << std::dec << std::setprecision(5) << std::right << std::setw(400) << std::skipws
@@ -871,7 +795,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, serialize)
     EXPECT_EQ(before_fill3,  after_fill3);
     EXPECT_EQ(before_prec3,  after_prec3);
 
-
     // Serialize Do Not Change Internal State.
     std::wstringstream ss4;
     ss4 << std::oct << std::setprecision(5) << std::boolalpha << std::setfill(ss4.widen('#'));
@@ -887,7 +810,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, deserialize)
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(82);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(99);
     const unsigned expected_srb = 5U;
@@ -896,7 +818,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, deserialize)
 
     uqr_dist dist_instance_base(dist_instance_ref);
 
-
     // Valid Deserialization (Narrow String)
     std::stringstream ss1, ss1_1, ss1_2;
     uqr_dist dist_instance1, dist_instance2;
@@ -921,7 +842,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, deserialize)
 
     EXPECT_EQ(ss1_1.str(), ss1_2.str());
 
-
     // Valid Deserialization (Wide String)
     std::wstringstream ss2, ss2_1, ss2_2;
     uqr_dist dist_instance3, dist_instance4;
@@ -946,7 +866,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, deserialize)
 
     EXPECT_EQ(ss2_1.str(), ss2_2.str());
 
-
     // Invalid Deserialization
     std::wstringstream ss3;
     uqr_dist dist_instance5(dist_instance_ref);
@@ -964,13 +883,11 @@ TYPED_TEST(uniform_quantized_real_distribution_test, deserialize)
     EXPECT_TRUE(dist_instance5 == dist_instance_ref);
 }
 
-
-TYPED_TEST(uniform_quantized_real_distribution_test, generate_random)
+TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     constexpr auto val_zero = static_cast<expected_uqr_dist_rt>(0);
 
     const auto expected_fract   = static_cast<expected_uqr_dist_rt>(0.5);
@@ -978,7 +895,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random)
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-92);
     const unsigned expected_srb = 4U;
 
-
     std::mt19937_64 g1;
     uqr_dist dist1(expected_a, expected_b, expected_srb);
 
@@ -1003,7 +919,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_degen_a_b)
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(110);
     const unsigned expected_srb = 3U;
 
-
     std::mt19937_64 g1;
     uqr_dist dist1(expected_a, expected_b, expected_srb);
 
@@ -1015,7 +930,7 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_degen_a_b)
     }
 }
 
-TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_degen_srb)
+TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random_degen_srb)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
@@ -1024,7 +939,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_degen_srb)
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(-92);
     const unsigned expected_srb = 0U;
 
-
     std::mt19937_64 g1;
     uqr_dist dist1(expected_a, expected_b, expected_srb);
 
@@ -1041,7 +955,7 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_degen_srb)
     std::cout << "a: " << count_a << ", b: " << count_b << std::endl;
 }
 
-TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_c9)
+TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random_c9)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
@@ -1050,7 +964,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_c9)
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(2);
     const unsigned expected_srb = 3U;
 
-
     std::mt19937_64 g1;
     uqr_dist dist1(expected_a, expected_b, expected_srb);
 
@@ -1075,7 +988,7 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_c9)
     std::cout << std::endl;
 }
 
-TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_c17)
+TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random_c17)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
@@ -1084,7 +997,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_c17)
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(4);
     const unsigned expected_srb = 4U;
 
-
     std::mt19937_64 g1;
     uqr_dist dist1(expected_a, expected_b, expected_srb);
 
@@ -1109,13 +1021,12 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_c17)
     std::cout << std::endl;
 }
 
-TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_param)
+TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random_param)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     constexpr auto val_zero = static_cast<expected_uqr_dist_rt>(0);
 
     auto expected_fract   = static_cast<expected_uqr_dist_rt>(0.125);
@@ -1128,7 +1039,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_param)
     const auto test_b       = static_cast<expected_uqr_dist_rt>(18);
     const unsigned test_srb = 5U;
 
-
     // Temporary Switch Of Param
     std::mt19937_64 g1;
     uqr_dist dist1(test_a, test_b, test_srb);
@@ -1145,7 +1055,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_param)
         EXPECT_EQ(std::modf(rnd_val / expected_fract, &actual_ipart), val_zero);
     }
 
-
     // Original Param
     expected_fract = test_fract;
     expected_a     = test_a;
@@ -1168,18 +1077,16 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_param)
     }
 }
 
-TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_equivalence)
+TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random_equivalence)
 {
     using uqr_dist             = typename TestFixture::uqr_dist;
     using uqr_dist_param       = typename TestFixture::uqr_dist_param;
     using expected_uqr_dist_rt = typename TestFixture::expected_uqr_dist_rt;
 
-
     const auto expected_a       = static_cast<expected_uqr_dist_rt>(16);
     const auto expected_b       = static_cast<expected_uqr_dist_rt>(32);
     const unsigned expected_srb = 5U;
 
-
     // Equivalent Initialization.
     std::mt19937_64 g1, g2, g3, g4, g5, g6, g7, g8, g9;
     uqr_dist dist1(expected_a, expected_b, expected_srb), dist1_1(expected_a, expected_b, expected_srb);
@@ -1209,7 +1116,6 @@ TYPED_TEST(uniform_quantized_real_distribution_test, generate_random_equivalence
         EXPECT_EQ(rnd_val1, rnd_val5);
     }
 
-
     // Equivalent Assignment And Serialization.
     dist6.reset();
     dist6 = dist1;
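
    The hunks above rename the random-generation cases with GoogleTest's DISABLED_ prefix. Such tests are still compiled and listed, but a normal run skips them unless the runner is invoked with --gtest_also_run_disabled_tests. A minimal sketch of the mechanism, using an illustrative suite and test name rather than ones from this file:

        #include <gtest/gtest.h>

        // Compiled and reported as disabled; skipped on a default run.
        TEST(example_suite, DISABLED_statistical_check) {
            EXPECT_GT(2, 1);
        }

        // To execute it anyway:
        //   <test_binary> --gtest_also_run_disabled_tests --gtest_filter=*statistical_check*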
index 7cc7865..2637144 100644 (file)
 #include <cmath>
 #include <gtest/gtest.h>
 #include <algorithm>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/activation_grad.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/data.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/activation_grad.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 #include "test_utils/float16.h"
 
@@ -60,13 +60,13 @@ TEST(activation_grad_f16_fw_gpu, basic_bfyx_all_functions)
         FLOAT16(32.0f), FLOAT16(-32.0f), FLOAT16(32.0f), FLOAT16(52.0f), FLOAT16(12.0f),
         FLOAT16(12.0f), FLOAT16(12.0f), FLOAT16(12.0f), FLOAT16(-12.0f), FLOAT16(12.0f) });
 
-    std::vector<cldnn_activation_grad_func> funcs = {
-        activation_grad_none,
-        activation_grad_relu,
-        activation_grad_relu_negative_slope,
+    std::vector<activation_grad_func> funcs = {
+        activation_grad_func::none,
+        activation_grad_func::relu,
+        activation_grad_func::relu_negative_slope,
     };
 
-    cldnn_activation_additional_params params = { 0.5f, 2.5f };
+    activation_additional_params params = { 0.5f, 2.5f };
     set_values(input_params, { FLOAT16(params.a), FLOAT16(params.b) });
 
     for (uint8_t i = 0; i < 2; i++)
@@ -114,13 +114,13 @@ TEST(activation_grad_f16_fw_gpu, basic_bfyx_all_functions)
             {
                 switch (func)
                 {
-                case activation_grad_none:
+                case activation_grad_func::none:
                     EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]), float16_to_float32(output_ptr[i]));
                     break;
-                case activation_grad_relu:
+                case activation_grad_func::relu:
                     EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]) * (float16_to_float32(input_ptr[i]) > 0), float16_to_float32(output_ptr[i]));
                     break;
-                case activation_grad_relu_negative_slope:
+                case activation_grad_func::relu_negative_slope:
                     EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]) * ((float16_to_float32(input_ptr[i]) > 0) + params.a * (float16_to_float32(input_ptr[i]) <= 0)), float16_to_float32(output_ptr[i]));
                     break;
                 default:
@@ -159,13 +159,13 @@ TEST(activation_grad_f32_fw_gpu, basic_bfyx_all_functions)
         32.0f, -32.0f, 32.0f, 52.0f, 12.0f,
         12.0f, 12.0f, 12.0f, -12.0f, 12.0f });
 
-    std::vector<cldnn_activation_grad_func> funcs = {
-        activation_grad_none,
-        activation_grad_relu,
-        activation_grad_relu_negative_slope,
+    std::vector<activation_grad_func> funcs = {
+        activation_grad_func::none,
+        activation_grad_func::relu,
+        activation_grad_func::relu_negative_slope,
     };
 
-    cldnn_activation_additional_params params = { 0.5f, 2.5f };
+    activation_additional_params params = { 0.5f, 2.5f };
     set_values(input_params, { params.a, params.b });
 
     for (uint8_t i = 0; i < 2; i++)
@@ -213,13 +213,13 @@ TEST(activation_grad_f32_fw_gpu, basic_bfyx_all_functions)
             {
                 switch (func)
                 {
-                case activation_grad_none:
+                case activation_grad_func::none:
                     EXPECT_FLOAT_EQ(input_grad_ptr[i], output_ptr[i]);
                     break;
-                case activation_grad_relu:
+                case activation_grad_func::relu:
                     EXPECT_FLOAT_EQ(input_grad_ptr[i] * (input_ptr[i] > 0), output_ptr[i]);
                     break;
-                case activation_grad_relu_negative_slope:
+                case activation_grad_func::relu_negative_slope:
                     EXPECT_FLOAT_EQ(input_grad_ptr[i] * ((input_ptr[i] > 0) + params.a * (input_ptr[i] <= 0)), output_ptr[i]);
                     break;
                 default:
@@ -228,4 +228,4 @@ TEST(activation_grad_f32_fw_gpu, basic_bfyx_all_functions)
             }
         }
     }
-}
\ No newline at end of file
+}
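
    This file, like the rest of the clDNN tests in this commit, moves its includes from api/CPP/ to api/ as the public headers are relocated. A representative header block after the change, taken from the usages visible in these hunks (the exact set varies per test):

        #include <gtest/gtest.h>
        #include "api/memory.hpp"
        #include <api/input_layout.hpp>
        #include <api/topology.hpp>
        #include <api/network.hpp>
        #include <api/engine.hpp>
        #include <api/data.hpp>
        #include "test_utils/test_utils.h"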
index 7c36f3a..cd25357 100644 (file)
 #include <cmath>
 #include <gtest/gtest.h>
 #include <algorithm>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/activation.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/data.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/activation.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 #include "test_utils/float16.h"
-#include "api/CPP/reorder.hpp"
+#include "api/reorder.hpp"
 
 using namespace cldnn;
 using namespace tests;
 
-
 TEST(activation_f32_fw_gpu, not_basic_yxfb) {
     //  Input:
     //  1 0 -3  4  5
@@ -63,7 +62,7 @@ TEST(activation_f32_fw_gpu, not_basic_yxfb) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("not", "input", activation_not));
+        activation("not", "input", activation_func::negation));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -107,7 +106,7 @@ TEST(activation_f32_fw_gpu, erf_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_erf));
+            activation("not", "input", activation_func::erf));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -144,7 +143,7 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) {
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
-    cldnn_activation_additional_params params = { 1.0f, 0.5f };
+    activation_additional_params params = { 1.0f, 0.5f };
     set_values(input,
                { 1.0f, 0.0f, -3.0f, 4.0f, 5.0f,
                  0.0f, 2.0f, 3.0f, 4.0f, -6.0f,
@@ -153,7 +152,7 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_hard_sigmoid, params));
+            activation("not", "input", activation_func::hard_sigmoid, params));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -199,7 +198,7 @@ TEST(activation_f32_fw_gpu, reciprocal_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_reciprocal));
+            activation("not", "input", activation_func::reciprocal));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -237,7 +236,7 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) {
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
-    cldnn_activation_additional_params params = { 1.0f, 0.5f };
+    activation_additional_params params = { 1.0f, 0.5f };
     set_values(input,
                { 1.0f, 0.3f, -3.0f, 4.0f, 5.0f,
                  21.0f, 2.0f, 3.0f, 4.0f, -6.0f,
@@ -246,7 +245,7 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_selu, params));
+            activation("not", "input", activation_func::selu, params));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -293,7 +292,7 @@ TEST(activation_f32_fw_gpu, softplus_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_softplus));
+            activation("not", "input", activation_func::softplus));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -339,7 +338,7 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_softsign));
+            activation("not", "input", activation_func::softsign));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -367,7 +366,6 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) {
     }
 }
 
-
 TEST(activation_f32_fw_gpu, sign_basic_yxfb) {
     //  Input:
     //  1 0 -3  4  5
@@ -386,7 +384,7 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) {
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("not", "input", activation_sign));
+            activation("not", "input", activation_func::sign));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -424,7 +422,7 @@ TEST(activation_f32_fw_gpu, pow_basic_yxfb) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("pow", "input", activation_pow, { 2.0f }));
+        activation("pow", "input", activation_func::pow, { 2.0f, 0.0f }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -460,7 +458,7 @@ TEST(activation_f16_fw_gpu, pow_basic_yxfb) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("pow", "input", activation_pow, { FLOAT16(3.0f) }));
+        activation("pow", "input", activation_func::pow, { FLOAT16(3.0f), FLOAT16(0.0f) }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -496,7 +494,7 @@ TEST(activation_f16_fw_gpu, linear_basic_yxfb) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("linear", "input", activation_linear, {FLOAT16(3.0f), FLOAT16(2.0f)}));
+        activation("linear", "input", activation_func::linear, {FLOAT16(3.0f), FLOAT16(2.0f)}));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -553,7 +551,7 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("relu", "input", activation_relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 }));
+        activation("relu", "input", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -629,7 +627,7 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("relu", "input", activation_relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 }));
+        activation("relu", "input", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -657,7 +655,7 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) {
     }
 }
 
-TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) 
+TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
 {
     //  Input:
     //  1 -2 -3  4  5
@@ -671,39 +669,39 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 4 } });
-    auto input_params = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
+    auto input_params = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
     set_values(input,
     { 0.0f, -2.0f, -3.0f, 4.0f, 5.0f,
         2.0f, 2.0f, 3.0f, 4.0f, -6.0f,
         3.0f, -3.0f, 3.0f, 5.0f, 1.0f,
         1.0f, 1.0f, 1.0f, -1.0f, 1.0f });
 
-    std::vector<cldnn_activation_func> funcs = {
-        activation_none,
-        activation_logistic,
-        activation_hyperbolic_tan,
-        activation_relu,
-        activation_relu_negative_slope,
-        activation_clamp,
-        activation_softrelu,
-        activation_abs,
-        activation_linear,
-        activation_square,
-        activation_sqrt,
-        activation_elu,
-        activation_sin,
-        activation_sinh,
-        activation_cos,
-        activation_cosh,
-        activation_exp,
-        activation_not,
-        activation_log2,
-        activation_tan,
-        activation_negative,
-        activation_abs
+    std::vector<activation_func> funcs = {
+        activation_func::none,
+        activation_func::logistic,
+        activation_func::hyperbolic_tan,
+        activation_func::relu,
+        activation_func::relu_negative_slope,
+        activation_func::clamp,
+        activation_func::softrelu,
+        activation_func::abs,
+        activation_func::linear,
+        activation_func::square,
+        activation_func::sqrt,
+        activation_func::elu,
+        activation_func::sin,
+        activation_func::sinh,
+        activation_func::cos,
+        activation_func::cosh,
+        activation_func::exp,
+        activation_func::negation,
+        activation_func::log2,
+        activation_func::tan,
+        activation_func::negative,
+        activation_func::abs
     };
 
-    cldnn_activation_additional_params params = { 0.5f, 2.5f };
+    activation_additional_params params = { 0.5f, 2.5f };
     set_values(input_params, { params.a, params.b });
 
     for (uint8_t i = 0 ; i < 2 ; i++)
@@ -747,71 +745,71 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
             {
                 switch (func)
                 {
-                case activation_none:
+                case activation_func::none:
                     EXPECT_FLOAT_EQ(input_ptr[i], output_ptr[i]);
                     break;
-                case activation_logistic:
+                case activation_func::logistic:
                     EXPECT_FLOAT_EQ(1.f / (1.f + std::exp((float)-input_ptr[i])), output_ptr[i]);
                     break;
-                case activation_hyperbolic_tan:
+                case activation_func::hyperbolic_tan:
                     EXPECT_FLOAT_EQ(std::tanh((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_relu:
+                case activation_func::relu:
                     EXPECT_FLOAT_EQ(std::fmax((float)input_ptr[i], 0.f), output_ptr[i]);
                     break;
-                case activation_clamp:
+                case activation_func::clamp:
                     EXPECT_FLOAT_EQ(std::fmin((float)std::fmax((float)input_ptr[i], params.a), params.b), output_ptr[i]);
                     break;
-                case activation_softrelu:
+                case activation_func::softrelu:
                     EXPECT_FLOAT_EQ(std::log(1.f + std::exp((float)input_ptr[i])), output_ptr[i]);
                     break;
-                case activation_abs:
+                case activation_func::abs:
                     EXPECT_FLOAT_EQ(std::fabs(input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_linear:
+                case activation_func::linear:
                     EXPECT_FLOAT_EQ((params.a*input_ptr[i] + params.b), output_ptr[i]);
                     break;
-                case activation_square:
+                case activation_func::square:
                     EXPECT_FLOAT_EQ((input_ptr[i] * input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_sqrt:
+                case activation_func::sqrt:
                     if (input_ptr[i] >= 0)
                     {
                         EXPECT_FLOAT_EQ(std::sqrt((float)input_ptr[i]), output_ptr[i]);
                     }
                     break;
-                case activation_elu:
+                case activation_func::elu:
                     EXPECT_FLOAT_EQ(std::fmax((float)input_ptr[i], 0.0f) +
                                     params.a*(std::exp(std::fmin((float)input_ptr[i], 0.0f)) - 1), output_ptr[i]);
                     break;
-                case activation_sin:
+                case activation_func::sin:
                     EXPECT_FLOAT_EQ(std::sin((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_sinh:
+                case activation_func::sinh:
                     EXPECT_FLOAT_EQ(std::sinh((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_cos:
+                case activation_func::cos:
                     EXPECT_FLOAT_EQ(std::cos((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_cosh:
+                case activation_func::cosh:
                     EXPECT_FLOAT_EQ(std::cosh((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_exp:
+                case activation_func::exp:
                     EXPECT_FLOAT_EQ(std::exp((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_not:
+                case activation_func::negation:
                     EXPECT_FLOAT_EQ((float)(!input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_log2:
+                case activation_func::log2:
                     if (input_ptr[i] > 0) // logarithm exists only for positive real values
                     {
                         EXPECT_FLOAT_EQ(std::log2((float)input_ptr[i]), output_ptr[i]);
                     }
                     break;
-                case activation_tan:
+                case activation_func::tan:
                     EXPECT_FLOAT_EQ(std::tan((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_negative:
+                case activation_func::negative:
                     EXPECT_FLOAT_EQ(-((float)input_ptr[i]), output_ptr[i]);
                     break;
                 default:
@@ -829,15 +827,15 @@ TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan)
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 4 } });
     set_values(input, { 0.12f, 0.56f, 0.45f, 0.789f, 0.546f, 0.999f, 0.7899f, 0.6677f});
 
-    std::vector<cldnn_activation_func> funcs = {
-        activation_asin,
-        activation_acos,
-        activation_log,
-        activation_log2,
-        activation_atan,
-        activation_asin,
-        activation_asinh,
-        activation_atanh
+    std::vector<activation_func> funcs = {
+        activation_func::asin,
+        activation_func::acos,
+        activation_func::log,
+        activation_func::log2,
+        activation_func::atan,
+        activation_func::asin,
+        activation_func::asinh,
+        activation_func::atanh
     };
 
     for (auto func : funcs)
@@ -870,28 +868,28 @@ TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan)
         {
             switch (func)
             {
-            case activation_asin:
+            case activation_func::asin:
                 EXPECT_FLOAT_EQ(std::asin((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_acos:
+            case activation_func::acos:
                 EXPECT_FLOAT_EQ(std::acos((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_log:
+            case activation_func::log:
                 EXPECT_FLOAT_EQ(std::log((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_log2:
+            case activation_func::log2:
                 EXPECT_FLOAT_EQ(std::log2((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_atan:
+            case activation_func::atan:
                 EXPECT_FLOAT_EQ(std::atan((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_asinh:
+            case activation_func::asinh:
                 EXPECT_FLOAT_EQ(std::asinh((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_acosh:
+            case activation_func::acosh:
                 EXPECT_FLOAT_EQ(std::acosh((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_atanh:
+            case activation_func::atanh:
                 EXPECT_FLOAT_EQ(std::atanh((float)input_ptr[i]), output_ptr[i]);
                 break;
             default:
@@ -930,7 +928,7 @@ TEST(activation_f32_fw_gpu, relu_basic_acosh_yxfb) {
     topology topology(
             input_layout("input", input.get_layout()),
             reorder("reorder", "input", input.get_layout().with_padding(padding{{0, 0, 2, 1}, 0})),
-            activation("relu", "reorder", activation_acosh, {0.5f, 0.f}, padding{{0, 0, 0, 0}, 0}));
+            activation("relu", "reorder", activation_func::acosh, {0.5f, 0.f}, padding{{0, 0, 0, 0}, 0}));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -996,7 +994,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) {
     topology topology(
         input_layout("input", input.get_layout()),
         reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })),
-        activation("relu", "reorder", activation_relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 }));
+        activation("relu", "reorder", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -1083,7 +1081,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_bfzyx) {
     topology topology(
         input_layout("input", input.get_layout()),
         reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1, 0 }, 0 })),
-        activation("relu", "reorder", activation_relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 }));
+        activation("relu", "reorder", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -1156,7 +1154,7 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) {
 
     topology topology(
         input_layout("input", input.get_layout()),
-        activation("relu", "input", activation_relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 3, 3 }, 0 }));
+        activation("relu", "input", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 3, 3 }, 0 }));
     network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
@@ -1190,9 +1188,9 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil)
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 4 } });
     set_values(input, { 0.01f, 0.99f, -0.01f, -0.99f, 1.1f, 1.0f, 0.0f, -1.1f });
 
-    std::vector<cldnn_activation_func> funcs = {
-        activation_floor,
-        activation_ceil
+    std::vector<activation_func> funcs = {
+        activation_func::floor,
+        activation_func::ceil
     };
 
     for (auto func : funcs)
@@ -1225,10 +1223,10 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil)
         {
             switch (func)
             {
-            case activation_floor:
+            case activation_func::floor:
                 EXPECT_FLOAT_EQ(std::floor((float)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_ceil:
+            case activation_func::ceil:
                 EXPECT_FLOAT_EQ(std::ceil((float)input_ptr[i]), output_ptr[i]);
                 break;
             default:
@@ -1252,10 +1250,10 @@ TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs)
     set_values(input, input_vec);
 
     // functions valid for int8 type input
-    std::vector<cldnn_activation_func> funcs = {
-        activation_none,
-        activation_negative,
-        activation_not
+    std::vector<activation_func> funcs = {
+        activation_func::none,
+        activation_func::negative,
+        activation_func::negation
     };
 
     for (auto func : funcs)
@@ -1280,13 +1278,13 @@ TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs)
         {
             switch (func)
             {
-            case activation_none:
+            case activation_func::none:
                 EXPECT_EQ((int8_t)input_ptr[i], output_ptr[i]);
                 break;
-            case activation_negative:
+            case activation_func::negative:
                 EXPECT_EQ(-((int8_t)input_ptr[i]), output_ptr[i]);
                 break;
-            case activation_not:
+            case activation_func::negation:
                 EXPECT_EQ(!((int8_t)input_ptr[i]), output_ptr[i]);
                 break;
             default:
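
    The activation tests above also migrate from the C-style cldnn_activation_* enumerators and cldnn_activation_additional_params to the scoped activation_func enumeration and the activation_additional_params struct with fields a and b. A short sketch of building a topology against the updated interface, assembled only from calls that appear in these hunks (identifiers such as build_example are illustrative; the second pow coefficient is passed as 0.0f to match the updated tests):

        #include <api/activation.hpp>
        #include <api/engine.hpp>
        #include <api/input_layout.hpp>
        #include <api/memory.hpp>
        #include <api/topology.hpp>

        using namespace cldnn;

        topology build_example(const memory& input) {
            activation_additional_params linear_params = { 0.5f, 2.5f };  // a, b

            topology t;
            t.add(input_layout("input", input.get_layout()));
            t.add(activation("linear", "input", activation_func::linear, linear_params));
            t.add(activation("pow", "linear", activation_func::pow, { 2.0f, 0.0f }));
            t.add(activation("neg", "pow", activation_func::negation));
            return t;
        }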
index 8a53e8f..a81d4d0 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/activation.hpp>
-#include <api/CPP/mutable_data.hpp>
-#include <api/CPP/layout.hpp>
-#include <api/CPP/tile.hpp>
-#include <api/CPP/reshape.hpp>
-
-#include <api/CPP/batch_norm.hpp>
-#include <api/CPP/concatenation.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
+#include <api/activation.hpp>
+#include <api/mutable_data.hpp>
+#include <api/layout.hpp>
+#include <api/tile.hpp>
+#include <api/reshape.hpp>
+
+#include <api/batch_norm.hpp>
+#include <api/concatenation.hpp>
 
 using namespace cldnn;
 using namespace tests;
index 6d2250c..220b176 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/apply_adam.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/apply_adam.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/activation.hpp>
-#include <api/CPP/mutable_data.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
+#include <api/activation.hpp>
+#include <api/mutable_data.hpp>
 
 using namespace cldnn;
 using namespace tests;
@@ -61,9 +61,9 @@ TEST(apply_adam_gpu, basic_in2x2x3x2_bfyx) {
     topology.add(data("beta1_power_t1", beta1_power));
     topology.add(data("beta2_power_t1", beta2_power));
     topology.add(apply_adam("apply_adam", "input", "m", "v", "beta1_power_t1", "beta2_power_t1", lr, beta1, beta2, epsilon));
-    topology.add(activation("relu", "input", activation_linear, { 4.f, 0.f }));
-    topology.add(activation("beta1_power_t2", "beta1_power_t1", activation_linear, { beta1, 0.f }));
-    topology.add(activation("beta2_power_t2", "beta2_power_t1", activation_linear, { beta2, 0.f }));
+    topology.add(activation("relu", "input", activation_func::linear, { 4.f, 0.f }));
+    topology.add(activation("beta1_power_t2", "beta1_power_t1", activation_func::linear, { beta1, 0.f }));
+    topology.add(activation("beta2_power_t2", "beta2_power_t1", activation_func::linear, { beta2, 0.f }));
     topology.add(apply_adam("apply_adam2", "relu", "m", "v", "beta1_power_t2", "beta2_power_t2", lr, beta1, beta2, epsilon, "apply_adam"));
     topology.add(mutable_data("var", { "apply_adam", "apply_adam2" }, var));
 
index ea41280..d5c77d3 100644 (file)
 */
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/arg_max_min.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/mutable_data.hpp>
-#include <api/CPP/data.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/arg_max_min.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/mutable_data.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
-using namespace std;
 using namespace tests;
 
-
-
-
 template <typename Tin, typename Tout>
 void generic_arg_max_test_xyf(int input_b, int input_f, int input_y, int input_x, arg_max_min::out_type mode, bool expect_throw = false)
 {
@@ -98,7 +94,7 @@ TEST(arg_max_gpu_batch_one, base) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
         //y0x0 y0x1 y1x0 y1x1
         /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
         /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -176,7 +172,7 @@ TEST(arg_max_gpu_top_k, base) {
        topology.add(input_layout("input", input.get_layout()));
        topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k));
 
-       vector<float> input_vec = {
+       std::vector<float> input_vec = {
                //y0x0 y0x1 y1x0 y1x1
                /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
                /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -260,7 +256,7 @@ TEST(arg_max_gpu_min_top_k, base) {
        topology.add(input_layout("input", input.get_layout()));
        topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k));
 
-       vector<float> input_vec = {
+       std::vector<float> input_vec = {
                        //f0b0 f0b1 f1b0 f1b1
                /*x0y0*/0.1f, -0.1f, 0.9f,  1.5f,
                /*x0y1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -342,7 +338,7 @@ TEST(arg_max_gpu_min_axis_batch, base) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k, arg_max_min::batch));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
         //y0x0 y0x1 y1x0 y1x1
         /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
         /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -431,7 +427,7 @@ TEST(arg_max_gpu_min_axis_batch, i32) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::i32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
         //y0x0 y0x1 y1x0 y1x1
         /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
         /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -476,7 +472,7 @@ TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::i32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             //y0x0 y0x1 y1x0 y1x1
             /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
             /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -520,7 +516,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, padding(), data_types::f32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             0.1f, -0.1f,
             0.9f,  1.5f,
             0.2f, 0.2f,
@@ -531,7 +527,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) {
             0.2f, 0.2f,
             -10.f, 4.2f,
 
-
             3.f,  0.5f,
             7.f,   10.f,
             4.f,  0.5f,
@@ -543,7 +538,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) {
             8.f,   8.2f
     };
 
-    vector<float> ref_vec = {
+    std::vector<float> ref_vec = {
             1.f, 1.f,
             1.f, 1.f,
             1.f, 1.f,
@@ -587,7 +582,7 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::f32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             0.1f, -0.1f,
             0.9f,  1.5f,
             0.2f, 0.2f,
@@ -598,7 +593,6 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) {
             0.2f, 0.2f,
             -10.f, 4.2f,
 
-
             3.f,  0.5f,
             7.f,   10.f,
             4.f,  0.5f,
@@ -610,7 +604,7 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) {
             8.f,   8.2f
     };
 
-    vector<float> ref_vec = {
+    std::vector<float> ref_vec = {
             0.f, 1.f,
             0.f, 1.f,
             0.f, 1.f,
@@ -654,7 +648,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, padding(), data_types::f32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             0.1f, -0.1f,
             0.9f,  1.5f,
             0.2f, 0.2f,
@@ -665,7 +659,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) {
             0.2f, 0.2f,
             -10.f, 4.2f,
 
-
             3.f,  0.5f,
             7.f,   10.f,
             4.f,  0.5f,
@@ -677,7 +670,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) {
             8.f,   8.2f
     };
 
-    vector<float> ref_vec = {
+    std::vector<float> ref_vec = {
             1.f, 1.f,
             1.f, 1.f,
             1.f, 1.f,
@@ -688,7 +681,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) {
             1.f, 1.f,
             1.f, 1.f,
 
-
             0.f, 0.f,
             0.f, 0.f,
             0.f, 0.f,
@@ -736,7 +728,7 @@ TEST(top_k_layer_tests, second_output) {
     topology.add(mutable_data("second_output", second_output));
     topology.add(arg_max_min("arg_max", { "input", "const", "second_output" }, arg_max_min::min, top_k, arg_max_min::batch));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             //y0x0 y0x1 y1x0 y1x1
             /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
             /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
@@ -788,7 +780,7 @@ TEST(top_k_layer_tests, second_output2) {
     topology.add(mutable_data("second_output", second_output));
     topology.add(arg_max_min("arg_max", { "input", "const", "second_output" }, arg_max_min::max, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::f32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             0.1f, -0.1f,
             0.9f,  1.5f,
             0.2f, 0.2f,
@@ -799,7 +791,6 @@ TEST(top_k_layer_tests, second_output2) {
             0.2f, 0.2f,
             -10.f, 4.2f,
 
-
             3.f,  0.5f,
             7.f,   10.f,
             4.f,  0.5f,
@@ -811,7 +802,7 @@ TEST(top_k_layer_tests, second_output2) {
             8.f,   8.2f
     };
 
-    vector<float> ref_vec = {
+    std::vector<float> ref_vec = {
             0.f, 1.f,
             0.f, 1.f,
             0.f, 1.f,
@@ -823,7 +814,7 @@ TEST(top_k_layer_tests, second_output2) {
             0.f, 1.f
     };
 
-    vector<float> second_ref_vec = {
+    std::vector<float> second_ref_vec = {
             0.1f,
             1.5f,
             0.2f,
@@ -834,7 +825,6 @@ TEST(top_k_layer_tests, second_output2) {
             0.2f,
             4.2f,
 
-
             3.f,
             10.f,
             4.f,
@@ -882,7 +872,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, padding(), data_types::f32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             0.1f, -0.1f,
             0.9f,  1.5f,
             0.2f, 0.2f,
@@ -893,7 +883,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) {
             0.2f, 0.2f,
             -10.f, 4.2f,
 
-
             3.f,  0.5f,
             7.f,   10.f,
             4.f,  0.5f,
@@ -905,7 +894,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) {
             8.f,   8.2f
     };
 
-    vector<float> ref_vec = {
+    std::vector<float> ref_vec = {
             1.f, 1.f,
             1.f, 1.f,
             1.f, 1.f,
@@ -916,7 +905,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) {
             1.f, 1.f,
             1.f, 1.f,
 
-
             0.f, 0.f,
             0.f, 0.f,
             0.f, 0.f,
@@ -951,7 +939,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) {
     }
 }
 
-
 TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
     static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2;
     const auto& engine = get_test_engine();
@@ -961,7 +948,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_indices, false, padding(), data_types::f32));
 
-    vector<float> input_vec = {
+    std::vector<float> input_vec = {
             0.1f, -0.1f,
             0.9f,  1.5f,
             0.2f, 0.2f,
@@ -972,7 +959,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
             0.2f, 0.2f,
             -10.f, 4.2f,
 
-
             3.f,  0.5f,
             7.f,   10.f,
             4.f,  0.5f,
@@ -984,7 +970,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
             8.f,   8.2f
     };
 
-    vector<float> ref_vec = {
+    std::vector<float> ref_vec = {
             0.f, 0.f,
             0.f, 0.f,
             0.f, 0.f,
@@ -995,7 +981,6 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
             0.f, 0.f,
             0.f, 0.f,
 
-
             1.f, 1.f,
             1.f, 1.f,
             1.f, 1.f,
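
    With using namespace std; removed from this file, standard containers are now spelled with their std:: qualifier, which keeps the test sources unambiguous next to the cldnn and tests namespaces that remain imported. The pattern, with placeholder values:

        #include <vector>

        // Explicit qualification instead of relying on a using-directive.
        std::vector<float> input_vec = { 0.1f, -0.1f, 0.9f, 1.5f };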
index d537c89..53414e1 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/average_unpooling.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/average_unpooling.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/mutable_data.hpp>
-#include <api/CPP/pooling.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
+#include <api/mutable_data.hpp>
+#include <api/pooling.hpp>
 #include "test_utils/float16.h"
 
 using namespace cldnn;
index 4ae9bcf..e4a5209 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/concatenation.hpp>
-#include <api/CPP/reorder.hpp>
+#include <api/engine.hpp>
+#include <api/memory.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/input_layout.hpp>
+#include <api/concatenation.hpp>
+#include <api/reorder.hpp>
 
 #include "test_utils/test_utils.h"
 
index ee56281..d6e306d 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/batch_norm.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/batch_norm.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/mutable_data.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
+#include <api/mutable_data.hpp>
 
 using namespace cldnn;
 using namespace tests;
@@ -49,7 +49,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2) {
     //  f0: 44.9305
     //  f1: 107.0624
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
@@ -131,8 +130,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_scale_shift) {
        //  f0: 0.0
        //  f1: 5.0
 
-
-
        const auto& engine = get_test_engine();
 
        auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -219,7 +216,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc) {
     //  f0: 44.9305
     //  f1: 107.0624
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -288,7 +284,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_no_inv_var) {
     //  f0: 44.9305
     //  f1: 107.0624
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -363,7 +358,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_scale_shift) {
        //  f0: 0.0
        //  f1: 5.0
 
-
        const auto& engine = get_test_engine();
 
        auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -455,7 +449,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_scale_shift_no_
     //  f0: 0.0
     //  f1: 5.0
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -544,7 +537,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs) {
        //  f0: 0.0
        //  f1: 5.0
 
-
        const auto& engine = get_test_engine();
 
        auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -650,7 +642,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_no_inv_var)
     //  f0: 0.0
     //  f1: 5.0
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
@@ -775,7 +766,6 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_error_non_eq
     EXPECT_ANY_THROW(network(engine, topology));
 }
 
-
 TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx) {
     //  Mean   : 3x2x2
     //  Input  : 2x3x2x2
@@ -795,7 +785,6 @@ TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx) {
     //  f0: 44.9305
     //  f1: 107.0624
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -876,7 +865,6 @@ TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx_padding) {
     //  f0: 44.9305
     //  f1: 107.0624
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -992,7 +980,6 @@ TEST(batch_normalization_gpu, basic_to_string) {
     EXPECT_NE(network.get_primitive_info("batch_norm7").length(), zero_length);
 }                                         
 
-
 TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_scale_shift_different_shapes) {
     const auto& engine = get_test_engine();
 
@@ -1369,7 +1356,6 @@ TEST(batch_normalization_gpu, basic_in2x2x3x2_byxf_with_var_mean_outputs_no_inv_
     }
 }
 
-
 TEST(batch_normalization_gpu, basic_in2x3x5x2_yxfb_scale_shift_different_shapes) {
     const auto& engine = get_test_engine();
 
@@ -1876,7 +1862,6 @@ TEST(batch_normalization_gpu, basic_in2x2x3x5_byxf_with_var_mean_outputs_no_inv_
     }
 }
 
-
 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b1c2h2w2)
 {
     const auto& engine = get_test_engine();
@@ -1998,7 +1983,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1)
     topology.add(mutable_data("variance", variance));
     topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
 
-
     set_values<float>(input, { 
         0.54881352f,
         0.71518934f,
@@ -2006,8 +1990,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1)
         0.60276335f,
         0.54488319f,
 
-
-
         0.42365479f,
         0.64589411f,
 
@@ -2026,7 +2008,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1)
         -0.434702f, 
         
 
-
         -1.4011f, 
         0.548275f, 
 
@@ -2101,7 +2082,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1)
     topology.add(data("variance", variance));
     topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
 
-
     set_values<float>(input, { 
         0.54881352f,
         0.71518934f,
@@ -2109,8 +2089,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1)
         0.60276335f,
         0.54488319f,
 
-
-
         0.42365479f,
         0.64589411f,
 
@@ -2196,7 +2174,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1_different_shapes)
     topology.add(mutable_data("variance", variance));
     topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
 
-
     set_values<float>(input, {
         0.54881352f,
         0.71518934f,
@@ -2204,8 +2181,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1_different_shapes)
         0.60276335f,
         0.54488319f,
 
-
-
         0.42365479f,
         0.64589411f,
 
@@ -2223,8 +2198,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1_different_shapes)
         -0.0963782f,
         -0.434702f,
 
-
-
         -1.4011f,
         0.548275f,
 
@@ -2299,7 +2272,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1_differen
     topology.add(data("variance", variance));
     topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
 
-
     set_values<float>(input, {
         0.54881352f,
         0.71518934f,
@@ -2307,8 +2279,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1_differen
         0.60276335f,
         0.54488319f,
 
-
-
         0.42365479f,
         0.64589411f,
 
@@ -2329,7 +2299,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1_differen
         -0.0963782f,
         -0.434702f,
 
-
         -1.4011f,
         0.548275f,
 
@@ -2394,7 +2363,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c5h2w1_different_shapes)
     topology.add(mutable_data("variance", variance));
     topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
 
-
     set_values<float>(input, {
         0.54881352f,
         0.71518934f,
@@ -2411,8 +2379,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c5h2w1_different_shapes)
         0.54881352f,
         0.71518934f,
 
-
-
         0.42365479f,
         0.64589411f,
 
@@ -2448,9 +2414,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c5h2w1_different_shapes)
         -0.30327f,
         1.1561f,
 
-
-
-
         -1.4011f,
         0.548275f,
 
@@ -2535,7 +2498,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c5h2w1_differen
     topology.add(data("variance", variance));
     topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
 
-
     set_values<float>(input, {
         0.54881352f,
         0.71518934f,
@@ -2552,8 +2514,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c5h2w1_differen
         0.54881352f,
         0.71518934f,
 
-
-
         0.42365479f,
         0.64589411f,
 
@@ -2589,9 +2549,6 @@ TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c5h2w1_differen
         -0.30327f,
         1.1561f,
 
-
-
-
         -1.4011f,
         0.548275f,
 
index f9b820b..d4a625e 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/batch_norm_grad.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/batch_norm_grad.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
 
 using namespace cldnn;
 using namespace tests;
index 1162dea..5412f9f 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/binary_convolution.hpp"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/memory.hpp>
+#include <api/input_layout.hpp>
+#include "api/binary_convolution.hpp"
+#include "api/reorder.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include <iostream>
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 #include <src/include/to_string_utils.h>
 #include "float16.h"
 #include "test_utils.h"
@@ -126,7 +126,6 @@ void compute_ref_conv_bin(const cldnn::memory &src,
     int PH = p.ph;
     int PW = p.pw;
 
-
     auto extract_bit = [&](data_t_src val, data_t_src bit) -> data_t_src {
         return (data_t_src)((val >> bit) & 0x1);
     };
@@ -257,7 +256,6 @@ TEST_P(binary_convolution_test, conv)
     std::map<primitive_id, network_output> outputs = network_bin.execute();
     auto outputMemory = outputs.at(output_name).get_memory();
 
-
     for (size_t i = 0; i < output_ref.count(); i++) {
         if (p.dt == data_types::f32)
         {
@@ -414,7 +412,6 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel)
     auto output_layout = output_memory.get_layout();
     auto output_ptr = output_memory.pointer<float>();
 
-
     EXPECT_EQ(output_layout.format, format::bfyx);
     EXPECT_EQ(output_layout.data_type, data_types::f32);
     EXPECT_EQ(output_layout.size.batch[0], 1);
@@ -428,7 +425,6 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel)
     }
 }
 
-
 TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) {
     const auto& engine = get_test_engine();
 
@@ -499,7 +495,6 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) {
     auto output_layout = output_memory.get_layout();
     auto output_ptr = output_memory.pointer<uint16_t>();
 
-
     EXPECT_EQ(output_layout.format, format::bfyx);
     EXPECT_EQ(output_layout.data_type, data_types::f16);
     EXPECT_EQ(output_layout.size.batch[0], 1);
index f1b3da3..03b6200 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/border.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/border.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 #include "test_utils/uniform_quantized_real_distribution.hpp"
 
 #include <cstddef>
 
-
 using namespace cldnn;
 using namespace ::tests;
 
-
 template<typename T>
 static std::vector<T> generate_rnd_real_input(
-    const std::size_t b, const std::size_t f, const std::size_t y, const std::size_t x,
+    const std::vector<size_t> sizes,
     const T min = static_cast<T>(0), const T max = static_cast<T>(1), const unsigned rnd_bits = 9)
 {
     static std::default_random_engine rnd_gen(random_seed);
     cldnn::tests::distributions::uniform_quantized_real_distribution<T> rnd_dist(min, max, rnd_bits);
 
+    auto acum = std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies<int>());
+
     std::vector<T> data;
-    data.reserve(b * f * y * x);
-    for (size_t i = 0; i < b * f * y * x; ++i)
+    data.reserve(acum);
+    for (size_t i = 0; i < static_cast<size_t>(acum); ++i)
         data.push_back(rnd_dist(rnd_gen));
 
     return data;
 }
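The helper above now receives its dimensions as a single vector, so one overload serves 4D, 5D and 6D inputs. A minimal usage sketch (the sizes are illustrative, not taken from a specific test):

    // Hedged sketch: the sizes are multiplied together to size the buffer,
    // e.g. {1, 2, 3, 4} yields 1 * 2 * 3 * 4 = 24 random values in [-8, 8].
    std::vector<float> vals = generate_rnd_real_input<float>({ 1, 2, 3, 4 }, -8.0f, 8.0f);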
 
-
 TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
     //  Input (XY) : 4x3
     //  Output (XY): 10x7
@@ -103,7 +102,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
     };
     set_values(input, input_data);
 
-    network network(engine, topology);
+    cldnn::network network(engine, topology);
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
@@ -125,6 +124,221 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
     }
 }
 
+TEST(border_gpu, basic_bfzyx_0x0x1x0x1_0x0x0x0x3_border_constant) {
+
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 2;
+    constexpr auto in_size_x = 2;
+    constexpr auto in_size_z = 3;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 1;
+    constexpr auto blt_size_x = 0;
+    constexpr auto blt_size_z = 1;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 0;
+    constexpr auto brb_size_x = 0;
+    constexpr auto brb_size_z = 3;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+    constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z;
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        border("output", "input",
+            { blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z },
+            { brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z },
+            border_type::constant, 0.0f)
+    );
+
+    std::vector<float> input_data = {
+        1, -2,
+        3, -4,
+
+        5, 6,
+        7, 8,
+
+        -10, 12,
+        13, -13,
+    };
+    std::vector<float> out_data = {
+        0, 0,
+        0, 0,
+        0, 0,
+
+        0, 0,
+        1, -2,
+        3,  -4,
+
+        0, 0,
+        5,  6,
+        7,   8,
+
+        0, 0,
+        -10, 12,
+        13, -13,
+
+        0, 0,
+        0, 0,
+        0, 0,
+
+        0, 0,
+        0, 0,
+        0, 0,
+
+        0, 0,
+        0, 0,
+        0, 0,
+    };
+    set_values(input, input_data);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z));
+
+    uint32_t idx = 0;
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto z = 0; z < out_size_z; ++z) {     // Z
+                for (auto y = 0; y < out_size_y; ++y) {     // Y
+                    for (auto x = 0; x < out_size_x; ++x) { // X
+                        EXPECT_EQ(output_ptr[idx], out_data[idx]);
+                        idx++;
+                    }
+                }
+            }
+        }
+    }
+}
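For reference, the element count asserted above follows from the per-axis rule out = in + blt + brb. A standalone check using the constants of this test:

    // Hedged sketch: z = 3 + 1 + 3 = 7, y = 2 + 1 + 0 = 3, x = 2 + 0 + 0 = 2,
    // so the reference output holds 1 * 1 * 7 * 3 * 2 = 42 values.
    static_assert(1 * 1 * (3 + 1 + 3) * (2 + 1 + 0) * (2 + 0 + 0) == 42, "42 output values expected");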
+
+TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) {
+
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 2;
+    constexpr auto in_size_x = 2;
+    constexpr auto in_size_z = 3;
+    constexpr auto in_size_w = 1;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 0;
+    constexpr auto blt_size_x = 1;
+    constexpr auto blt_size_z = 0;
+    constexpr auto blt_size_w = 1;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 0;
+    constexpr auto brb_size_x = 1;
+    constexpr auto brb_size_z = 0;
+    constexpr auto brb_size_w = 1;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+    constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z;
+    constexpr auto out_size_w = in_size_w + blt_size_w + brb_size_w;
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        border("output", "input",
+            tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) },
+            tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) },
+            border_type::constant, 0.0f)
+    );
+
+    std::vector<float> input_data = {
+        1, -2,
+        3, -4,
+
+        5, 6,
+        7, 8,
+
+        -10, 12,
+        13, -13,
+    };
+    std::vector<float> out_data = {
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 1, -2, 0,
+        0, 3, -4, 0,
+
+        0, 5, 6, 0,
+        0, 7, 8, 0,
+
+        0, -10, 12, 0,
+        0, 13, -13, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+    };
+    set_values(input, input_data);
+
+    cldnn::network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z * out_size_w));
+
+    uint32_t idx = 0;
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto w = 0; w < out_size_w; ++w) {     // W
+                for (auto z = 0; z < out_size_z; ++z) {     // Z
+                    for (auto y = 0; y < out_size_y; ++y) {     // Y
+                        for (auto x = 0; x < out_size_x; ++x) { // X
+                            EXPECT_EQ(output_ptr[idx], out_data[idx]);
+                            idx++;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
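The 6D (bfwzyx) cases build their sizes with the batch/feature/spatial helpers instead of a flat initializer list. A minimal sketch of that pattern, with the dimension values copied from the test above (engine refers to the get_test_engine() result, as in the tests):

    // Hedged sketch: as used in these tests, spatial() is passed x, y, z, w.
    auto in_size = tensor{ batch(1), feature(1), spatial(2, 2, 3, 1) };
    auto in_mem  = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size });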
+
 TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) {
     //  Input (XY) : 4x3
     //  Output (XY): 10x7
@@ -158,8 +372,8 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::constant, 1.0f)
     );
 
@@ -277,6 +491,159 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) {
     }
 }
 
+TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) {
+
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 2;
+    constexpr auto in_size_x = 4;
+    constexpr auto in_size_z = 2;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 0;
+    constexpr auto blt_size_x = 0;
+    constexpr auto blt_size_z = 1;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 0;
+    constexpr auto brb_size_x = 0;
+    constexpr auto brb_size_z = 1;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+    constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z;
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        border("output", "input",
+            { blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z },
+            { brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z },
+            border_type::mirror)
+    );
+
+    std::vector<float> input_data = generate_rnd_real_input<float>({in_size_b, in_size_f, in_size_y, in_size_x, in_size_z}, -8.0f, 8.0f);
+    set_values(input, input_data);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto z = 0; z < out_size_z; ++z) {         // Z
+                for (auto y = 0; y < out_size_y; ++y) {     // Y
+                    for (auto x = 0; x < out_size_x; ++x) { // X
+                        auto output_off = (((b * out_size_f + f) * out_size_z + z) * out_size_y + y) * out_size_x + x; // BFZYX
+
+                        auto in_b = (b >= blt_size_b && b < out_size_b - brb_size_b) ? b - blt_size_b : (b < blt_size_b ? blt_size_b - 1 - b : in_size_b + out_size_b - brb_size_b - 1 - b);
+                        auto in_f = (f >= blt_size_f && f < out_size_f - brb_size_f) ? f - blt_size_f : (f < blt_size_f ? blt_size_f - 1 - f : in_size_f + out_size_f - brb_size_f - 1 - f);
+                        auto in_z = (z >= blt_size_z && z < out_size_z - brb_size_z) ? z - blt_size_z : (z < blt_size_z ? blt_size_z - 1 - z : in_size_z + out_size_z - brb_size_z - 1 - z);
+                        auto in_y = (y >= blt_size_y && y < out_size_y - brb_size_y) ? y - blt_size_y : (y < blt_size_y ? blt_size_y - 1 - y : in_size_y + out_size_y - brb_size_y - 1 - y);
+                        auto in_x = (x >= blt_size_x && x < out_size_x - brb_size_x) ? x - blt_size_x : (x < blt_size_x ? blt_size_x - 1 - x : in_size_x + out_size_x - brb_size_x - 1 - x);
+
+                        auto input_off = (((in_b * in_size_f + in_f) * in_size_z + in_z) * in_size_y + in_y) * in_size_x + in_x; // BFZYX
+
+                        EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
+                    }
+                }
+            }
+        }
+    }
+}
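The index arithmetic above folds each output coordinate back into the input. For the z axis of this test (in_size_z = 2, blt_size_z = 1, brb_size_z = 1, out_size_z = 4) it reduces to a short table, sketched here for reference:

    // Hedged sketch: border_type::mirror reflects across the edge and repeats it.
    //   out z = 0 -> in z = blt_size_z - 1 - 0                          = 0
    //   out z = 1 -> in z = 1 - blt_size_z                              = 0
    //   out z = 2 -> in z = 2 - blt_size_z                              = 1
    //   out z = 3 -> in z = in_size_z + out_size_z - brb_size_z - 1 - 3 = 1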
+
+TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) {
+
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 2;
+    constexpr auto in_size_x = 4;
+    constexpr auto in_size_z = 2;
+    constexpr auto in_size_w = 2;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 0;
+    constexpr auto blt_size_x = 0;
+    constexpr auto blt_size_z = 1;
+    constexpr auto blt_size_w = 1;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 0;
+    constexpr auto brb_size_x = 0;
+    constexpr auto brb_size_z = 1;
+    constexpr auto brb_size_w = 1;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+    constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z;
+    constexpr auto out_size_w = in_size_w + blt_size_w + brb_size_w;
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        border("output", "input",
+            tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) },
+            tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) },
+            border_type::mirror)
+    );
+
+    std::vector<float> input_data = generate_rnd_real_input<float>({ in_size_b, in_size_f, in_size_y, in_size_x, in_size_z, in_size_w }, -8.0f, 8.0f);
+    set_values(input, input_data);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto w = 0; w < out_size_w; ++w) {         // W
+                for (auto z = 0; z < out_size_z; ++z) {         // Z
+                    for (auto y = 0; y < out_size_y; ++y) {     // Y
+                        for (auto x = 0; x < out_size_x; ++x) { // X
+                            auto output_off = ((((b * out_size_f + f) * out_size_w + w) * out_size_z + z) * out_size_y + y) * out_size_x + x; // BFWZYX
+
+                            auto in_b = (b >= blt_size_b && b < out_size_b - brb_size_b) ? b - blt_size_b : (b < blt_size_b ? blt_size_b - 1 - b : in_size_b + out_size_b - brb_size_b - 1 - b);
+                            auto in_f = (f >= blt_size_f && f < out_size_f - brb_size_f) ? f - blt_size_f : (f < blt_size_f ? blt_size_f - 1 - f : in_size_f + out_size_f - brb_size_f - 1 - f);
+                            auto in_w = (w >= blt_size_w && w < out_size_w - brb_size_w) ? w - blt_size_w : (w < blt_size_w ? blt_size_w - 1 - w : in_size_w + out_size_w - brb_size_w - 1 - w);
+                            auto in_z = (z >= blt_size_z && z < out_size_z - brb_size_z) ? z - blt_size_z : (z < blt_size_z ? blt_size_z - 1 - z : in_size_z + out_size_z - brb_size_z - 1 - z);
+                            auto in_y = (y >= blt_size_y && y < out_size_y - brb_size_y) ? y - blt_size_y : (y < blt_size_y ? blt_size_y - 1 - y : in_size_y + out_size_y - brb_size_y - 1 - y);
+                            auto in_x = (x >= blt_size_x && x < out_size_x - brb_size_x) ? x - blt_size_x : (x < blt_size_x ? blt_size_x - 1 - x : in_size_x + out_size_x - brb_size_x - 1 - x);
+
+                            auto input_off = ((((in_b * in_size_f + in_f) * in_size_w + in_w) * in_size_z + in_z) * in_size_y + in_y) * in_size_x + in_x; // BFWZYX
+
+                            EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) {
     //  Input (XY) : 5x4
     //  Output (XY): 11x8
@@ -302,7 +669,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) {
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
     const auto& engine = get_test_engine();
-    auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    auto input = memory::allocate(engine, {data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
     topology.add(
@@ -310,8 +677,8 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::mirror_101)
     );
 
@@ -355,6 +722,185 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) {
     }
 }
 
+TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) {
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 2;
+    constexpr auto in_size_x = 5;
+    constexpr auto in_size_z = 2;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 0;
+    constexpr auto blt_size_x = 0;
+    constexpr auto blt_size_z = 1;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 0;
+    constexpr auto brb_size_x = 0;
+    constexpr auto brb_size_z = 1;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+    constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z;
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, tensor{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        border("output", "input",
+            tensor{ blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z },
+            tensor{ brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z },
+            border_type::mirror_101)
+    );
+
+    std::vector<float> input_data = {
+        1, -2,  3,  -4,  4,
+        5,  6,  7,   8, -8,
+
+        -10, 12, 13, -13, 10,
+        -20, 22, 23, -23, 20,
+    };
+    std::vector<float> out_data = {
+        -10, 12, 13, -13, 10,
+        -20, 22, 23, -23, 20,
+        1, -2,  3,  -4,  4,
+        5,  6,  7,   8, -8,
+        -10, 12, 13, -13, 10,
+        -20, 22, 23, -23, 20,
+        1, -2,  3,  -4,  4,
+        5,  6,  7,   8, -8,
+    };
+    set_values(input, input_data);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z));
+
+    uint32_t idx = 0;
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto z = 0; z < out_size_z; ++z) {         // Z
+                for (auto y = 0; y < out_size_y; ++y) {     // Y
+                    for (auto x = 0; x < out_size_x; ++x) { // X
+                        EXPECT_EQ(output_ptr[idx], out_data[idx]);
+                        idx++;
+                    }
+                }
+            }
+        }
+    }
+}
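For contrast with the mirror test earlier, mirror_101 reflects about the edge without repeating it, which is what the reference literals above encode:

    // Hedged reading of out_data above: with in_size_z = 2 and one plane of
    // padding on each side, the output z-planes appear in the order 1, 0, 1, 0.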
+
+TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) {
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 4;
+    constexpr auto in_size_x = 2;
+    constexpr auto in_size_z = 1;
+    constexpr auto in_size_w = 3;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 0;
+    constexpr auto blt_size_x = 0;
+    constexpr auto blt_size_z = 0;
+    constexpr auto blt_size_w = 1;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 0;
+    constexpr auto brb_size_x = 0;
+    constexpr auto brb_size_z = 0;
+    constexpr auto brb_size_w = 1;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+    constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z;
+    constexpr auto out_size_w = in_size_w + blt_size_w + brb_size_w;
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        border("output", "input",
+            tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) },
+            tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) },
+            border_type::mirror_101)
+    );
+
+    std::vector<float> input_data = {
+        1, -2,  3,  -4,
+        5,  6,  7,   8,
+
+        2, -3,  4,  -5, 
+        15,  4,  4,   4,
+
+        2, -6,  13,  -14,
+        3,  7,  7,   7, 
+    };
+    std::vector<float> out_data = {
+        2, -3,  4,  -5, 
+        15,  4,  4,   4,
+
+        1, -2,  3,  -4,
+        5,  6,  7,   8,
+
+        2, -3,  4,  -5, 
+        15,  4,  4,   4, 
+
+        2, -6,  13,  -14, 
+        3,  7,  7,   7,
+
+        2, -3,  4,  -5, 
+        15,  4,  4,   4,
+    };
+    set_values(input, input_data);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z * out_size_w));
+
+    uint32_t idx = 0;
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto w = 0; w < out_size_w; ++w) {         // W
+                for (auto z = 0; z < out_size_z; ++z) {         // Z
+                    for (auto y = 0; y < out_size_y; ++y) {     // Y
+                        for (auto x = 0; x < out_size_x; ++x) { // X
+                            EXPECT_EQ(output_ptr[idx], out_data[idx]);
+                            idx++;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) {
     //  Input (XY) : 5x4
     //  Output (XY): 11x8
@@ -380,7 +926,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) {
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
     const auto& engine = get_test_engine();
-    auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    auto input = memory::allocate(engine, {data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
     topology.add(
@@ -388,8 +934,8 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::edge)
     );
 
@@ -455,7 +1001,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) {
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
     const auto& engine = get_test_engine();
-    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
     topology.add(
@@ -463,13 +1009,13 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::constant,
                0.0f)
     );
 
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
+    std::vector<float> input_data = generate_rnd_real_input<float>({ in_size_b, in_size_f, in_size_y, in_size_x }, -8.0f, 8.0f);
     set_values(input, input_data);
 
     network network(engine, topology);
@@ -525,7 +1071,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) {
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
     const auto& engine = get_test_engine();
-    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
     topology.add(
@@ -533,12 +1079,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::mirror)
     );
 
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
+    std::vector<float> input_data = generate_rnd_real_input<float>({ in_size_b, in_size_f, in_size_y, in_size_x }, -8.0f, 8.0f);
     set_values(input, input_data);
 
     network network(engine, topology);
@@ -561,7 +1107,6 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) {
 
                     auto input_off  = ((in_b * in_size_f + in_f) * in_size_y + in_y) * in_size_x + in_x; // BFYX
 
-
                     EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
                 }
             }
@@ -591,7 +1136,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) {
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
     const auto& engine = get_test_engine();
-    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
     topology.add(
@@ -599,12 +1144,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::mirror_101)
     );
 
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
+    std::vector<float> input_data = generate_rnd_real_input<float>({ in_size_b, in_size_f, in_size_y, in_size_x }, -8.0f, 8.0f);
     set_values(input, input_data);
 
     network network(engine, topology);
@@ -627,7 +1172,6 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) {
 
                     auto input_off  = ((in_b * in_size_f + in_f) * in_size_y + in_y) * in_size_x + in_x; // BFYX
 
-
                     EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
                 }
             }
@@ -657,7 +1201,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) {
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
     const auto& engine = get_test_engine();
-    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
     topology.add(
@@ -665,12 +1209,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) {
     );
     topology.add(
         border("output", "input",
-               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
-               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
                border_type::edge)
     );
 
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
+    std::vector<float> input_data = generate_rnd_real_input<float>({ in_size_b, in_size_f, in_size_y, in_size_x }, -8.0f, 8.0f);
     set_values(input, input_data);
 
     network network(engine, topology);
@@ -693,7 +1237,6 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) {
 
                     auto input_off  = ((in_b * in_size_f + in_f) * in_size_y + in_y) * in_size_x + in_x; // BFYX
 
-
                     EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
                 }
             }
index 6fbe040..216366b 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/broadcast.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/broadcast.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 #include "test_utils/uniform_quantized_real_distribution.hpp"
 
 #include <cstddef>
 
-
 using namespace cldnn;
 using namespace ::tests;
 
@@ -1015,7 +1014,6 @@ TEST(broadcast_gpu_uint8_t, bfyx_2_to_2x3x4x5_w_b_axes_1_2_3) {
     start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2}, {1, 2, 3}, golden_data);
 }
 
-
 TEST(broadcast_gpu, basic_error_wrong_b_axes_size) {
 
     const auto& engine = get_test_engine();
index 175cc0f..778f201 100644 (file)
 */
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
 #include "test_utils/test_utils.h"
-#include "api/CPP/arg_max_min.hpp"
+#include "api/arg_max_min.hpp"
 
 using namespace cldnn;
 using namespace tests;
@@ -57,7 +57,6 @@ unsigned long GetMilliseconds(void)
 }
 #endif
 
-
 // Run some topology to see if the command queue works correctly
 // Copied from arg_max_gpu.base test.
 void exexute_network(cldnn::engine engine)
index 09e299e..f855ba9 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/concatenation.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/pooling.hpp>
-#include <api/CPP/condition.hpp>
-#include <api/CPP/softmax.hpp>
-#include <api/CPP/scale.hpp>
-#include <api/CPP/data.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/concatenation.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/pooling.hpp>
+#include <api/condition.hpp>
+#include <api/softmax.hpp>
+#include <api/scale.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 
 #include <cstddef>
 
-
 using namespace cldnn;
 using namespace ::tests;
 
-
 bool is_output_equal(const cldnn::memory& mem, const std::vector<float>& ref)
 {
     auto ptr = mem.pointer<float>();
@@ -63,8 +61,7 @@ topology generate_simple_branch (bool branch_true_false, const primitive_id& inp
     return branch;
 }
 
-
-TEST(condition_gpu, basic_equal_comp) {
+TEST(DISABLED_condition_gpu, basic_equal_comp) {
     const auto& engine = get_test_engine();
     build_options bs;
     bs.set_option(build_option::optimize_data(true));
@@ -116,7 +113,7 @@ TEST(condition_gpu, basic_equal_comp) {
 
 }
 
-TEST(condition_gpu, basic_range_equal_comp) {
+TEST(DISABLED_condition_gpu, basic_range_equal_comp) {
 
     const auto& engine = get_test_engine();
     build_options bs;
@@ -335,7 +332,7 @@ TEST(DISABLED_condition_gpu, generic_test_true_false) {
     }
 }
 
-TEST(condition_gpu, basic_stacked_ifs) {
+TEST(DISABLED_condition_gpu, basic_stacked_ifs) {
 
     /*   
         <prims...>
@@ -355,16 +352,15 @@ TEST(condition_gpu, basic_stacked_ifs) {
     auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
     auto compare2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
 
-
     topology condi_1_true = generate_simple_branch(true, "condi");
     topology condi_1_false = generate_simple_branch(false, "condi");
     topology condi_2_true;
     condi_2_true.add(
-        activation("activ_when_true", "condi2", cldnn_activation_func::activation_log2)
+        activation("activ_when_true", "condi2", activation_func::log2)
     );
     topology condi_2_false;
     condi_2_false.add(
-        activation("activ_when_false", "condi2", cldnn_activation_func::activation_relu)
+        activation("activ_when_false", "condi2", activation_func::relu)
     );
 
     topology topology;
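Besides prefixing these tests with DISABLED_, the hunk above also switches to the new activation enum spelling; the call now reads:

    // Taken from the hunk above: activation_func::log2 replaces
    // cldnn_activation_func::activation_log2 (and likewise for relu).
    activation("activ_when_true", "condi2", activation_func::log2)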
@@ -407,7 +403,7 @@ TEST(condition_gpu, basic_stacked_ifs) {
     EXPECT_TRUE(is_output_equal(out_data, {1.0f, 2.0f}));
 }
 
-TEST(condition_gpu, basic_nested_ifs) {
+TEST(DISABLED_condition_gpu, basic_nested_ifs) {
 
     /*
     <prims...>
@@ -431,7 +427,6 @@ TEST(condition_gpu, basic_nested_ifs) {
     auto scale_10_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
     set_values(scale_10_mem, { 10.0f });
 
-
     topology nested_true;
     {
         nested_true.add(scale("scale_5", "condi_nested", "scale_5_data"),
@@ -502,8 +497,7 @@ TEST(condition_gpu, basic_nested_ifs) {
     EXPECT_TRUE(is_output_equal(out_data, { 10.0f, 20.0f }));
 }
 
-
-TEST(condition_gpu, negative_compare_wrong_layout) {
+TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) {
     const auto& engine = get_test_engine();
     build_options bs;
     bs.set_option(build_option::optimize_data(true));
@@ -527,7 +521,7 @@ TEST(condition_gpu, negative_compare_wrong_layout) {
     EXPECT_ANY_THROW(network net(engine, topology, bs););
 }
 
-TEST(condition_gpu, negative_too_big_offset) {
+TEST(DISABLED_condition_gpu, negative_too_big_offset) {
     const auto& engine = get_test_engine();
     build_options bs;
     bs.set_option(build_option::optimize_data(true));
@@ -551,7 +545,7 @@ TEST(condition_gpu, negative_too_big_offset) {
     EXPECT_ANY_THROW(network net(engine, topology, bs););
 }
 
-TEST(condition_gpu, negative_not_same_layouts) {
+TEST(DISABLED_condition_gpu, negative_not_same_layouts) {
     const auto& engine = get_test_engine();
     build_options bs;
     bs.set_option(build_option::optimize_data(true));
@@ -582,7 +576,7 @@ TEST(condition_gpu, negative_not_same_layouts) {
     EXPECT_ANY_THROW(network net(engine, topology, bs););
 }
 
-TEST(condition_gpu, negative_same_names_within_different_networks) {
+TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) {
     const auto& engine = get_test_engine();
     build_options bs;
     bs.set_option(build_option::optimize_data(true));
@@ -614,4 +608,4 @@ TEST(condition_gpu, negative_same_names_within_different_networks) {
     );
     
     EXPECT_ANY_THROW(network net(engine, topology, bs););
-}
\ No newline at end of file
+}
index 1a2c671..05b4d57 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/contract.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/contract.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 #include "test_utils/uniform_quantized_real_distribution.hpp"
@@ -189,7 +189,6 @@ void generic_contract_test_float(cldnn::format test_input_fmt, int input_b, int
     EXPECT_EQ(f_size, (int)output_cpu[0].size());
     EXPECT_EQ(b_size, (int)output_cpu.size());
 
-
     bool test_is_correct = true;
     VF<T> output_cpu_vec = flatten_4d<T>(test_input_fmt, output_cpu);
     for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
@@ -251,7 +250,6 @@ void generic_contract_test_int(cldnn::format test_input_fmt, int input_b, int in
     EXPECT_EQ(f_size, (int)output_cpu[0].size());
     EXPECT_EQ(b_size, (int)output_cpu.size());
 
-
     bool test_is_correct = true;
     VF<T> output_cpu_vec = flatten_4d<T>(test_input_fmt, output_cpu);
 
index 7c423c6..5c83855 100644 (file)
 
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/convolution.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/convolution.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include "test_utils/float16.h"
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 #include <algorithm>
 #include <cmath>
 #include <iostream>
 #include <thread>
 #include <type_traits>
 #include <fstream>
-#include <api/CPP/reorder.hpp>
+#include <api/reorder.hpp>
 
 using namespace cldnn;
 using namespace tests;
 
-
 namespace cldnn
 {
     template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
 }
 
-
-
 template<typename T>
 T kahan_summation(std::vector<T> &input) {
     T sum = 0;
@@ -63,14 +60,16 @@ T kahan_summation(std::vector<T> &input) {
 template<typename T>
 VVF<T> reference_convolve(VVVF<T> &input, VVVF<T> &filter, int stride_y, int stride_x, float bias, int dilation_y = 1, int dilation_x = 1,
         int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0,
-        int output_padding_x = 0, size_t f_begin = 0)
+        int output_padding_x = 0, size_t f_begin = 0, size_t f_end = 0, bool depthwise = false)
 {
     size_t kernel_extent_y = dilation_y * (filter[0].size() - 1) + 1;
     size_t kernel_extent_x = dilation_x * (filter[0][0].size() - 1) + 1;
     size_t output_y = 1 + (input[0].size() - kernel_extent_y + 2 * input_padding_y) / stride_y + 2 * output_padding_y;
     size_t output_x = 1 + (input[0][0].size() - kernel_extent_x + 2 * input_padding_x) / stride_x + 2 * output_padding_x;
     VVF<T> output(output_y, VF<T>(output_x, 0));
-    for (size_t f = 0; f < filter.size(); ++f) {
+    size_t filter_begin = f_begin ? f_begin : 0;
+    size_t filter_end = f_end ? f_end : filter.size();
+    for (size_t f = filter_begin; f < filter_end; ++f) {
         for (size_t y = 0; y < (output_y - 2 * output_padding_y); ++y) {
             for (size_t x = 0; x < (output_x - 2 * output_padding_x); ++x) {
                 VF<T> values;
@@ -81,7 +80,10 @@ VVF<T> reference_convolve(VVVF<T> &input, VVVF<T> &filter, int stride_y, int str
                     for (size_t xf = 0; xf < filter[0][0].size(); ++xf) {
                         int xi = -input_padding_x + (int)xf * dilation_x + stride_x * (int)x;
                         if (xi < 0 || (int)input[0][0].size() <= xi) continue;
-                        values.push_back(input[f_begin + f][yi][xi] * filter[f][yf][xf]);
+                        if (!depthwise)
+                            values.push_back(input[f][yi][xi] * filter[f][yf][xf]);
+                        else
+                            values.push_back(input[f][yi][xi] * filter[0][yf][xf]);
                     }
                 }
                 output[y + output_padding_y][x + output_padding_x] += kahan_summation<T>(values);
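The hunks above extend reference_convolve with an optional feature range and a depthwise mode that reuses a single filter plane. A hedged call sketch (the feature range 2..4 is illustrative; input and filter are the caller's VVVF<T> tensors):

    // Hedged sketch: convolve only input features [2, 4), applying the shared
    // plane filter[0]; stride 1x1, no dilation, no padding, zero bias.
    auto out = reference_convolve(input, filter, /*stride_y*/ 1, /*stride_x*/ 1,
                                  /*bias*/ 0.0f, 1, 1, 0, 0, 0, 0,
                                  /*f_begin*/ 2, /*f_end*/ 4, /*depthwise*/ true);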
@@ -147,7 +149,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 4, 4 } });
     auto trans = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 18, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } });
 
     set_values(input, { 0.680375f, -0.211234f, 0.566198f, 0.59688f, 0.823295f, -0.604897f, -0.329554f, 0.536459f,
                         -0.444451f, 0.10794f, -0.0452059f, 0.257742f, -0.270431f, 0.0268018f, 0.904459f, 0.83239f,
@@ -279,7 +281,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1)
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 4, 4 } });
     auto trans = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 18, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } });
 
     set_values(input, { 0.680375f, -0.211234f, 0.566198f, 0.59688f, 0.823295f, -0.604897f, -0.329554f, 0.536459f,
                         -0.444451f, 0.10794f, -0.0452059f, 0.257742f, -0.270431f, 0.0268018f, 0.904459f, 0.83239f,
@@ -411,7 +413,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) {
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 4, 4 } });
     auto trans = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 36, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } });
 
     set_values(input, { 0.680375f, -0.211234f, 0.566198f, 0.59688f, 0.823295f, -0.604897f, -0.329554f, 0.536459f,
                         -0.444451f, 0.10794f, -0.0452059f, 0.257742f, -0.270431f, 0.0268018f, 0.904459f, 0.83239f,
@@ -1020,7 +1022,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) {
         for (int z = 0; z < z_size; ++z) {
             for (int y = 0; y < y_size; ++y) {
                 for (int x = 0; x < x_size; ++x) {
-                    int i = f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x;
                     EXPECT_EQ(output_vec[f][z][y][x],
                         output_ptr[f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x]);
                 }
@@ -1034,7 +1035,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) {
     const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 1, 2, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1, 1 } });
 
     set_values(input, {
         1.0f,  0.0f,  1.0f,  0.0f,
@@ -1153,7 +1154,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) {
         for (int z = 0; z < z_size; ++z) {
             for (int y = 0; y < y_size; ++y) {
                 for (int x = 0; x < x_size; ++x) {
-                    int i = f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x;
                     EXPECT_EQ(output_vec[f][z][y][x],
                         output_ptr[f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x]);
                 }
@@ -1203,7 +1203,6 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) {
     //  8  8   8  8
     //  8  8   8  8
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} });
@@ -1212,7 +1211,6 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) {
     set_values(input, { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
     set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
 
-
     topology topology(
         input_layout("input", input.get_layout()),
         data("weights", weights),
@@ -1250,7 +1248,6 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) {
     }
 }
 
-
 TEST(convolution_f32_fw_gpu, basic_convolution) {
     //  Filter : 2x3
     //  Stride : 2x1
@@ -1448,8 +1445,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) {
             { 1,1,1,1 },
             { 0,0,-1,-2 },
             { 1, 1, 1, 1 },
-            false,
-            0,
             padding{ { 0,0,0,0 }, 0 })
     );
 
@@ -1552,8 +1547,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) {
             { 1, 1, 1, 1 },
             { 0,0,1,2 },
             { 0,0,1,2 },
-            false,
-            0,
             padding{ { 0,0,0,0 }, 0 })
     );
 
@@ -1651,8 +1644,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) {
             { 1, 1, 1, 1 },
             { 0,0,1,2 },
             { 0,0,2,3 },
-            false,
-            0,
             padding{ { 0,0,0,0 }, 0 })
     );
 
@@ -1760,8 +1751,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_input_offs
             { 1, 1, 1, 1 },
             { 0,0,1,2 },
             { 0,0,1,2 },
-            false,
-            0,
             padding{ { 0,0,0,0 }, 0 })
     );
 
@@ -1872,8 +1861,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_input_off
             { 1, 1, 1, 1 },
             { 0,0,1,2 },
             { 0,0,2,3 },
-            false,
-            0,
             padding{ { 0,0,0,0 }, 0 })
     );
 
@@ -1972,8 +1959,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) {
             { 1,1,1,1 },
             { 0,0,-1,-2 },
             { 1, 1, 1, 1 },
-            false,
-            0,
             padding{ { 0,0,-x_pad,-y_pad }, 0 })
     );
 
@@ -2293,7 +2278,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) {
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
 
     set_values(input, { 1.0f, 2.0f });
     set_values(weights, { 1.0f, 2.0f, -1.0f, -2.0f });
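Throughout this file the bias allocations move from { 1, 1, N, 1 } to { 1, N, 1, 1 }: one value per output feature now sits in the feature dimension rather than along spatial x. A representative allocation in the new layout (N = 2, as in the hunk above):

    // Hedged sketch: biases for a convolution with two output features, bfyx layout.
    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });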
@@ -2352,7 +2337,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) {
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
 
     set_values(input, { 1.0f, 3.0f, 2.0f, 4.0f });
     set_values(weights, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f });
@@ -2408,7 +2393,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) {
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
 
     set_values(input, { -2.3f, -0.1f, 3.1f, 1.9f });
     set_values(weights, { -1.1f, 1.5f, 0.5f, -0.5f, 0.1f, 0.2f, 0.4f, 0.7f, 2.0f, -1.0f, 2.5f, -1.5f });
@@ -2539,8 +2524,6 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
             { 1,1,2,2 },
             { 0,0,-1,-1 },
             { 1, 1, 1, 1 },
-            false,
-            0,
             padding{ { 0,0,1,1 }, 0 })
     );
 
@@ -2767,7 +2750,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) {
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         -0.5f,  0.5f,  1.0f,  1.5f,  0.5f,  2.3f,  2.0f, -0.4f,
@@ -2824,7 +2807,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx)
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         -0.5f,  0.5f,  1.0f,  1.5f,  0.5f,  2.3f,  2.0f, -0.4f,
@@ -2882,7 +2865,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) {
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         -0.5f, -0.5f,  0.5f,  0.5f,  1.0f,  1.0f,  1.5f,  1.5f,  0.5f,  0.5f,  2.3f,  2.3f,  2.0f,  2.0f, -0.4f, -0.4f,
@@ -3169,7 +3152,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) {
     topology topology(input_layout("input", input.get_layout()));
 
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } });
 
     set_values(weights,
         {
@@ -3263,7 +3246,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx)
     topology topology(input_layout("input", input.get_layout()));
 
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } });
 
     set_values(weights,
         {
@@ -3372,9 +3355,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_nopad_split2) {
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
     auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
-    auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
+    auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
-    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
+    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
 
     set_values(input, {
        1.5f, 0.5f, 0.0f, -0.5f
@@ -3448,15 +3431,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) {
     //   1
     //   3.5
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
     auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
-    auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
+    auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
-    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
+    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
 
     set_values(input, {
         1.5f, 0.5f
@@ -3536,15 +3518,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no
     //   6
     //  -2
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } });
     auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } });
-    auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
+    auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } });
-    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
+    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
 
     set_values(input, {
         1.5f, 0.5f, 2.0f, -1.0f
@@ -3641,9 +3622,12 @@ TEST(convolution_gpu, trivial_convolution_relu) {
             { "biases" },
             { 1,1,2,2 },
             { 0,0,0,0 },
-            { 1, 1, 1, 1 },
-            true,
-            0)
+            { 1, 1, 1, 1 }),
+        activation(
+            "out",
+            "conv",
+            activation_func::relu
+        )
     );
 
     network network(engine, topology);
@@ -3651,7 +3635,7 @@ TEST(convolution_gpu, trivial_convolution_relu) {
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "conv");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
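
In the hunks above, the trailing `true, 0` arguments (fused ReLU flag and slope) disappear from the convolution constructor; ReLU is now added as its own activation primitive, and the network's single output id moves from "conv" to "out". A short sketch of the resulting pattern, assuming the input/weights/biases memories allocated earlier in the test:

    // ReLU expressed as an explicit primitive chained after the convolution.
    topology topology(
        input_layout("input", input.get_layout()),
        data("weights", weights),
        data("biases", biases),
        convolution("conv", "input", { "weights" }, { "biases" },
                    { 1, 1, 2, 2 },    // stride
                    { 0, 0, 0, 0 },    // input offset
                    { 1, 1, 1, 1 }),   // dilation
        activation("out", "conv", activation_func::relu));

    network network(engine, topology);
    network.set_input_data("input", input);
    auto outputs = network.execute();
    EXPECT_EQ(outputs.begin()->first, "out");   // the activation is now the output node
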
 
@@ -3715,9 +3699,13 @@ TEST(convolution_gpu, relu_with_negative_slope) {
             { "biases" },
             { 1,1,2,2 },
             { 0,0,0,0 },
-            { 1, 1, 1, 1 },
-            true,
-            0.1f)
+            { 1, 1, 1, 1 }),
+        activation(
+            "out",
+            "conv",
+            activation_func::relu_negative_slope,
+            {0.1f, 0.0f}
+        )
     );
 
     network network(engine, topology);
@@ -3725,7 +3713,7 @@ TEST(convolution_gpu, relu_with_negative_slope) {
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "conv");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
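
The same rewrite applies to the fused leaky ReLU above: the old `true, 0.1f` pair becomes a relu_negative_slope activation, with the slope carried in the extra-parameter pair (the second element is left at zero here). A one-line sketch:

    // Leaky ReLU with slope 0.1, replacing the fused (true, 0.1f) arguments.
    activation("out", "conv", activation_func::relu_negative_slope, { 0.1f, 0.0f });
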
 
@@ -3836,11 +3824,10 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
     auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
     auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y );
     auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size });
-    auto biases = memory::allocate(engine, { data_types::f32, biases_format, {1,1,output_feature_count,1}});
+    auto biases = memory::allocate(engine, { data_types::f32, biases_format, {1,output_feature_count,1,1}});
 
     //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}});
 
-
     // input:
     std::vector<float> input_vals_template {
         0.25f, 0.50f, 0.75f, 1.00f,
@@ -3864,7 +3851,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
     }
     set_values(input, input_vals);
 
-
     // weights:
     std::vector<float> weights_vals_template {
         -4.0f, -2.0f,
@@ -3899,7 +3885,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
 #endif
     set_values(weights, weights_vals);
 
-
     // biases:
     std::vector<float> biases_vals;
     biases_vals.reserve(output_feature_count);
@@ -3909,7 +3894,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
     }
     set_values(biases, biases_vals);
 
-
     // output:
     std::vector<float> output_vals_template {
          9.0f, 10.0f,
@@ -3947,9 +3931,13 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
             { "biases" },
             { 1,1,stride_x,stride_y },
             { 0,0,0,0 },
-            { 1, 1, 1, 1 },
-            true,
-            0.1f)
+            { 1, 1, 1, 1 }),
+            activation(
+                "out",
+                "conv",
+                activation_func::relu,
+                { 0.1f, 0.0f }
+            )
     );
 
     network network(engine, topology);
@@ -3957,7 +3945,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "conv");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
 
@@ -4076,7 +4064,7 @@ void add_primitives(const engine& engine, topology& topology)
 
     std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
     set_values<char>(weights, weights_values);
-    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
     auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
     set_values(biases, { 1.0f, -8.0f });
 
@@ -4084,7 +4072,9 @@ void add_primitives(const engine& engine, topology& topology)
         data("weights", weights),
         data("biases", biases),
         data("w_qfs", weigths_qfs),
-        convolution("conv", "input", { "weights" }, { "biases" }, { 0, 0, 1, 2 }, { 0, 0, 0, 0 }, { 1, 1, 1, 1 }, true));
+        convolution("conv", "input", { "weights" }, { "biases" }, { 0, 0, 1, 2 }, { 0, 0, 0, 0 }, { 1, 1, 1, 1 }),
+        activation( "out", "conv", activation_func::relu)
+    );
 }
 
 TEST(convolution_f32_fw_gpu, byte_activation) {
@@ -4142,9 +4132,9 @@ TEST(convolution_f32_fw_gpu, byte_activation) {
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
-    EXPECT_EQ(outputs.begin()->first, "conv");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
-    auto output_memory = outputs.at("conv").get_memory();
+    auto output_memory = outputs.at("out").get_memory();
     auto output_layout = output_memory.get_layout();
     auto output_ptr = output_memory.pointer<char>();
 
@@ -4198,7 +4188,7 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
 
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
-    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
     auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
 
     std::vector<float> weights_values_f = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 19.0, 17.0, -1.0, -10.0, 32.0, 23.0 };
@@ -4280,7 +4270,6 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
         }
 }
 
-
 TEST(convolution_f32_fw_gpu, quantized_convolution_high_prec_calib_per_ofm) {
     //  Filter : 2x3
     //  Stride : 2x1
@@ -4313,7 +4302,7 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_high_prec_calib_per_ofm) {
 
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
-    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
     auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
     auto output_calibrations = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
 
@@ -4451,7 +4440,7 @@ protected:
 
         auto input_shape = tensor(1, n_features, 4, 1);
         auto weights_shape = tensor(n_features, n_features, 3, 1);
-        auto biases_shape = tensor(1, 1, n_features, 1);
+        auto biases_shape = tensor(1, n_features, 1, 1);
 
         auto input = memory::allocate(
             engine,
@@ -4500,15 +4489,15 @@ protected:
                                       type_to_data_type<OutputTy>::value,
                                       {1, 1, 1, 1},
                                       {0, 0, 0, 0},
-                                      {1, 1, 1, 1},
-                                      true));
+                                      {1, 1, 1, 1}),
+            activation("out", "conv", activation_func::relu));
 
         network network(engine, topology, opts);
         network.set_input_data("input", input);
 
         auto outputs = network.execute();
 
-        auto output_memory = outputs.at("conv").get_memory();
+        auto output_memory = outputs.at("out").get_memory();
         auto output_layout = output_memory.get_layout();
         auto output_ptr = output_memory.pointer<OutputTy>();
         int y_size = output_layout.size.spatial[1];
@@ -4682,10 +4671,10 @@ TEST(convolution_f32_fw_gpu, calibration_advance) {
 
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
     auto w_qf = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
     auto weights_f_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 3, 2 } });
-    auto biases_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+    auto biases_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
     auto w_qf_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
 
     std::vector<float> weights_values_f = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.9f, 1.7f, -1.0f, -1.0f, 3.2f, 2.3f };
@@ -4854,7 +4843,6 @@ TEST(convolution_f32_fw_gpu, local_basic) {
         EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
 }
 
-
 TEST(convolution_f32_fw_gpu, local_multi_out_features) {
     //  Filter : 3x1x3x3x2x2 - local sizes
     //  Stride : 1x1
@@ -4916,7 +4904,7 @@ TEST(convolution_f32_fw_gpu, local_multi_out_features) {
     tensor local_size = tensor(3,1,2,2,3,3);
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
-    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
 
     std::vector<float> weights_values_f = {
         0.0, 0.0, 0.0, 0.0,
@@ -5130,7 +5118,6 @@ TEST(convolution_f32_fw_gpu, local_multi_input_features) {
         EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
 }
 
-
 TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
 {
 #define USE_OLD_WEIGHTS_FORMAT 0
@@ -5144,7 +5131,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
         return;
     }
 
-
     const auto input_format   = format::yxfb;
 #if USE_OLD_WEIGHTS_FORMAT
     const auto weights_format = format::bfyx;
@@ -5168,12 +5154,11 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
     const int32_t output_x = (input_x - weights_x) / stride_x + 1;
     const int32_t output_y = (input_y - weights_y) / stride_y + 1;
 
-
     auto input_size = tensor( batch_size, input_feature_count, input_x, input_y );
     auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
     auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y );
     auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size });
-    auto biases_size = tensor( 1,1,output_feature_count,1 );
+    auto biases_size = tensor( 1,output_feature_count,1,1 );
     auto biases = memory::allocate(engine, { data_types::f32, biases_format, biases_size });
     auto output_size = tensor( batch_size, output_feature_count, output_x, output_y );
     //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}});
@@ -5183,7 +5168,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
     //auto biases_cvtd = memory::allocate(engine, { data_types::f16, biases_size });
     //auto output_cvtd  = memory::allocate({output_cvt_format, {batch_size, {output_x, output_y}, output_feature_count}});
 
-
     // input:
     std::vector<float> input_vals_template {
         0.25f, 0.50f, 0.75f, 1.00f,
@@ -5207,7 +5191,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
     }
     set_values(input, input_vals);
 
-
     // weights:
     std::vector<float> weights_vals_template {
         -0.50f, -0.25f,
@@ -5242,7 +5225,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
 #endif
     set_values(weights, weights_vals);
 
-
     // biases:
     std::vector<float> biases_vals;
     biases_vals.reserve(output_feature_count);
@@ -5252,7 +5234,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
     }
     set_values(biases, biases_vals);
 
-
     // output:
     std::vector<float> output_vals_template {
         1.125f,  1.250f,
@@ -5289,7 +5270,6 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
 //
 //    auto expected_ptr = expected.as<const memory&>().pointer<float>();
 
-
     // Computing convolution.
     topology topology(
         input_layout("input", input.get_layout()),
@@ -5347,6 +5327,15 @@ using TestParamType_convolution_gpu = ::testing::tuple<int,   // 0 - Filter size
                                                        int,   // 3 - Output padding
                                                        bool>; // 4 - With bias
 
+
+using TestParamType_convolution_depthwise_gpu = ::testing::tuple<int,   // 0 - Input XY size
+        int,   // 1 - Kernel sizeY
+        int,   // 2 - Kernel sizeX
+        int,   // 3 - Groups number
+        int,   // 4 - Stride
+        int,   // 5 - Output padding
+        bool>; // 6 - With bias
+
 struct convolution_gpu : public ::testing::TestWithParam<TestParamType_convolution_gpu>
 {
     static std::string
@@ -5362,6 +5351,23 @@ struct convolution_gpu : public ::testing::TestWithParam<TestParamType_convoluti
     }
 };
 
+struct convolution_depthwise_gpu : public ::testing::TestWithParam<TestParamType_convolution_depthwise_gpu>
+{
+    static std::string
+    PrintToStringParamName(testing::TestParamInfo<TestParamType_convolution_depthwise_gpu> param_info)
+    {
+        // construct a readable name
+        return "in" + std::to_string(testing::get<0>(param_info.param))
+               + "x" + std::to_string(testing::get<0>(param_info.param))
+               + "_k" + std::to_string(testing::get<1>(param_info.param))
+               + 'x' + std::to_string(testing::get<2>(param_info.param))
+               + "_f" + std::to_string(testing::get<3>(param_info.param))
+               + "_stride" + std::to_string(testing::get<4>(param_info.param))
+               + "_pad" + std::to_string(testing::get<5>(param_info.param))
+               + (testing::get<6>(param_info.param) ? "_bias" : "");
+    }
+};
+
 TEST_P(convolution_gpu, b_fs_yx_fsv4)
 {
     const int in_B = 2;
@@ -5442,8 +5448,8 @@ TEST_P(convolution_gpu, b_fs_yx_fsv4)
                 x = 0.3f;
             return x;
         });
-        auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
-        auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, _OuD, 1, 1}});
+        auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, _OuD, 1, 1}});
         auto callib_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
         auto callib_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
         auto quant_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
@@ -5529,7 +5535,7 @@ TEST_P(convolution_gpu, b_fs_yx_fsv4)
 }
 
 // Select particular test cases
-INSTANTIATE_TEST_CASE_P(convolution_gpu_imad,
+INSTANTIATE_TEST_CASE_P(DISABLED_convolution_gpu_imad,
                         convolution_gpu,
                         ::testing::Values(
                             // Filter size, Input features, Stride, Output padding, With bias
@@ -5615,7 +5621,7 @@ TEST_P(convolution_gpu, fs_byx_fsv32)
     if (with_bias)
     {
         // Generate bias data
-        auto biases_size = tensor(1, 1, output_f, 1);
+        auto biases_size = tensor(1, output_f, 1, 1);
         auto biases_data = generate_random_1d<FLOAT16>(output_f, -1, 1);
         auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size });
         set_values(biases_mem, biases_data);
@@ -5699,6 +5705,147 @@ TEST_P(convolution_gpu, fs_byx_fsv32)
                 }
 }
 
+INSTANTIATE_TEST_CASE_P(convolution_depthwise_gpu,
+                        convolution_depthwise_gpu,
+                        ::testing::Values(
+                                // Input size, Filter size Y, Filter size X, groups, Stride, Output padding, With bias
+                                // Stride testing
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 32, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 32, 2, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 32, 3, 0, false),
+                                // Different Features testing
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 16, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 20, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 25, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 33, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 35, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 45, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 65, 1, 0, false),
+                                // Different filter sizes testing
+                                TestParamType_convolution_depthwise_gpu(5, 3, 2, 16, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 1, 16, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 2, 3, 16, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 1, 3, 16, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 2, 16, 2, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 1, 16, 2, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 2, 3, 16, 2, 0, false),
+                                TestParamType_convolution_depthwise_gpu(5, 1, 3, 16, 2, 0, false),
+                                // Input FeatureMap testing
+                                TestParamType_convolution_depthwise_gpu(20, 3, 3, 50, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(30, 3, 3, 50, 1, 0, false),
+                                TestParamType_convolution_depthwise_gpu(55, 3, 3, 50, 1, 0, false),
+                                // Output padding testing + strides
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 32, 1, 1, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 32, 2, 2, false),
+                                TestParamType_convolution_depthwise_gpu(5, 3, 3, 32, 3, 3, false)
+                                ),
+                        convolution_depthwise_gpu::PrintToStringParamName);
+
+TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32)
+{
+    const auto& engine = get_test_engine();
+
+    if (!engine.get_info().supports_fp16)
+    {
+        std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
+        EXPECT_EQ(1, 1);
+        return;
+    }
+
+    const int batch_num = 2;
+    const int input_xy = testing::get<0>(GetParam());
+    const int groups = testing::get<3>(GetParam());
+    const int input_f = groups;
+    const int output_f = groups;
+    const int filter_y = testing::get<1>(GetParam());
+    const int filter_x = testing::get<2>(GetParam());
+    const int stride = testing::get<4>(GetParam());
+    const int output_padding = testing::get<5>(GetParam());
+    const bool with_bias = testing::get<6>(GetParam());
+    const int input_offset_y = -(filter_y / 2);
+    const int input_offset_x = -(filter_x / 2);
+
+    const int output_y = 1 + (input_xy + 2 * (-input_offset_y) - filter_y) / stride + 2 * output_padding;
+    const int output_x = 1 + (input_xy + 2 * (-input_offset_x) - filter_x) / stride + 2 * output_padding;
+
+    auto input_size = tensor(batch_num, input_f, input_xy, input_xy);
+    auto input_data = generate_random_4d<FLOAT16>(batch_num, input_f, input_xy, input_xy, -1, 1);
+    auto input_data_bfyx = flatten_4d(format::bfyx, input_data);
+    auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size });
+    set_values(input_mem, input_data_bfyx);
+
+    auto weights_size = tensor(output_f, 1, filter_x, filter_y);
+    auto weights_data = generate_random_4d<FLOAT16>(output_f, 1, filter_y, filter_x, -1, 1);
+    auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data);
+    auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size });
+    set_values(weights_mem, weights_data_bfyx);
+
+    // Will be used to store reference values, calculated with or without bias depending on the test parameters
+    auto reference_result = VVVVF<FLOAT16>(batch_num, VVVF<FLOAT16>(output_f));
+
+    topology topology(
+            input_layout("input", input_mem.get_layout()),
+            data("weights_fsv", weights_mem));
+
+    // Reorder input to fs_b_yx_fsv32
+    topology.add(reorder("input_fsv", "input", { data_types::f16, format::fs_b_yx_fsv32, input_size }));
+
+    // Calculate reference values without bias
+    for (auto bi = 0; bi < batch_num; ++bi)
+    {
+        for (auto ofi = 0; ofi < output_f; ++ofi)
+        {
+            reference_result[bi][ofi] = reference_convolve(
+                    input_data[bi], weights_data[ofi],  // input, weights
+                    stride, stride,                     // strides
+                    0,                                  // bias
+                    1, 1,                               // dilation
+                    -input_offset_y, -input_offset_x,   // input padding
+                    output_padding, output_padding,     // output_padding
+                    ofi, ofi + 1,                       // f_begin, f_end
+                    true);                              // depthwise
+        }
+    }
+
+    auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, groups,
+                                { 1, 1, stride, stride }, { 0, 0, input_offset_x, input_offset_y });
+    conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f);
+
+    topology.add(conv_fsv);
+
+    build_options options;
+    options.set_option(build_option::optimize_data(true));
+    network network(engine, topology, options);
+
+    network.set_input_data("input", input_mem);
+
+    network.execute();
+
+    auto out_mem = network.get_output("conv_fsv").get_memory();
+    auto out_ptr = out_mem.pointer<FLOAT16>();
+
+    ASSERT_EQ(out_mem.get_layout().format, format::fs_b_yx_fsv32);
+
+    for (int bi = 0; bi < batch_num; ++bi)
+        for (int fi = 0; fi < output_f; ++fi)
+            for (int yi = 0; yi < output_y; ++yi)
+                for (int xi = 0; xi < output_x; ++xi)
+                {
+                    auto val_ref = reference_result[bi][fi][yi][xi];
+                    auto val = out_ptr[(fi / 32) * batch_num * output_y * output_x * 32 +
+                                       bi * output_y * output_x * 32 +
+                                       yi * output_x * 32 +
+                                       xi * 32 +
+                                       fi % 32];
+                    auto equal = are_equal(val_ref, val, 1e-2f);
+                    EXPECT_TRUE(equal);
+                    if (!equal)
+                    {
+                        std::cout << "At b = " << bi << ", fi = " << fi << ", yi = " << yi << ", xi = " << xi << std::endl;
+                    }
+                }
+}
+
 class convolution_test : public tests::generic_test
 {
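
The new depthwise parameterized test above builds the convolution with a groups argument (convolution(id, input, { weights }, groups, stride, input_offset)) and validates the fs_b_yx_fsv32 output by computing linear offsets by hand. A small helper restating that indexing, written for readability only and not part of the library API:

    // Offset into an fs_b_yx_fsv32 buffer: features are split into slices of 32,
    // laid out as (feature slice, batch, y, x, feature within slice).
    inline size_t fs_b_yx_fsv32_offset(int b, int f, int y, int x,
                                       int batch_num, int output_y, int output_x)
    {
        const int fsv = 32;
        return static_cast<size_t>(f / fsv) * batch_num * output_y * output_x * fsv
             + static_cast<size_t>(b) * output_y * output_x * fsv
             + static_cast<size_t>(y) * output_x * fsv
             + static_cast<size_t>(x) * fsv
             + static_cast<size_t>(f % fsv);
    }
    // Usage as in the loop above:
    // out_ptr[fs_b_yx_fsv32_offset(bi, fi, yi, xi, batch_num, output_y, output_x)]
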
 
@@ -5711,13 +5858,11 @@ public:
             delete generic_params;
         }
 
-        for (auto layer_params : all_layer_params)
-        {
-            delete layer_params;
-        }
+        all_layer_params.clear();
+        all_test_params.clear();
     }
 
-    static std::vector<cldnn::primitive*> generate_specific_test_params()
+    static std::vector<std::shared_ptr<cldnn::primitive>> generate_specific_test_params()
     {
         // TODO: check split
 
@@ -5730,31 +5875,28 @@ public:
         std::vector<tensor> dilation_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 5, 4), tensor(1, 1, 1, 3), tensor(1, 1, 7, 2) };
         std::vector<tensor> input_offset_sizes = { tensor(0, 0, 0, 0), tensor(0, 0, 2, 2), tensor(0, 0, -5, -2), tensor(0, 0, 3, -3) };
 
-        std::vector<bool> activations = { false, true };
-        std::vector<float> activation_slopes = { 0.f, -2.3f };
-
         // No padding
-        all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], activations[0], activation_slopes[0]));
-        all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0]));
-        all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], activations[1], activation_slopes[0]));
-        all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1]));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "input0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0]));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1]));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2]));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "input0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3]));
 
         // Input padding
-        all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0]));
-        all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1]));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "reorder0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1]));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3]));
 
         // Output padding
-        all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0], { { 0, 0, 2, 4 },{ 0, 0, 0, 19 } }));
-        all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], activations[1], activation_slopes[0], { { 0, 0, 1, 0 },{ 0, 0, 13, 9 } }));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], { { 0, 0, 2, 4 },{ 0, 0, 0, 19 } }));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], { { 0, 0, 1, 0 },{ 0, 0, 13, 9 } }));
 
         // Input + Output padding
-        all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], activations[0], activation_slopes[0], { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
-        all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1], { { 0, 0, 1, 2 },{ 0, 0, 3, 4 } }));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "reorder0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
+        all_layer_params.emplace_back(new convolution("convolution_no_relu", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], { { 0, 0, 1, 2 },{ 0, 0, 3, 4 } }));
 
         return all_layer_params;
     }
 
-    static std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> generate_all_test_params()
+    static std::vector<std::tuple<tests::test_params*, std::shared_ptr<cldnn::primitive>>> generate_all_test_params()
     {
         generate_specific_test_params();
 
@@ -5798,7 +5940,7 @@ public:
         }
 
         // Create all the combinations for the test.
-        for (cldnn::primitive* layer_param : all_layer_params)
+        for (const auto& layer_param : all_layer_params)
         {
             for (tests::test_params* test_param : all_generic_params)
             {
@@ -5816,7 +5958,7 @@ public:
 
     virtual cldnn::tensor get_expected_output_tensor()
     {
-        const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
+        auto convolution = std::static_pointer_cast<const cldnn::convolution>(layer_params);
         tensor input_size = generic_params->input_layouts[0].size;
         tensor dilation = convolution->dilation;
         tensor stride = convolution->stride;
@@ -5877,15 +6019,13 @@ public:
     {
         // Output reference is always bfyx.
 
-        const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
+        auto convolution = std::static_pointer_cast<const cldnn::convolution>(layer_params);
 
         data_types dt = inputs[0].get_layout().data_type;
 
         tensor input_size = inputs[0].get_layout().size;
         tensor dilation = convolution->dilation;
         tensor stride = convolution->stride;
-        bool is_relu_fused = convolution->with_activation;
-        float activation_slope = convolution->activation_negative_slope;
         tensor input_offset = convolution->input_offset;
         tensor weights_size = inputs[1].get_layout().size;
         padding output_padding = convolution->output_padding;
@@ -5985,15 +6125,6 @@ public:
             }
         }
 
-        // Relu activation
-        if (is_relu_fused)
-        {
-            for (int i = 0; i < (int)output_buffer_size.count(); i++)
-            {
-                output_mem[i] = (output_mem[i] > 0.f) ? output_mem[i] : (output_mem[i] * (Type)activation_slope);
-            }
-        }
-
         return output;
     }
 
@@ -6012,13 +6143,13 @@ public:
 private:
 
     static std::vector<tests::test_params*> all_generic_params;
-    static std::vector<cldnn::primitive*> all_layer_params;
-    static std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> all_test_params;
+    static std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
+    static std::vector<std::tuple<tests::test_params*, std::shared_ptr<cldnn::primitive>>> all_test_params;
 };
 
 std::vector<tests::test_params*> convolution_test::all_generic_params = {};
-std::vector<cldnn::primitive*> convolution_test::all_layer_params = {};
-std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> convolution_test::all_test_params = {};
+std::vector<std::shared_ptr<cldnn::primitive>> convolution_test::all_layer_params = {};
+std::vector<std::tuple<tests::test_params*, std::shared_ptr<cldnn::primitive>>> convolution_test::all_test_params = {};
 
 TEST_P(convolution_test, CONVOLUTION)
 {
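
The convolution_test fixture above switches its stored layer parameters from raw cldnn::primitive* to std::shared_ptr<cldnn::primitive>, so teardown becomes a plain clear() instead of a delete loop, and the dropped relu/slope constructor arguments go away together with the reference ReLU pass. A standalone sketch of the ownership change; the primitive and convolution types here are stand-ins, not the cldnn classes:

    #include <memory>
    #include <vector>

    struct primitive { virtual ~primitive() = default; };   // stand-in for cldnn::primitive
    struct convolution : primitive {};                       // stand-in for cldnn::convolution

    int main()
    {
        std::vector<std::shared_ptr<primitive>> all_layer_params;
        all_layer_params.emplace_back(new convolution());
        // ... build and run the test combinations ...
        all_layer_params.clear();   // previously: for (auto p : all_layer_params) delete p;
        return 0;
    }
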
index 6f8b9d4..e725f71 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/convolution_grad_input.hpp"
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/convolution_grad_input.hpp"
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include "api/CPP/eltwise.hpp"
+#include "api/eltwise.hpp"
 
 using namespace cldnn;
 using namespace tests;
@@ -84,7 +84,6 @@ TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad
     }
 }
 
-
 TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_output_size) {
     //  Filter : 2x2
     //  Input  : 2x2x1x2
index 29c1ea8..a022aa0 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/convolution_grad_weights.hpp"
-#include "api/CPP/convolution.hpp"
-#include "api/CPP/convolution_grad_input.hpp"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/mutable_data.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/convolution_grad_weights.hpp"
+#include "api/convolution.hpp"
+#include "api/convolution_grad_input.hpp"
+#include "api/reorder.hpp"
+#include <api/mutable_data.hpp>
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -68,7 +68,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
     set_values(input_grad, { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.7f, 1.8f });
@@ -144,7 +144,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in8x1x2x2_bfyx_stride2_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 4.f, 7.f });
     set_values(input_grad, { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.7f, 1.8f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 1.f, 1.7f, 1.8f });
@@ -447,7 +447,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz1x1_in1x2x5x5_bfyx_stride2_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 5, 5 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 1, 1 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         8.f, 0.5f, 1.f, 2.f,
@@ -620,7 +620,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz3x3_in2x1x3x3_bfyx_stride1_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 3 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         0.5f, 0.6f, 0.7f, 0.9f, 1.f,  1.1f, 0.7f, 0.9f, 0.1f,
@@ -702,9 +702,9 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz3x3_in2x1x3x3_bfyx_stride1_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 3 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
     auto prev_weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3} });
-    auto prev_biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1} });
+    auto prev_biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1} });
 
     set_values(input, {
         0.5f, 0.6f, 0.7f, 0.9f, 1.f,  1.1f, 0.7f, 0.9f, 0.1f,
@@ -804,7 +804,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 7, 7 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         0.5f, 0.6f, 0.7f, 0.9f, 0.2f, 0.1f, 0.7f,
@@ -950,9 +950,9 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_p
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 7, 7 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
     auto prev_weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
-    auto prev_biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto prev_biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, {
         0.5f, 0.6f, 0.7f, 0.9f, 0.2f, 0.1f, 0.7f,
@@ -1074,7 +1074,6 @@ TEST(convolution_grad_weights_f32_fw_gpu, ngraph_2d_1item_2iterations) {
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
 
-
     topology topology(
         input_layout("input_grad", input_grad.get_layout()),
         data("input", input),
@@ -1086,7 +1085,6 @@ TEST(convolution_grad_weights_f32_fw_gpu, ngraph_2d_1item_2iterations) {
     bo.set_option(build_option::optimize_data(true));
     network network(engine, topology, bo);
 
-
     // set values for first iteration
     set_values(input,
         { 0.671875f, 0.546875f, -0.5625f, -0.359375f, -0.09375f, 0.546875f, -0.546875f, 0.890625f, 0.828125f, -0.546875f, 1.f, -0.078125f, -0.890625f, 0.40625f, -0.359375f });
index 9bea2a3..622065d 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/crop.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/crop.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -93,6 +93,53 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) {
     }
 }
 
+TEST(crop_gpu, basic_in2x2x2x3_crop_all) {
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto x_size = 3;
+    auto y_size = 2;
+
+    auto crop_batch_num = batch_num - 1;
+    auto crop_feature_num = feature_num - 1;
+    auto crop_x_size = x_size - 1;
+    auto crop_y_size = y_size - 1;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size, y_size } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 }));
+
+    std::vector<float> input_vec;
+    for (int i = 0; i < batch_num * feature_num * y_size * x_size; i++)
+        input_vec.push_back(i);
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    printf("Results:\n");
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = b + batch_num * (f + feature_num * (x + x_size * y));
+                    int output_linear_id = b + crop_batch_num * (f + crop_feature_num * (x + crop_x_size * y));
+                    EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
+}
+
 TEST(crop_gpu, basic_int_in2x3x2x2_crop_all) {
     //  Reference  : 1x2x2x2
     //  Input      : 2x3x4x5
@@ -662,11 +709,11 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu", "input", activation_relu));
+    topology.add(activation("relu", "input", activation_func::relu));
     topology.add(crop("crop1", "relu", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) }));
     topology.add(crop("crop2", "relu", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_2)), { tensor(feature(feature_offset_2), spatial(0,0),batch(0)) }));
-    topology.add(activation("relu1", "crop1", activation_relu));
-    topology.add(activation("relu2", "crop2", activation_relu));
+    topology.add(activation("relu1", "crop1", activation_func::relu));
+    topology.add(activation("relu2", "crop2", activation_func::relu));
 
     std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
     std::vector<float> out1 = { 0.f, 2.f,0.f };
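
The crop hunks above also replace the loose activation_relu constant with the scoped activation_func::relu enumerator, and the newly added crop-all test checks its output by computing yxfb linear indices directly. A helper restating that formula as a sketch; in yxfb, batch is the innermost dimension, followed by feature, x, and y:

    // yxfb linear index, matching the expression used in the crop tests above.
    inline int yxfb_index(int b, int f, int x, int y,
                          int batch_num, int feature_num, int x_size)
    {
        return b + batch_num * (f + feature_num * (x + x_size * y));
    }
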
index f74a5f9..706f870 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/eltwise.hpp"
-#include "api/CPP/reorder.hpp"
-#include "api/CPP/custom_gpu_primitive.hpp"
-#include <api/CPP/engine.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/eltwise.hpp"
+#include "api/reorder.hpp"
+#include "api/custom_gpu_primitive.hpp"
+#include <api/engine.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
-
 namespace cldnn
 {
        template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
@@ -74,7 +73,10 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) {
             }
         )__krnl";
     std::string entry_point = "add_kernel";
-    std::vector<cldnn_arg> parameters = { {arg_input, 0}, {arg_input, 1 }, {arg_output, 0 } };
+    std::vector<custom_gpu_primitive::arg_desc> parameters = {
+        {custom_gpu_primitive::arg_input, 0},
+        {custom_gpu_primitive::arg_input, 1 },
+        {custom_gpu_primitive::arg_output, 0 } };
     layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
     std::vector<size_t> gws = { output_layout.count() };
     topology topology;
@@ -178,7 +180,7 @@ void add_basic_in2x2x2x2_with_reorder()
         "       output[idx] = input0[idx] + input1[idx];\n" +
         "   }\n";
     std::string entry_point = "add_kernel";
-    std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_input, 1 },{ arg_output, 0 } };
+    std::vector<custom_gpu_primitive::arg_desc> parameters = { { custom_gpu_primitive::arg_input, 0 },{ custom_gpu_primitive::arg_input, 1 },{ custom_gpu_primitive::arg_output, 0 } };
     layout output_layout = { DType, format::yxfb,{ 2, 2, 2, 2 } };
     std::vector<size_t> gws = { output_layout.count() };
     topology topology;
@@ -280,7 +282,7 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) {
             }
         )__krnl";
     std::string entry_point = "add_kernel";
-    std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_output, 0 } };
+    std::vector<custom_gpu_primitive::arg_desc> parameters = { { custom_gpu_primitive::arg_input, 0 },{ custom_gpu_primitive::arg_output, 0 } };
     layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
     std::vector<size_t> gws = { output_layout.count() };
     topology topology;
@@ -373,7 +375,7 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) {
             }
         )__krnl";
     std::string entry_point = "add_kernel";
-    std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_output, 0 } };
+    std::vector<custom_gpu_primitive::arg_desc> parameters = { { custom_gpu_primitive::arg_input, 0 },{ custom_gpu_primitive::arg_output, 0 } };
     layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
     std::vector<size_t> gws = { output_layout.count() };
     topology topology;
@@ -474,7 +476,7 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2
             }
         )__krnl";
     std::string entry_point = "add_kernel";
-    std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_output, 0 } };
+    std::vector<custom_gpu_primitive::arg_desc> parameters = { { custom_gpu_primitive::arg_input, 0 },{ custom_gpu_primitive::arg_output, 0 } };
     layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
     std::vector<size_t> gws = { output_layout.count() };
     topology topology;
@@ -539,7 +541,7 @@ TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) {
             }
         )__krnl";
     std::string entry_point = "add_kernel";
-    std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_input, 1 },{ arg_output, 0 } };
+    std::vector<custom_gpu_primitive::arg_desc> parameters = { { custom_gpu_primitive::arg_input, 0 },{ custom_gpu_primitive::arg_input, 1 },{ custom_gpu_primitive::arg_output, 0 } };
     layout output_layout = { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } };
     std::vector<size_t> gws = { output_layout.count() };
     topology topology;
@@ -593,4 +595,4 @@ TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) {
     {
         EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
     }
-}
\ No newline at end of file
+}
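
In the custom_gpu_primitive tests above, the C-style cldnn_arg descriptors and the free arg_input/arg_output constants become nested members of the primitive class. A sketch of the new argument list, assuming the headers this test file already includes:

    // Kernel argument descriptors, now scoped inside custom_gpu_primitive.
    std::vector<custom_gpu_primitive::arg_desc> parameters = {
        { custom_gpu_primitive::arg_input,  0 },   // first input buffer
        { custom_gpu_primitive::arg_input,  1 },   // second input buffer
        { custom_gpu_primitive::arg_output, 0 }    // output buffer
    };
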
index 4aa2ad8..f0460d7 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/deconvolution.hpp"
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/deconvolution.hpp"
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include "test_utils/float16.h"
-#include "api/CPP/reorder.hpp"
-
+#include "api/reorder.hpp"
 
 using namespace cldnn;
 using namespace tests;
@@ -49,9 +48,9 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
     //  2
     //
     //  Output:
-    //  -14    5     2.25 
-    //   18    0.75  7.25   
-    //   23    42.5  15.5   
+    //  -14    5     2.25
+    //   18    0.75  7.25
+    //   23    42.5  15.5
 
     const auto& engine = get_test_engine();
 
@@ -93,7 +92,6 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
     }
 }
 
-
 TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
     //  Filter : 2x2
     //  Input  : 2x2
@@ -108,12 +106,12 @@ TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
     //   3.5 1.5
     //
     //  no bias
-    //  
+    //
     //
     //  Output:
-    //  -14    5     2.25 
-    //   18    0.75  7.25   
-    //   23    42.5  15.5   
+    //  -14    5     2.25
+    //   18    0.75  7.25
+    //   23    42.5  15.5
 
     const auto& engine = get_test_engine();
 
@@ -152,7 +150,6 @@ TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
     }
 }
 
-
 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) {    //  Filter : 2x2
     //  Input  : 2x2
     //  Output : 3x3
@@ -169,9 +166,9 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) {    //  Filt
     //  2
     //
     //  Output:
-    //  -14    5     2.25 
-    //   18    0.75  7.25   
-    //   23    42.5  15.5   
+    //  -14    5     2.25
+    //   18    0.75  7.25
+    //   23    42.5  15.5
 
     const auto& engine = get_test_engine();
 
@@ -231,7 +228,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) {
     //  2
     //
     //  Output:
-    //  0.75  
+    //  0.75
 
     const auto& engine = get_test_engine();
 
@@ -282,7 +279,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) {
     //  1
     //
     //  Output:
-    //  0.75  
+    //  0.75
 
     const auto& engine = get_test_engine();
 
@@ -471,8 +468,8 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) {
     //  1  5
     //
     //  Output:
-    //  f0: -3   4.5 
-    //  f0: 13   -17 
+    //  f0: -3   4.5
+    //  f0: 13   -17
     //  f1: 1    8.5
     //  f1: 17 - 13
 
@@ -480,7 +477,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) {
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
 
     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
     set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
@@ -661,8 +658,8 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd
     //  1  5
     //
     //  Output:
-    //  f0: -3   4.5 
-    //  f0: 13   -17 
+    //  f0: -3   4.5
+    //  f0: 13   -17
     //  f1: 1    8.5
     //  f1: 17 - 13
 
@@ -670,7 +667,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
     set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
@@ -913,7 +910,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2)
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
     set_values(weights, {
@@ -955,11 +952,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_
     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 
-    const auto& engine = get_test_engine(); 
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
-    set_values(input, 
-    { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f, 
+    set_values(input,
+    { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
@@ -1056,11 +1053,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16
 
     std::vector<primitive_id> weights_vec;
     std::vector<primitive_id> bias_vec;
-    
+
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } });
 
-    set_values(weights, 
+    set_values(weights,
         {
             -2.f, 2.f, 7.f, -0.5f,
             -4.f, 1.f, -9.f, -7.f,
@@ -1085,7 +1082,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16
         data("weights", weights),
         data("bias", biases)
     );
-   
+
     topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
 
     network network(engine, topology);
@@ -1142,9 +1139,9 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_
     for (uint32_t i = 0; i < 8; i++)
     {
         auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
-        auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+        auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
-        auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+        auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 
         set_values(weights, { -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f });
         set_values(biases, { 1.0f, 1.0f });
@@ -1221,10 +1218,10 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16
 
     std::vector<primitive_id> weights_vec;
     std::vector<primitive_id> bias_vec;
-    
+
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 32, 1, 2, 2 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 32, 1 } });
-    
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 32, 1, 1 } });
+
     set_values(weights,
         {
             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
@@ -1288,7 +1285,6 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16
     }
 }
 
-
 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3) {
     //  Filter : 1x1
     //  Stride : 1x1
@@ -1318,7 +1314,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_
     //  -1   2.5 2
     //
     //  Output:
-    //  -1.5  
+    //  -1.5
     //   8
     //   7.75
     //
@@ -1330,9 +1326,9 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
-    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+    auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
 
     set_values(input, {
         1.5f, 0.5f, 2.0f, -1.0f
@@ -1378,7 +1374,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 6, 2, 1, 1 } });
-    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 6, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 6, 1, 1 } });
 
     set_values(input, {
         1.5f, 0.5f, 2.0f, -1.0f
index 3462ca1..6cb0ec0 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/concatenation.hpp"
-#include "api/CPP/convolution.hpp"
-#include "api/CPP/data.hpp"
-#include "api/CPP/eltwise.hpp"
-#include "api/CPP/fully_connected.hpp"
-#include "api/CPP/pooling.hpp"
-#include "api/CPP/crop.hpp"
-#include "api/CPP/upsampling.hpp"
-#include "api/CPP/reshape.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/concatenation.hpp"
+#include "api/convolution.hpp"
+#include "api/data.hpp"
+#include "api/eltwise.hpp"
+#include "api/fully_connected.hpp"
+#include "api/pooling.hpp"
+#include "api/crop.hpp"
+#include "api/upsampling.hpp"
+#include "api/reshape.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -266,10 +266,10 @@ TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool) {
                          {1, 1, 2, 1}, /*kernel*/
                          {1, 1, 1, 1}  /*stride*/
                          ));
-    topology.add(upsampling("unpool1", "input1", 1, 0, upsampling_sample_type::nearest));
+    topology.add(upsampling("unpool1", "input1", tensor(1, 1, 2, 2), 0, upsampling_sample_type::nearest));
     topology.add(concatenation("concat1", {"pool1", "unpool1"}, cldnn::concatenation::along_x));
-    topology.add(data("weights", weights)),
-        topology.add(convolution("conv", "concat1", {"weights"}));
+    topology.add(data("weights", weights));
+    topology.add(convolution("conv", "concat1", {"weights"}));
 
     cldnn::build_options options;
     options.set_option(cldnn::build_option::optimize_data(true));
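
This hunk swaps the upsampling scale-factor argument for an explicit output-size tensor and untangles the comma-chained topology.add calls into separate statements. A condensed sketch of the updated construction, assuming input1 and weights memories allocated as in this test:

    // upsampling now takes the desired output extents as a tensor rather than a scale factor.
    topology topology;
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(upsampling("unpool1", "input1", tensor(1, 1, 2, 2), 0, upsampling_sample_type::nearest));
    topology.add(data("weights", weights));
    topology.add(convolution("conv", "unpool1", {"weights"}));
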
@@ -297,14 +297,14 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) {
 
     topology topology;
     topology.add(input_layout("input1", input1.get_layout()));
-    topology.add(activation("relu1", "input1", activation_relu));
-    topology.add(activation("relu2", "relu1", activation_sqrt));
+    topology.add(activation("relu1", "input1", activation_func::relu));
+    topology.add(activation("relu2", "relu1", activation_func::sqrt));
     topology.add(concatenation("depth1", {"relu2", "relu1"}, concatenation::along_f));
-    topology.add(activation("relu3", "depth1", activation_sqrt));
+    topology.add(activation("relu3", "depth1", activation_func::sqrt));
     topology.add(concatenation("depth2", {"relu3", "depth1"}, concatenation::along_f));
-    topology.add(activation("relu4", "depth2", activation_sqrt));
+    topology.add(activation("relu4", "depth2", activation_func::sqrt));
     topology.add(concatenation("depth3", {"relu4", "depth2"}, concatenation::along_f));
-    topology.add(activation("relu5", "depth3", activation_relu));
+    topology.add(activation("relu5", "depth3", activation_func::relu));
 
     cldnn::build_options options;
     options.set_option(cldnn::build_option::optimize_data(true));
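
The free-standing activation_relu / activation_sqrt constants are replaced by the scoped activation_func enum throughout these tests. A short sketch of the new spelling, using the same primitives as the hunk above:

    // Scoped enum values replace the old activation_* constants.
    topology.add(activation("relu1", "input1", activation_func::relu));
    topology.add(activation("sqrt1", "relu1", activation_func::sqrt));
    topology.add(concatenation("depth1", {"sqrt1", "relu1"}, concatenation::along_f));
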
@@ -356,7 +356,7 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) {
     topology.add(input_layout("input1", input1.get_layout()));
     topology.add(input_layout("input2", input2.get_layout()));
     topology.add(concatenation("depth1", {"input1", "input2"}, concatenation::along_f));
-    topology.add(activation("relu1", "depth1", activation_relu));
+    topology.add(activation("relu1", "depth1", activation_func::relu));
 
     cldnn::build_options options;
     options.set_option(cldnn::build_option::optimize_data(true));
@@ -553,7 +553,6 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) {
     }
 }
 
-
 TEST(depth_concatenate_i32_gpu, optimize_data01) {
     const auto& engine = get_test_engine();
     build_options build_opt;
@@ -893,23 +892,21 @@ public:
             delete generic_params;
         }
 
-        for (auto layer_params : all_layer_params) {
-            delete layer_params;
-        }
+        all_layer_params.clear();
     }
 
-    static std::vector<cldnn::primitive*> generate_specific_test_params(int i) {
-        std::vector<cldnn::primitive*> all_layer_params;
+    static std::vector<std::shared_ptr<cldnn::primitive>> generate_specific_test_params(int i) {
+        std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
 
         switch (i) {
             case 1:
-                all_layer_params.push_back(new concatenation("depth_concatenate", {"input0"}, concatenation::along_f));
+                all_layer_params.emplace_back(new concatenation("depth_concatenate", {"input0"}, concatenation::along_f));
                 break;
             case 2:
-                all_layer_params.push_back(new concatenation("depth_concatenate", {"input0", "input1"}, concatenation::along_f));
+                all_layer_params.emplace_back(new concatenation("depth_concatenate", {"input0", "input1"}, concatenation::along_f));
                 break;
             case 3:
-                all_layer_params.push_back(new concatenation("depth_concatenate", {"input0", "input1", "input2"}, concatenation::along_f));
+                all_layer_params.emplace_back(new concatenation("depth_concatenate", {"input0", "input1", "input2"}, concatenation::along_f));
                 break;
             default:
                 assert(0);
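
The parameterized concatenation tests now keep their layer parameters as std::shared_ptr<cldnn::primitive> instead of raw pointers, so teardown becomes a clear() rather than a manual delete loop. A sketch of the pattern, assuming <memory> and <vector> are available through the existing includes:

    std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
    // emplace_back forwards the raw pointer straight into the shared_ptr constructor.
    all_layer_params.emplace_back(new concatenation("depth_concatenate", {"input0", "input1"}, concatenation::along_f));
    // ... use the params ...
    all_layer_params.clear();  // shared_ptr releases each primitive; no explicit delete needed
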
@@ -974,8 +971,8 @@ public:
         return all_generic_params;
     }
 
-    static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_all_test_params() {
-        std::vector<std::tuple<test_params*, cldnn::primitive*>> res;
+    static std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> generate_all_test_params() {
+        std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> res;
 
         for (int i = 1; i <= 3; ++i) {
             auto tpv = generate_generic_test_params(i);
@@ -1063,7 +1060,7 @@ public:
         }
     }
 
-    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info) {
+    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>>& info) {
         std::stringstream res;
 
         const auto& p = std::get<0>(info.param);
@@ -1089,10 +1086,10 @@ public:
 
 private:
     static std::vector<tests::test_params*> all_generic_params;
-    static std::vector<cldnn::primitive*> all_layer_params;
+    static std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
 };
 
-std::vector<cldnn::primitive*> depth_concatenate_test::all_layer_params = {};
+std::vector<std::shared_ptr<cldnn::primitive>> depth_concatenate_test::all_layer_params = {};
 std::vector<tests::test_params*> depth_concatenate_test::all_generic_params = {};
 
 TEST_P(depth_concatenate_test, DEPTHCONCATENATE) {
index 49e8dcb..cf435ea 100644 (file)
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/depth_to_space.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/depth_to_space.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include <cstddef>
 #include <tests/test_utils/test_utils.h>
index 3252224..99f115d 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/detection_output.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/detection_output.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 namespace cldnn
@@ -834,7 +834,6 @@ public:
 typedef ::testing::Types<float, FLOAT16> detection_output_test_types;
 TYPED_TEST_CASE(detection_output_test, detection_output_test_types);
 
-
 TYPED_TEST(detection_output_test, test_setup_basic)
 {
     this->setup_basic(false);
index ce36f07..3493fa4 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/eltwise.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/eltwise.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 
 namespace cldnn
@@ -109,16 +109,21 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f
     topology.add(input_layout("input1", input1.get_layout()));
     topology.add(input_layout("input2", input2.get_layout()));
     topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 })));
-    topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, relu, slope, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 }));
-
+    topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 }));
+    primitive_id out_id = "eltwise";
+    if (relu)
+    {
+        topology.add(activation("out", out_id, activation_func::relu, { slope, 0.0f }));
+        out_id = "out";
+    }
     network network(engine, topology);
     network.set_input_data("input1", input1);
     network.set_input_data("input2", input2);
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "eltwise");
+    EXPECT_EQ(outputs.begin()->first, out_id);
 
-    auto output_memory = outputs.at("eltwise").get_memory();
+    auto output_memory = outputs.at(out_id).get_memory();
     auto output_layout = output_memory.get_layout();
     auto output_ptr = output_memory.pointer<T>();
 
@@ -1120,7 +1125,6 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) {
             -2.f,  6.5f,
             -0.5f, -2.5f });
 
-
     set_values(input2, {
         1.f,
         0.f,
@@ -1190,7 +1194,6 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) {
             -2.f,  6.5f,
             -0.5f, -2.5f });
 
-
     set_values(input2, {
         1.f, 0.f,
         2.f, 0.f,
@@ -1254,7 +1257,6 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) {
             -2.f,  6.5f,
             -0.5f, -2.5f });
 
-
     set_values(input2, {
         1.f, 0.f,
 
@@ -1379,7 +1381,6 @@ TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) {
             13.f, 14.f,
             15.f, 16.f });
 
-
     set_values(input2, { 2.0f });
 
     network network(engine, topology);
@@ -1439,7 +1440,6 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) {
             -2.f,  6.5f,
             -0.5f, -2.5f });
 
-
     set_values(input2, {
         1.f, 0.f,
 
@@ -1518,7 +1518,6 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) {
             -2.f,  6.5f,
             -0.5f, -2.5f });
 
-
     set_values(input2, {
         1.f, 0.f,
 
@@ -1779,7 +1778,6 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) {
                     expected = std::fmod(input_1_vec[i], input_2_vec[i]);
                 }
 
-
                 EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
             }
         }
@@ -1923,7 +1921,6 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) {
     //  f1: b0:   119  80    b1:  96   -18.75
     //
 
-
     const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -2323,7 +2320,6 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
     }
 }
 
-
 TEST(eltwise_gpu_f32, stride_test_2x2) {
     //  Input  : 2x2x2x2
     //  Input2 : 2x2x4x4
@@ -2496,7 +2492,6 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic)
     //F2
     //    1221 1222  2221 2222
 
-
     tensor input_tensor(2, 2, 2, 2);
     auto fp16_bfyx_2x2x2x2_input =
     {
@@ -2721,7 +2716,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding)
     topology golden_topology;
     golden_topology.add(input_layout("input1", input1.get_layout()));
     golden_topology.add(input_layout("input2", input2.get_layout()));
-    golden_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::sum,false,0.0f, padding{ {0,0,5,10} , 0 }));
+    golden_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::sum, padding{ {0,0,5,10} , 0 }));
 
     network golden_network(engine, golden_topology);
     golden_network.set_input_data("input1", input1);
@@ -2737,7 +2732,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding)
     FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2.get_layout()));
     FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor)));
     FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::byxf, input_tensor)));
-    FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum,false,0.0f, padding{ {0,0,5,10} , 0 }));
+    FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum, padding{ {0,0,5,10} , 0 }));
     FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", "eltwise", layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,10} , 0 })));
 
     network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology);
@@ -2754,7 +2749,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding)
     BYXF_OUTPUT_topology.add(input_layout("input2", input2.get_layout()));
     BYXF_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::byxf, input_tensor)));
     BYXF_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor)));
-    BYXF_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum, false, 0.0f, padding{ {0,0,5,10} , 0 }));
+    BYXF_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum, padding{ {0,0,5,10} , 0 }));
     BYXF_OUTPUT_topology.add(reorder("reorderOutput", "eltwise", layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,10} , 0 })));
 
     network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology);
@@ -2937,7 +2932,7 @@ void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int in
     topology.add(input_layout("input1", input1.get_layout()));
     topology.add(input_layout("input2", input2.get_layout()));
     topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 })));
-    topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, false, 0.f, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 }));
+    topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 }));
 
     network network(engine, topology);
     network.set_input_data("input1", input1);
@@ -3025,7 +3020,6 @@ TEST(eltwise_gpu_bool, eltwise_or) {
     run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_or);
 }
 
-
 void run_eltwise_generic_test(cldnn::eltwise_mode mode)
 {
     cldnn::format test_inputs_fmt = cldnn::format::bfyx;
@@ -3058,7 +3052,6 @@ TEST(eltwise_gpu, eltwise_mod) {
     run_eltwise_generic_test(cldnn::eltwise_mode::mod);
 }
 
-
 TEST(eltwise_gpu, b_fs_yx_fsv4_w_callib) {
     int B_array[] = {   1,   4,   16,   32, 0 };  // Batch
     int F_array[] = { 256, 512, 1024, 2048, 0 };  // Features
@@ -3114,15 +3107,15 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_w_callib) {
         {
             topology topology;
 
-            auto eltw = eltwise("eltw_GOLD",
+            auto eltw = eltwise("eltw_GOLD_no_relu",
                                 "input1", "input2",
                                 "callib",
-                                eltwise_mode::sum, true);
-
+                                eltwise_mode::sum);
+            auto actv = activation("eltw_GOLD", eltw, activation_func::relu);
             // Create a topology
             topology.add(input_layout("input1", input1.get_layout()),
                          input_layout("input2", input2.get_layout()),
-                         eltw);
+                         eltw, actv);
 
             topology.add(data("callib", callib));
 
@@ -3158,14 +3151,15 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_w_callib) {
                                          format::b_fs_yx_fsv4,
                                          { in_B, in_F, in_X, in_Y })));
 
-            auto eltw = eltwise("eltw_IMAD",
+            auto eltw = eltwise("eltw_IMAD_no_relu",
                                 "reorder1_Swizzelled", "reorder2_Swizzelled",
                                 "callib",
-                                eltwise_mode::sum, true);
+                                eltwise_mode::sum);
+            auto actv = activation("eltw_IMAD", eltw, activation_func::relu);
 
             topology.add(input_layout("input1", input1.get_layout()),
                          input_layout("input2", input2.get_layout()),
-                         eltw);
+                         eltw, actv);
 
             topology.add(data("callib", callib));
 
@@ -3257,15 +3251,16 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) {
         {
             topology topology;
 
-            auto eltw = eltwise("eltw_GOLD",
+            auto eltw = eltwise("eltw_GOLD_no_relu",
                                 { "input1", "input2", "input3" },
-                                mode[i], true);
+                                mode[i]);
+            auto actv = activation("eltw_GOLD", eltw, activation_func::relu);
 
             // Create a topology
             topology.add(input_layout("input1", input1.get_layout()),
                          input_layout("input2", input2.get_layout()),
                          input_layout("input3", input3.get_layout()),
-                         eltw);
+                         eltw, actv);
 
             // Network processing
             network network(engine, topology);
@@ -3305,16 +3300,16 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) {
                                         format::b_fs_yx_fsv4,
                                         { in_B, in_F, in_X, in_Y })));
 
-            auto eltw = eltwise("eltw_IMAD",
+            auto eltw = eltwise("eltw_IMAD_no_relu",
                                 { "reorder1_Swizzelled",
                                   "reorder2_Swizzelled",
                                   "reorder3_Swizzelled" },
-                                mode[i], true);
-
+                                mode[i]);
+            auto actv = activation("eltw_IMAD", eltw, activation_func::relu);
             topology.add(input_layout("input1", input1.get_layout()),
                          input_layout("input2", input2.get_layout()),
                          input_layout("input3", input3.get_layout()),
-                         eltw);
+                         eltw, actv);
 
             // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
             topology.add(reorder("reorder_UnSwizzelled",
index 1ed4515..fb4e0aa 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/embed.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/tensor.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/data.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/embed.hpp"
+#include <api/topology.hpp>
+#include <api/tensor.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 
-
 #include <cmath>
 
 using namespace cldnn;
 using namespace tests;
 
-
 TEST(embed_gpu, seq3num4) {
     //  Input  : 1x1x1x3
     //  Weights: 4x1x3x1
index d5df255..5ed3129 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/fully_connected.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/tensor.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/fully_connected.hpp"
+#include <api/topology.hpp>
+#include <api/tensor.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 #include "instrumentation.h"
 
 #include <cmath>
@@ -94,21 +94,26 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te
     set_values(weights, weights_rnd_vec);
     set_values(bias, bias_rnd_vec);
 
+    primitive_id out_id = "fully_connected";
     topology topology(
         input_layout("input", input.get_layout()),
         data("weights", weights),
         data("bias", bias),
-        fully_connected("fully_connected", "input", "weights", "bias", relu, slope)
+        fully_connected(out_id, "input", "weights", "bias")
     );
-
+    if (relu)
+    {
+        topology.add(activation("out", out_id, activation_func::relu, { slope, 0.0f }));
+        out_id = "out";
+    }
     network network(engine, topology);
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "fully_connected");
+    EXPECT_EQ(outputs.begin()->first, out_id);
 
-    auto output_memory = outputs.at("fully_connected").get_memory();
+    auto output_memory = outputs.at(out_id).get_memory();
     auto output_layout = output_memory.get_layout();
     auto output_ptr = output_memory.pointer<T>();
 
@@ -236,7 +241,6 @@ TEST(fully_connected_gpu, no_biases) {
     EXPECT_EQ(3.0f, output_ptr[3]);
 }
 
-
 TEST(fully_connected_gpu, no_biases_int8) {
     //  Input  : 3x1
     //  Output : 4x1
@@ -297,7 +301,6 @@ TEST(fully_connected_gpu, no_biases_int8) {
     EXPECT_EQ(-52.0f, output_ptr[3]);
 }
 
-
 TEST(fully_connected_gpu, xb_f32_batch_1) {
     //  Input  : 3x1
     //  Output : 4x1
@@ -479,7 +482,6 @@ TEST(fully_connected_gpu, x_f32) {
     EXPECT_EQ(7.00f, output_ptr[3]);
 }
 
-
 TEST(fully_connected_gpu, yxfn_f32) {
     //  Input  : 1x2x1x2 - 1 batch 2 feature maps of size 2x1
     //  Output : 2x1 - 2 batches 1 neuron each
@@ -574,7 +576,8 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) {
         input_layout("input", input_prim.get_layout()),
         data("weights", weights_prim),
         data("bias", bias_prim),
-        fully_connected("full_con_prim", "input", "weights", "bias", true, 0)
+        fully_connected("full_con_prim", "input", "weights", "bias"),
+        activation("out", "full_con_prim", activation_func::relu)
     );
 
     network network(engine, topology);
@@ -582,7 +585,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) {
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "full_con_prim");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
 
@@ -635,7 +638,8 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) {
         input_layout("input", input_prim.get_layout()),
         data("weights", weights_prim),
         data("bias", bias_prim),
-        fully_connected("full_con_prim", "input", "weights", "bias", true, 0)
+        fully_connected("full_con_prim", "input", "weights", "bias"),
+        activation("out", "full_con_prim", activation_func::relu)
     );
 
     network network(engine, topology);
@@ -643,7 +647,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) {
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "full_con_prim");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
 
@@ -697,7 +701,8 @@ TEST(fully_connected_gpu, x_f32_relu) {
         input_layout("input", input_prim.get_layout()),
         data("weights", weights_prim),
         data("bias", bias_prim),
-        fully_connected("full_con_prim", "input", "weights", "bias", true, 0)
+        fully_connected("full_con_prim", "input", "weights", "bias"),
+        activation("out", "full_con_prim", activation_func::relu)
     );
 
     network network(engine, topology);
@@ -705,7 +710,7 @@ TEST(fully_connected_gpu, x_f32_relu) {
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "full_con_prim");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
 
@@ -756,7 +761,8 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) {
         input_layout("input", input_prim.get_layout()),
         data("weights", weights_prim),
         data("bias", bias_prim),
-        fully_connected("full_con_prim", "input", "weights", "bias", true, 0.1f)
+        fully_connected("full_con_prim", "input", "weights", "bias"),
+        activation("out", "full_con_prim", activation_func::relu_negative_slope, { 0.1f })
     );
 
     network network(engine, topology);
@@ -764,7 +770,7 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) {
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "full_con_prim");
+    EXPECT_EQ(outputs.begin()->first, "out");
 
     auto output_prim = outputs.begin()->second.get_memory();
 
@@ -899,7 +905,7 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4)
     }
 }
 
-TEST(fully_connected_gpu, fs_byx_fsv32_b12)
+TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12)
 {
     const auto& engine = get_test_engine();
 
@@ -942,7 +948,8 @@ TEST(fully_connected_gpu, fs_byx_fsv32_b12)
         data("weights", weights_prim),
         data("bias", bias_prim),
         reorder("input_fsv", "input", {data_types::f16, format::fs_b_yx_fsv32, { batch_num, input_f, input_y, input_x } }),
-        fully_connected("fc", "input_fsv", "weights", "bias", true)
+        fully_connected("fc", "input_fsv", "weights", "bias"),
+        activation("out", "fc", activation_func::relu)
     );
 
     // Set data optimization to allow weights reordering to optimal format
@@ -954,7 +961,7 @@ TEST(fully_connected_gpu, fs_byx_fsv32_b12)
 
     auto outputs = network.execute();
 
-    auto output_prim = outputs.at("fc").get_memory();
+    auto output_prim = outputs.at("out").get_memory();
     auto output_ptr = output_prim.pointer<FLOAT16>();
 
     for (size_t bi = 0; bi < batch_num; ++bi)
@@ -974,7 +981,7 @@ TEST(fully_connected_gpu, fs_byx_fsv32_b12)
     }
 }
 
-TEST(fully_connected_gpu, fs_byx_fsv32_b34)
+TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34)
 {
     const auto& engine = get_test_engine();
 
@@ -1017,7 +1024,8 @@ TEST(fully_connected_gpu, fs_byx_fsv32_b34)
         data("weights", weights_prim),
         data("bias", bias_prim),
         reorder("input_fsv", "input", { data_types::f16, format::fs_b_yx_fsv32, { batch_num, input_f, input_y, input_x } }),
-        fully_connected("fc", "input_fsv", "weights", "bias", true)
+        fully_connected("fc", "input_fsv", "weights", "bias"),
+        activation("out", "fc", activation_func::relu)
     );
 
     // Set data optimization to allow weights reordering to optimal format
@@ -1029,7 +1037,7 @@ TEST(fully_connected_gpu, fs_byx_fsv32_b34)
 
     auto outputs = network.execute();
 
-    auto output_prim = outputs.at("fc").get_memory();
+    auto output_prim = outputs.at("out").get_memory();
     auto output_ptr = output_prim.pointer<FLOAT16>();
 
     for (size_t bi = 0; bi < batch_num; ++bi)
index 2737576..89e0f29 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/fully_connected_grad_input.hpp"
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/fully_connected_grad_input.hpp"
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -50,7 +50,6 @@ TEST(fully_connected_grad_input_gpu, basic_bfyx) {
     //  Output:
     //  -1.125  5.625   10.125
 
-
     const auto& engine = get_test_engine();
 
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
index b470bda..49c7610 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/fully_connected_grad_weights.hpp"
-#include "api/CPP/fully_connected.hpp"
-#include "api/CPP/fully_connected_grad_input.hpp"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/mutable_data.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/fully_connected_grad_weights.hpp"
+#include "api/fully_connected.hpp"
+#include "api/fully_connected_grad_input.hpp"
+#include "api/reorder.hpp"
+#include <api/mutable_data.hpp>
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
index 7ba200f..a2603af 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/convolution.hpp"
-#include "api/CPP/eltwise.hpp"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/convolution.hpp"
+#include "api/eltwise.hpp"
+#include "api/reorder.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 
-#include <api_extension/CPP/fused_conv_eltwise.hpp>
+#include <api_extension/fused_conv_eltwise.hpp>
 
 #include <cassert>
 #include <cmath>
@@ -76,7 +76,6 @@ TEST(fused_conv_eltwise, basic_0)
     EXPECT_EQ(out_layout.size.spatial[1], 5);
 }
 
-
 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
 {
     const auto& engine = get_test_engine();
@@ -173,7 +172,7 @@ protected:
 
         auto input_shape = tensor(1, n_features, 4, 1);
         auto weights_shape = tensor(n_features, n_features, 3, 1);
-        auto biases_shape = tensor(1, 1, n_features, 1);
+        auto biases_shape = tensor(1, n_features, 1, 1);
         auto sum_input_shape = tensor(1, n_features, 2, 1);
 
         auto input = memory::allocate(
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
new file mode 100644 (file)
index 0000000..2c3e76f
--- /dev/null
@@ -0,0 +1,440 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+#include "api/memory.hpp"
+#include "api/input_layout.hpp"
+#include "api/convolution.hpp"
+#include "api/quantize.hpp"
+#include "api/topology.hpp"
+#include "api/tensor.hpp"
+#include "api/network.hpp"
+#include "api/eltwise.hpp"
+#include "api/fully_connected.hpp"
+#include "api/binary_convolution.hpp"
+#include "api/engine.hpp"
+#include "api/data.hpp"
+
+#include "test_utils/test_utils.h"
+
+#include <cmath>
+
+using namespace cldnn;
+using namespace tests;
+
+struct bc_test_params {
+    tensor in_shape;
+    tensor out_shape;
+    tensor kernel;
+    tensor stride;
+    tensor pad;
+    tensor dilation;
+    uint32_t groups;
+    data_types data_type;
+    format input_format;
+    data_types weights_type;
+    format weights_format;
+    data_types default_type;
+    format default_format;
+    size_t expected_fused_primitives;
+    size_t expected_not_fused_primitives;
+};
+
+class BaseFusingTest : public ::testing::TestWithParam<bc_test_params> {
+public:
+    cldnn::engine engine;
+    cldnn::topology topology;
+    cldnn::build_options bo_fused;
+    cldnn::build_options bo_not_fused;
+
+    float tolerance = 0.0f;
+
+    static const int min_random = -200;
+    static const int max_random = 200;
+
+    void SetUp() override {
+        bo_fused.set_option(build_option::optimize_data(true));
+        bo_not_fused.set_option(build_option::optimize_data(false));
+    }
+
+    void execute(bc_test_params& p) {
+        auto input_prim = get_mem(get_input_layout(p));
+        network network_not_fused(this->engine, this->topology, bo_not_fused);
+        network network_fused(this->engine, this->topology, bo_fused);
+        network_fused.set_input_data("input", input_prim);
+        network_not_fused.set_input_data("input", input_prim);
+
+        compare(network_not_fused, network_fused, p);
+    }
+
+    void compare(const network& not_fused, const network& fused, bc_test_params& p) {
+        auto outputs_ref = not_fused.execute();
+        auto outputs_fused = fused.execute();
+
+        ASSERT_EQ(fused.get_executed_primitives().size(), p.expected_fused_primitives);
+        ASSERT_EQ(not_fused.get_executed_primitives().size(), p.expected_not_fused_primitives);
+        ASSERT_EQ(outputs_ref.size(), outputs_fused.size());
+        ASSERT_EQ(outputs_ref.size(), size_t(1));
+
+        auto output_not_fused_prim = outputs_ref.begin()->second.get_memory();
+        auto output_fused_prim = outputs_fused.begin()->second.get_memory();
+        if (output_not_fused_prim.get_layout().data_type == data_types::f32) {
+            auto ref = output_not_fused_prim.pointer<float>();
+            auto output_ptr = output_fused_prim.pointer<float>();
+            for (size_t i = 0; i < output_fused_prim.get_layout().count(); i++) {
+                ASSERT_NEAR(ref[i], output_ptr[i], tolerance) << "i = " << i;
+            }
+        } else {
+            auto ref = output_not_fused_prim.pointer<int16_t>();
+            auto output_ptr = output_fused_prim.pointer<int16_t>();
+            for (size_t i = 0; i < output_fused_prim.get_layout().count(); i++) {
+                ASSERT_NEAR(float16_to_float32(ref[i]), float16_to_float32(output_ptr[i]), tolerance) << "i = " << i;
+            }
+        }
+    }
+
+    cldnn::memory get_mem(cldnn::layout l) {
+        auto prim = memory::allocate(engine, l);
+        tensor s = l.size;
+        if (l.data_type == data_types::bin) {
+            VF<int32_t> rnd_vec = generate_random_1d<int32_t>(s.count()/32, min_random, max_random);
+            set_values(prim, rnd_vec);
+        } else {
+            VVVVF<float> rnd = generate_random_4d<float>(s.batch[0], s.feature[0], s.spatial[1], s.spatial[0],
+                                                         min_random, max_random);
+            VF<float> rnd_vec = flatten_4d<float>(format::bfyx, rnd);
+            set_values(prim, rnd_vec);
+        }
+
+        return prim;
+    }
+
+    cldnn::memory get_mem(cldnn::layout l, float fill_value) {
+        auto prim = memory::allocate(engine, l);
+        tensor s = l.size;
+        if (l.data_type == data_types::bin) {
+            VF<int32_t> rnd_vec(s.count()/32, static_cast<int32_t>(fill_value));
+            set_values(prim, rnd_vec);
+        } else {
+            VF<float> rnd_vec(s.count(), fill_value);
+            set_values(prim, rnd_vec);
+        }
+
+        return prim;
+    }
+
+    cldnn::memory get_mem(cldnn::layout l, int min, int max) {
+        auto prim = memory::allocate(engine, l);
+        tensor s = l.size;
+        if (l.data_type == data_types::f32) {
+            VF<float> rnd_vec = generate_random_1d<float>(s.count(), min, max);
+            set_values(prim, rnd_vec);
+        } else if (l.data_type == data_types::i8) {
+            VF<int8_t> rnd_vec = generate_random_1d<int8_t>(s.count(), min, max);
+            set_values(prim, rnd_vec);
+        } else if (l.data_type == data_types::bin) {
+            VF<int32_t> rnd_vec = generate_random_1d<int32_t>(s.count()/32, min, max);
+            set_values(prim, rnd_vec);
+        }
+
+        return prim;
+    }
+
+    layout get_input_layout(bc_test_params& p) {
+        auto pad = p.pad.negate();
+        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
+        return layout{p.data_type, p.input_format, p.in_shape, padding{pad_}};
+    }
+
+    layout get_output_layout(bc_test_params& p) {
+        return layout{p.data_type, p.input_format, p.out_shape};
+    }
+
+    layout get_weights_layout(bc_test_params& p) {
+        return layout{p.weights_type, p.weights_format, tensor{p.out_shape.feature[0],
+                                                               static_cast<int32_t>(p.in_shape.feature[0] / p.groups),
+                                                               p.kernel.spatial[0], p.kernel.spatial[1]}};
+    }
+
+    layout get_bias_layout(bc_test_params& p) {
+        return layout{p.data_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1}};
+    }
+
+    layout get_per_channel_layout(bc_test_params& p) {
+        return layout{p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1}};
+    }
+    layout get_single_element_layout(bc_test_params& p) {
+        return layout{p.default_type, p.default_format, tensor{1, 1, 1, 1}};
+    }
+};
+
+#define CASE_CONV1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx, data_types::f32, format::bfyx
+#define CASE_CONV2 {1, 16, 4, 5}, {1, 32, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx_f16, data_types::f32, format::o_i_yx_i16_o16, data_types::f32, format::bfyx
+#define CASE_CONV3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx_f16, data_types::f32, format::o_i_yx_i16_o16, data_types::f32, format::bfyx
+#define CASE_CONV4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f32, format::bfyx_f16, data_types::f32,  format::oiyx_o16, data_types::f32, format::bfyx
+
+#define CASE_BIN_CONV1 {1, 16, 4, 5}, {1, 16, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 1, data_types::bin, format::b_fs_yx_32fp, data_types::bin, format::os_is_yx_osv32_isv32p, data_types::f32, format::bfyx
+#define CASE_BIN_CONV2 {1, 16, 4, 5}, {1, 30, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::bin, format::b_fs_yx_32fp, data_types::bin, format::os_is_yx_osv32_isv32p, data_types::f32, format::bfyx
+#define CASE_BIN_CONV3 {1, 184, 12, 21}, {1, 224, 12, 21}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::bin, format::b_fs_yx_32fp, data_types::bin, format::os_is_yx_osv32_isv32p, data_types::f32, format::bfyx
+
+class conv_activation : public BaseFusingTest {};
+TEST_P(conv_activation, basic) {
+    auto p = GetParam();
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p))),
+                 data("bias", get_mem(get_bias_layout(p))),
+                 convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+                 activation("activation", "conv_prim", activation_func::abs),
+                 reorder("reorder_bfyx", "activation", p.default_format, data_types::f32)
+    );
+
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_activation, ::testing::ValuesIn(std::vector<bc_test_params>{
+                                                                           bc_test_params{CASE_CONV1, 3, 4},
+                                                                           bc_test_params{CASE_CONV2, 3, 4},
+                                                                           bc_test_params{CASE_CONV3, 3, 4},
+                                                                           bc_test_params{CASE_CONV4, 3, 4},
+}), );
+
+
+class conv_fp32_scale : public BaseFusingTest {};
+TEST_P(conv_fp32_scale, basic) {
+    auto p = GetParam();
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p))),
+                 data("bias", get_mem(get_bias_layout(p))),
+                 data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
+                 convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+                 scale("scale", "conv_prim", "scale_data"),
+                 reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{
+                                             bc_test_params{CASE_CONV1, 4, 4},  // doesn't support this fusing for now
+                                             bc_test_params{CASE_CONV2, 3, 4},
+                                             bc_test_params{CASE_CONV3, 3, 4},
+                                             bc_test_params{CASE_CONV4, 3, 4},
+                                             }), );
+
+class conv_fp32_prelu_eltwise : public BaseFusingTest {};
+TEST_P(conv_fp32_prelu_eltwise, basic) {
+    auto p = GetParam();
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p))),
+                 data("bias", get_mem(get_bias_layout(p))),
+                 data("slope_data", get_mem(get_per_channel_layout(p))),
+                 data("eltwise_data", get_mem(get_output_layout(p))),
+                 convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+                 activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
+                 eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
+                 reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_prelu_eltwise,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{
+                                             bc_test_params{CASE_CONV1, 5, 5},  // doesn't support this fusing for now
+                                             bc_test_params{CASE_CONV2, 3, 5},
+                                             bc_test_params{CASE_CONV3, 3, 5},
+                                             bc_test_params{CASE_CONV4, 3, 5},
+                                             }), );
+
+class conv_bin_activation : public BaseFusingTest {};
+TEST_P(conv_bin_activation, basic) {
+    auto p = GetParam();
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 activation("activation", "bin_conv_prim", activation_func::relu),
+                 reorder("reorder_bfyx", "activation", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_activation,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{bc_test_params{CASE_BIN_CONV1, 3, 4},
+                                            }), );
+
+class conv_bin_scale_activation : public BaseFusingTest {};
+TEST_P(conv_bin_scale_activation, basic) {
+    auto p = GetParam();
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 scale("scale", "bin_conv_prim", "scale_data"),
+                 activation("activation", "scale", activation_func::relu),
+                 reorder("reorder_bfyx", "activation", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_scale_activation,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{
+                            bc_test_params{CASE_BIN_CONV1, 3, 5},
+                            bc_test_params{CASE_BIN_CONV2, 3, 5},
+                                            }), );
+
+class conv_bin_quantize_bin : public BaseFusingTest {};
+TEST_P(conv_bin_quantize_bin, channel_wise_quantize) {
+    auto p = GetParam();
+    auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random);
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("in_lo", in_thresh),
+                 data("in_hi", in_thresh),
+                 data("out_lo", get_mem(get_per_channel_layout(p), -1)),
+                 data("out_hi", get_mem(get_per_channel_layout(p),  1)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 quantize("quantize_data", "bin_conv_prim", "in_lo", "in_hi", "out_lo", "out_hi", 2),
+                 reorder("reorder_bfyx", "quantize_data", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+TEST_P(conv_bin_quantize_bin, blob_wise_quantize) {
+    auto p = GetParam();
+    auto in_thresh = get_mem(get_single_element_layout(p), min_random, max_random);
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("in_lo", in_thresh),
+                 data("in_hi", in_thresh),
+                 data("out_lo", get_mem(get_single_element_layout(p), -1)),
+                 data("out_hi", get_mem(get_single_element_layout(p), 1)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 quantize("quantize_data", "bin_conv_prim", "in_lo", "in_hi", "out_lo", "out_hi", 2),
+                 reorder("reorder_bfyx", "quantize_data", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_quantize_bin,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{
+                            bc_test_params{CASE_BIN_CONV1, 3, 4},
+                            bc_test_params{CASE_BIN_CONV2, 3, 4},
+                                            }), );
+
+class conv_bin_scale_conv_dw : public BaseFusingTest {};
+TEST_P(conv_bin_scale_conv_dw, dw_kernel_3x3_stride2) {
+    auto p = GetParam();
+    auto dw_weights_layout = layout{p.default_type, p.default_format, tensor{p.out_shape.feature[0],
+                                                                             1, 3, 3}};
+
+    auto dw_stride = tensor{1, 1, 2, 2};
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
+                 data("scale_data", get_mem(get_per_channel_layout(p), 1e-1)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 scale("scale", "bin_conv_prim", "scale_data"),
+                 convolution("conv_dw", "scale", {"weights_dw"}, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+                 reorder("reorder_bfyx", "conv_dw", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+TEST_P(conv_bin_scale_conv_dw, dw_kernel_3x3_stride1) {
+    auto p = GetParam();
+    auto dw_weights_layout = layout{p.default_type, p.default_format, tensor{p.out_shape.feature[0],
+                                                                             1, 3, 3}};
+
+    auto dw_stride = tensor{1, 1, 1, 1};
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
+                 data("scale_data", get_mem(get_per_channel_layout(p), 1e-1)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 scale("scale", "bin_conv_prim", "scale_data"),
+                 convolution("conv_dw", "scale", {"weights_dw"}, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+                 reorder("reorder_bfyx", "conv_dw", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_scale_conv_dw,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{
+                            bc_test_params{CASE_BIN_CONV2, 4, 5},
+                            bc_test_params{CASE_BIN_CONV3, 4, 5},
+                                            }), );
+
+class conv_bin_scale_conv_dw_prelu : public BaseFusingTest {};
+TEST_P(conv_bin_scale_conv_dw_prelu, dw_kernel_3x3_stride2) {
+    auto p = GetParam();
+    auto dw_weights_layout = layout{p.default_type, p.default_format, tensor{p.out_shape.feature[0],
+                                                                             1, 3, 3}};
+
+    auto dw_stride = tensor{1, 1, 2, 2};
+    auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random);
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
+                 data("scale_data", get_mem(get_per_channel_layout(p), 1e-1)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 scale("scale", "bin_conv_prim", "scale_data"),
+                 convolution("conv_dw", "scale", {"weights_dw"}, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+                 data("slope_data", get_mem(get_per_channel_layout(p))),
+                 activation("activation", "conv_dw", "slope_data", activation_func::relu_negative_slope),
+                 reorder("reorder_bfyx", "activation", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+TEST_P(conv_bin_scale_conv_dw_prelu, dw_kernel_3x3_stride1) {
+    auto p = GetParam();
+    auto dw_weights_layout = layout{p.default_type, p.default_format, tensor{p.out_shape.feature[0],
+                                                                             1, 3, 3}};
+
+    auto dw_stride = tensor{1, 1, 1, 1};
+    auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random);
+    topology.add(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p), -127, 127)),
+                 data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
+                 data("scale_data", get_mem(get_per_channel_layout(p), 1e-1)),
+                 binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
+                 scale("scale", "bin_conv_prim", "scale_data"),
+                 convolution("conv_dw", "scale", {"weights_dw"}, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+                 data("slope_data", get_mem(get_per_channel_layout(p))),
+                 activation("activation", "conv_dw", "slope_data", activation_func::relu_negative_slope),
+                 reorder("reorder_bfyx", "activation", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_scale_conv_dw_prelu,
+                        ::testing::ValuesIn(std::vector<bc_test_params>{
+                            bc_test_params{CASE_BIN_CONV2, 4, 6},
+                            bc_test_params{CASE_BIN_CONV3, 4, 6},
+                                            }), );
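Note: the parameterized fusing tests above use the standard gtest value-parameterized pattern (TEST_P plus INSTANTIATE_TEST_CASE_P). A minimal, self-contained sketch of that pattern follows; the struct and field names are illustrative stand-ins, and the two counters are only assumed to correspond to the expected primitive counts of the fused and non-fused graphs.

#include <gtest/gtest.h>
#include <vector>

// Illustrative stand-in for bc_test_params (assumption: the two integers are the
// expected primitive counts for the fused and the non-fused build of the graph).
struct fusing_sketch_params {
    int expected_fused_primitives;
    int expected_not_fused_primitives;
};

class fusing_sketch : public ::testing::TestWithParam<fusing_sketch_params> {};

TEST_P(fusing_sketch, fused_graph_is_not_larger) {
    auto p = GetParam();
    // Fusing merges primitives, so the fused graph should never contain more of them.
    EXPECT_LE(p.expected_fused_primitives, p.expected_not_fused_primitives);
}

INSTANTIATE_TEST_CASE_P(fusings_gpu_sketch, fusing_sketch,
                        ::testing::ValuesIn(std::vector<fusing_sketch_params>{
                            {4, 5},
                            {4, 6},
                        }));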
index 76614e9..8245763 100644 (file)
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/gather.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/gather.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include <cstddef>
 #include <tests/test_utils/test_utils.h>
index 8c92c97..124cc72 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/gemm.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/gemm.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 #include "test_utils/uniform_quantized_real_distribution.hpp"
 
 #include <cstddef>
 
-
 using namespace cldnn;
 using namespace ::tests;
 
@@ -76,7 +75,6 @@ TEST(gemm_gpu, basic_bfyx_t1) {
     auto output = outputs.at("output").get_memory();
     auto output_ptr = output.pointer<float>();
 
-
     EXPECT_EQ(output_ptr.size(), (uint32_t)3);
     for (uint32_t i = 0; i < out_data.size(); ++i) {
         EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
@@ -123,7 +121,6 @@ TEST(gemm_gpu, basic_bfyx_t2) {
     auto output = outputs.at("output").get_memory();
     auto output_ptr = output.pointer<float>();
 
-
     EXPECT_EQ(output_ptr.size(), (uint32_t)3);
     for (uint32_t i = 0; i < out_data.size(); ++i) {
         EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
@@ -180,7 +177,6 @@ TEST(gemm_gpu, basic_bfyx_t1t2) {
     auto output = outputs.at("output").get_memory();
     auto output_ptr = output.pointer<float>();
 
-
     EXPECT_EQ(output_ptr.size(), (uint32_t)6);
     for (uint32_t i = 0; i < out_data.size(); ++i) {
         EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
@@ -195,7 +191,6 @@ TEST(gemm_gpu, basic_input3) {
     float alpha = 2.f;
     float beta = 10.f;
 
-
     std::vector<float> input_data = { 
         1.0f, 2.0f, 3.0f,
         1.0f, 0.0f, 1.0f
@@ -259,7 +254,6 @@ TEST(gemm_gpu, basic_input3_t1t2) {
     float alpha = 2.f;
     float beta = 3.f;
 
-
     std::vector<float> input_data = {
         1.0f, 2.0f, 3.0f, 4.0f,
         1.0f, 0.0f, 1.0f, 0.0f,
@@ -396,7 +390,6 @@ TEST(gemm_gpu, basic_input3_t2) {
     float alpha = 2.f;
     float beta = 3.f;
 
-
     std::vector<float> input_data = {
         1.0f, 1.0f, 0.0f,
         2.0f, 0.0f, 0.0f,
@@ -404,7 +397,6 @@ TEST(gemm_gpu, basic_input3_t2) {
         4.0f, 0.0f, 0.0f
     };
 
-
     std::vector<float> input_data2 = {
         3.0f, 3.0f, 1.0f,
         2.0f, 1.0f, 2.0f,
@@ -466,7 +458,6 @@ TEST(gemm_gpu, basic_input3_t1) {
     float alpha = 2.f;
     float beta = 3.f;
 
-
     std::vector<float> input_data = {
         1.0f, 2.0f, 3.0f, 4.0f,
         1.0f, 0.0f, 1.0f, 0.0f,
@@ -3181,14 +3172,11 @@ TEST(gemm_gpu, basic3_bfyx) {
     auto output = outputs.at("output").get_memory();
     auto output_ptr = output.pointer<float>();
 
-
     EXPECT_EQ(output_ptr.size(), (uint32_t)45);
     for (uint32_t  i = 0; i < out_data.size(); ++i) {
         EXPECT_NEAR(output_ptr[i], out_data[i], 0.0001);
     }
 
-
-
 }
 
 TEST(gemm_gpu, basic_smarcink2) {
@@ -3243,7 +3231,6 @@ TEST(gemm_gpu, basic_smarcink2) {
     auto output = outputs.at("output").get_memory();
     auto output_ptr = output.pointer<float>();
 
-
     EXPECT_EQ(output_ptr.size(), (uint32_t)8);
     for (uint32_t i = 0; i < out_data.size(); ++i) {         
         EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
index 218cac0..33331b1 100644 (file)
 
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/index_select.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/index_select.hpp>
+#include <api/memory.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 
@@ -133,7 +133,6 @@ std::vector<float> generate_reference_bfyx(const std::vector<float>& input, cons
     }
 }
 
-
 std::vector<float> generate_reference_yxfb(const std::vector<float>& input, const std::vector<int32_t>& indices, index_select_axis_name axis, const cldnn::layout& input_lay)
 {
     auto memory_desc_inp = generic_test::get_linear_memory_desc(input_lay);
@@ -215,7 +214,6 @@ std::vector<float> generate_reference_yxfb(const std::vector<float>& input, cons
     }
 }
 
-
 TEST(index_select_gpu, basic_along_b_3_executes_bfyx)
 {
     /*
@@ -839,8 +837,6 @@ TEST(index_select_gpu, reverse_along_b_bfyx)
         8.f,  9.f, 10.f, 11.f,
         12.f, 13.f, 14.f, 15.f,
 
-
-
         16.f, 17.f, 18.f, 19.f,
         20.f, 21.f, 22.f, 23.f,
 
@@ -1039,7 +1035,6 @@ TEST(index_select_gpu, reverse_along_x_bfyx)
     }
 }
 
-
 TEST(index_select_gpu, reverse_along_y_yxfb)
 {
     const auto& engine = get_test_engine();
@@ -1052,8 +1047,6 @@ TEST(index_select_gpu, reverse_along_y_yxfb)
         8.f,  9.f, 10.f, 11.f,
         12.f, 13.f, 14.f, 15.f,
 
-
-
         16.f, 17.f, 18.f, 19.f,
         20.f, 21.f, 22.f, 23.f,
 
@@ -1068,8 +1061,6 @@ TEST(index_select_gpu, reverse_along_y_yxfb)
         24.f, 25.f, 26.f, 27.f,
         28.f, 29.f, 30.f, 31.f,
 
-
-
         0.f,  1.f,  2.f,  3.f,
         4.f,  5.f,  6.f,  7.f,
 
@@ -1252,7 +1243,6 @@ TEST(index_select_gpu, reverse_along_b_yxfb)
     }
 }
 
-
 TEST(index_select_gpu, reverse_along_yx_bfyx)
 {
     const auto& engine = get_test_engine();
@@ -1412,7 +1402,6 @@ TEST(index_select_gpu, reverse_along_bfyx_bfyx)
         79.f, 78.f, 77.f, 76.f,
         75.f, 74.f, 73.f, 72.f,
 
-
         71.f, 70.f, 69.f, 68.f,
         67.f, 66.f, 65.f, 64.f,
         63.f, 62.f, 61.f, 60.f,
@@ -1523,7 +1512,6 @@ TEST(index_select_gpu, reverse_along_bfx_yxfb)
         7.f,  6.f,  5.f,  4.f,
         3.f,  2.f,  1.f,  0.f,
 
-
         71.f, 70.f, 69.f, 68.f,
         67.f, 66.f, 65.f, 64.f,
         63.f, 62.f, 61.f, 60.f,
@@ -1536,7 +1524,6 @@ TEST(index_select_gpu, reverse_along_bfx_yxfb)
         43.f, 42.f, 41.f, 40.f,
         39.f, 38.f, 37.f, 36.f,
 
-
         107.f, 106.f, 105.f, 104.f,
         103.f, 102.f, 101.f, 100.f,
         99.f, 98.f, 97.f, 96.f,
@@ -1634,7 +1621,6 @@ TEST(index_select_gpu, reverse_along_bfyx_yxfb)
         79.f, 78.f, 77.f, 76.f,
         75.f, 74.f, 73.f, 72.f,
 
-
         71.f, 70.f, 69.f, 68.f,
         67.f, 66.f, 65.f, 64.f,
         63.f, 62.f, 61.f, 60.f,
@@ -1647,7 +1633,6 @@ TEST(index_select_gpu, reverse_along_bfyx_yxfb)
         43.f, 42.f, 41.f, 40.f,
         39.f, 38.f, 37.f, 36.f,
 
-
         35.f, 34.f, 33.f, 32.f,
         31.f, 30.f, 29.f, 28.f,
         27.f, 26.f, 25.f, 24.f,
index 594c803..5deb427 100644 (file)
 */
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/lookup_table.hpp"
-#include "api/CPP/arg_max_min.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/lookup_table.hpp"
+#include "api/arg_max_min.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
 using namespace std;
 using namespace tests;
 
-
 TEST(lookup_table_base, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
index 6e93ffb..46cfc1c 100644 (file)
@@ -1,25 +1,28 @@
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include "api/CPP/mutable_data.hpp"
-#include "api/CPP/input_layout.hpp"
-#include "api/CPP/lstm.hpp"
-#include "api/CPP/lstm_dynamic.hpp"
-#include "api_extension/CPP/lstm_dynamic_input.hpp"
-#include "api_extension/CPP/lstm_dynamic_timeloop.hpp"
-#include "api/CPP/topology.hpp"
-#include "api/CPP/tensor.hpp"
-#include "api/CPP/network.hpp"
-#include "api/CPP/engine.hpp"
+#include "api/memory.hpp"
+#include "api/mutable_data.hpp"
+#include "api/input_layout.hpp"
+#include "api/lstm.hpp"
+#include "api/lstm_dynamic.hpp"
+#include "api/reorder.hpp"
+#include "api_extension/lstm_dynamic_input.hpp"
+#include "api_extension/lstm_dynamic_timeloop.hpp"
+#include "api/topology.hpp"
+#include "api/tensor.hpp"
+#include "api/network.hpp"
+#include "api/engine.hpp"
 #include "test_utils/test_utils.h"
-#include "api/CPP/data.hpp"
+#include "api/data.hpp"
 #include "instrumentation.h"
 #include <test_utils/float16.h>
-
+#include <chrono>
 #include <sstream>
 #include <iomanip>
 
 #pragma warning( disable : 4503 )
 
+#define MEASURE_PERF false
+#define MEASURE_LOOP 50
 using namespace cldnn;
 using namespace tests;
 
@@ -31,9 +34,9 @@ namespace {
 
 struct offset_order_dynamic {
     size_t it, ot, ft, zt;
-    offset_order_dynamic(size_t scale, const cldnn_lstm_offset_order& t = cldnn_lstm_offset_order_fizo) {
-        static const std::map<cldnn_lstm_offset_order, std::vector<size_t>> offset_map{
-            { cldnn_lstm_offset_order_fizo, { 1, 3, 0, 2 } },
+    offset_order_dynamic(size_t scale, const lstm_weights_order& t = lstm_weights_order::fizo) {
+        static const std::map<lstm_weights_order, std::vector<size_t>> offset_map{
+            { lstm_weights_order::fizo, { 1, 3, 0, 2 } },
         };
         std::vector<size_t> v = offset_map.at(t);
         it = v[0] * scale;
@@ -42,7 +45,7 @@ struct offset_order_dynamic {
         zt = v[3] * scale;
     }
 };
-cldnn_lstm_offset_order default_offset_type_dynamic = cldnn_lstm_offset_order_fizo;
+lstm_weights_order default_offset_type_dynamic = lstm_weights_order::fizo;
 
 namespace dynamic_lstm
 {
@@ -243,7 +246,28 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test
             "weights",
             bias_id));
 
-        network network(engine, topology);
+        build_options opts;
+        opts.set_option(build_option::optimize_data(true));
+        network network(engine, topology, opts);
+
+#if MEASURE_PERF == true
+        using clock = std::chrono::high_resolution_clock;
+        std::vector<std::chrono::nanoseconds> times(MEASURE_LOOP);
+        for (uint32_t i = 0; i < MEASURE_LOOP; i++)
+        {
+            auto t0 = clock::now();
+            network.set_input_data("input", input_mem);
+            network.set_input_data("dynamic_lstm_input", dynamic_length_mem);
+            auto real_outs = network.execute();
+            real_outs.at("dynamic_lstm_input").get_event().wait();
+            auto t1 = clock::now();
+            auto exec_time = t1 - t0;
+            times[i] = exec_time;
+        }
+        std::sort(times.begin(), times.end());
+        std::nth_element(times.begin(), times.begin() + times.size() / 2, times.end());
+        std::cout << "Perf: " << std::chrono::duration_cast<std::chrono::microseconds>(times[times.size() / 2]).count() << " micros. " << std::endl;
+#else
         network.set_input_data("input", input_mem);
         network.set_input_data("dyn_len", dynamic_length_mem);
 
@@ -263,11 +287,17 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test
                 {
                     for (auto x = 0; x < out_tensor.spatial[0]; x++)
                     {
-                        EXPECT_NEAR(output_ref[b][len][dir][x], (float)out_ptr[i++], 1e-3f);
+                        EXPECT_NEAR(output_ref[b][len][dir][x], (float)out_ptr[i++], 1e-3f)
+                            << "b:" << b << ", "
+                            << "len:" << len << ", "
+                            << "dir:" << dir << ", "
+                            << "x:" << x << ", "
+                            << std::endl;
                     }
                 }
             }
         }
+#endif
     }
 };
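Note: the MEASURE_PERF branch added above runs the network MEASURE_LOOP times and prints the median latency in microseconds. A self-contained sketch of that measurement pattern, with a generic callable standing in for the network execution, might look as follows; std::nth_element alone is enough to place the median element, so the preceding std::sort in the test code is not strictly required.

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Runs `work` `loops` times and returns the median wall-clock latency in microseconds.
std::int64_t median_latency_us(const std::function<void()>& work, std::size_t loops = 50) {
    using clock = std::chrono::high_resolution_clock;
    std::vector<std::chrono::nanoseconds> times(loops);
    for (std::size_t i = 0; i < loops; ++i) {
        const auto t0 = clock::now();
        work();  // measured workload, e.g. set_input_data + execute + wait on the output event
        times[i] = clock::now() - t0;
    }
    // Partially ordering the samples is sufficient to read off the median.
    std::nth_element(times.begin(), times.begin() + times.size() / 2, times.end());
    return std::chrono::duration_cast<std::chrono::microseconds>(times[times.size() / 2]).count();
}

int main() {
    std::cout << "Perf: " << median_latency_us([] { /* workload */ }) << " micros." << std::endl;
    return 0;
}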
 
@@ -297,10 +327,6 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test
         VF<T> ref_hidden_vec = flatten_4d<T>(cldnn::format::bfyx, ref_hidden);
         VF<T> ref_cell_vec = flatten_4d<T>(cldnn::format::bfyx, ref_cell);
 
-        dynamic_lstm::lstm_dynamic_reference(ref_input, ref_hidden, ref_cell, ref_weights, ref_recurrent, ref_bias, ref_output_hidden,
-            ref_output_cell, has_bias, has_initial_hidden, has_initial_cell,
-            clip_threshold, input_forget);
-
         const auto& engine = get_test_engine();
         constexpr auto dt = std::is_same<T, float>::value ? data_types::f32 : data_types::f16;
         VF<T> ref_dynamic_length;
@@ -323,7 +349,6 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test
         memory initial_cell_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, 1, hidden_size, direction } });
         set_values<T>(initial_cell_mem, ref_cell_vec);
 
-
         topology topology;
         topology.add(input_layout("input", input_mem.get_layout()));
         topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout()));
@@ -378,10 +403,33 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test
             initial_hidden_id,
             initial_cell_id));
 
-        network network(engine, topology);
+        build_options opts;
+        opts.set_option(build_option::optimize_data(true));
+        network network(engine, topology, opts);
         network.set_input_data("input", input_mem);
         network.set_input_data("dyn_len", dynamic_length_mem);
 
+#if MEASURE_PERF == true
+        using clock = std::chrono::high_resolution_clock;
+        std::vector<std::chrono::nanoseconds> times(MEASURE_LOOP);
+        for (uint32_t i = 0; i < MEASURE_LOOP; i++)
+        {
+            auto t0 = clock::now();
+            network.set_input_data("input", input_mem);
+            network.set_input_data("dyn_len", dynamic_length_mem);
+            auto real_outs = network.execute();
+            real_outs.at("dynamic_lstm").get_event().wait();
+            auto t1 = clock::now();
+            auto exec_time = t1 - t0;
+            times[i] = exec_time;
+        }
+        std::sort(times.begin(), times.end());
+        std::nth_element(times.begin(), times.begin() + times.size() / 2, times.end());
+        std::cout << "Perf: " << std::chrono::duration_cast<std::chrono::microseconds>(times[times.size() / 2]).count() << " micros. " << std::endl;
+#else
+        dynamic_lstm::lstm_dynamic_reference(ref_input, ref_hidden, ref_cell, ref_weights, ref_recurrent, ref_bias, ref_output_hidden,
+            ref_output_cell, has_bias, has_initial_hidden, has_initial_cell,
+            clip_threshold, input_forget);
         auto real_outs = network.execute();
         auto out = real_outs.at("dynamic_lstm");
         auto out_tensor = out.get_memory().get_layout().size;
@@ -400,39 +448,76 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test
                         //check hidden
                         if (len < dynamic_lengths[b])
                         {
-                            EXPECT_NEAR((float)ref_output_hidden[b][len][dir][x], (float)out_ptr[i++], epsilon);
+                            EXPECT_NEAR((float)ref_output_hidden[b][len][dir][x], (float)out_ptr[i++], epsilon)
+                                << "check hidden, "
+                                << "b:" << b << ", "
+                                << "len:" << len << ", "
+                                << "dir:" << dir << ", "
+                                << "x:" << x << ", "
+                                << std::endl;
                         }
                         else
                         {
-                            EXPECT_NEAR(0.0f, (float)out_ptr[i++], epsilon);
+                            EXPECT_NEAR(0.0f, (float)out_ptr[i++], epsilon)
+                                << "check hidden, "
+                                << "b:" << b << ", "
+                                << "len:" << len << ", "
+                                << "dir:" << dir << ", "
+                                << "x:" << x << ", "
+                                << std::endl;
                         }
 
                         //check optional last hidden state output
                         if(has_last_hidden_state && len == dynamic_lengths[b] - 1)
                         {
                             auto ratio = (float)ref_output_hidden[b][len][dir][x] / (float)last_hidden_ptr[i_lh++];                 
-                            EXPECT_TRUE(std::abs((1.0f - ratio) < 0.01f));
+                            EXPECT_TRUE(std::abs((1.0f - ratio) < 0.01f))
+                            << "check has_last_hidden_state with ratio: " << ratio << ", "
+                                << "b:" << b << ", "
+                                << "len:" << len << ", "
+                                << "dir:" << dir << ", "
+                                << "x:" << x << ", "
+                                << std::endl;
 
                         }
                         else if (has_last_hidden_state && len == 0 && dynamic_lengths[b] == 0)
                         {
-                            EXPECT_NEAR(0.0f, (float)last_hidden_ptr[i_lh++], epsilon);
+                            EXPECT_NEAR(0.0f, (float)last_hidden_ptr[i_lh++], epsilon)
+                                << "check has_last_hidden_state, "
+                                << "b:" << b << ", "
+                                << "len:" << len << ", "
+                                << "dir:" << dir << ", "
+                                << "x:" << x << ", "
+                                << std::endl;
                         }
 
                         //check optional last cell state output
                         if(has_last_cell_state && len == dynamic_lengths[b] - 1)
                         {
                             auto ratio = (float)ref_output_cell[b][len][dir][x] / (float)last_cell_ptr[i_lc++];
-                            EXPECT_TRUE(std::abs((1.0f - ratio) < 0.01f));
+                            EXPECT_TRUE(std::abs((1.0f - ratio) < 0.01f))
+                                << "check has_last_cell_state with ratio: " << ratio << ", "
+                                << "b:" << b << ", "
+                                << "len:" << len << ", "
+                                << "dir:" << dir << ", "
+                                << "x:" << x << ", "
+                                << std::endl;
                         }
                         else if (has_last_cell_state && len == 0 && dynamic_lengths[b] == 0)
                         {
-                            EXPECT_NEAR(0.0f, (float)last_cell_ptr[i_lc++], epsilon);
+                            EXPECT_NEAR(0.0f, (float)last_cell_ptr[i_lc++], epsilon)
+                                << "check has_last_cell_state, "
+                                << "b:" << b << ", "
+                                << "len:" << len << ", "
+                                << "dir:" << dir << ", "
+                                << "x:" << x << ", "
+                                << std::endl;
                         }
                     }
                 }
             }
         }
+#endif
     }
 
 };
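Note: the element-wise checks above stream the b/len/dir/x loop indices into each assertion so that a failure reports exactly which output element mismatched. The gtest idiom in isolation, with placeholder values:

#include <gtest/gtest.h>

// Extra context streamed after an assertion is only printed when the check fails.
TEST(assertion_context_sketch, near_with_context) {
    const float expected = 1.0f, actual = 1.0f, epsilon = 1e-3f;
    const int b = 0, len = 0, dir = 0, x = 0;
    EXPECT_NEAR(expected, actual, epsilon)
        << "b:" << b << ", len:" << len << ", dir:" << dir << ", x:" << x;
}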
@@ -445,8 +530,6 @@ TYPED_TEST_CASE(lstm_dynamic_input_layer_test, lstm_dynamic_test_types);
         DYNAMIC_LSTM INPUT TEST
 ----------------------------------------------
 */
-//VVVVF<T> lstm_dynamic_input_ref(VVVVF<T>& input, VVVVF<T>& weights, VVVVF<T>& bias,
-//size_t seq, bool hasBias = true, size_t dir = 0) {
 
 TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_b1_seq3_is3_hs2)
 {
@@ -462,6 +545,16 @@ TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_b3_seq5_is3_hs2)
     this->input_single_layer_generic_test(dir, batch_size, max_seq_len, input_size, hidden_size, dynamic_lengths, true);
 }
 
+TYPED_TEST(lstm_dynamic_input_layer_test, b10_seq20_is16_hs64)
+{
+    auto dir = 1, batch = 10, max_seq_len = 20, input_size = 16, hidden_size = 64;
+    std::vector<float> dynamic_lengths =
+    {
+        5, 10, 12, 11, 5, 6, 7, 8, 9, 15,
+    };
+    this->input_single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths);
+}
+
 TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_b8_seq10_is4_hs16)
 {
     auto batch_size = 8, max_seq_len = 10, input_size = 4, hidden_size = 16;
@@ -482,6 +575,28 @@ TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_dir2_b8_seq10_is4_hs16_opt
     }
 }
 
+TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_1b1_seq1_is32_hs_128)
+{
+    auto dir = 1, batch = 1, max_seq_len = 1, input_size = 32, hidden_size = 128;
+    std::vector<float> dynamic_lengths =
+    {
+        1
+    };
+    bool bias = true;
+    this->input_single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths, bias);
+}
+
+TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_dir_b8_seq27_is16_hs_56)
+{
+    auto dir = 1, batch = 8, max_seq_len = 27, input_size = 16, hidden_size = 56;
+    std::vector<float> dynamic_lengths =
+    {
+        20, 25, 24, 10, 15, 8, 19, 26
+    };
+    this->input_single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths, false);
+}
+
+
 /*
 ----------------------------------------------
         FULL DYNAMIC_LSTM TESTS
@@ -571,12 +686,13 @@ TYPED_TEST(lstm_dynamic_single_layer_test, b10_seq20_is16_hs64)
     auto dir = 1, batch = 10, max_seq_len = 20, input_size = 16, hidden_size = 64;
     std::vector<float> dynamic_lengths =
     {
-        5, 10, 12, 11, 5, 6, 7, 8,  9,  15,
+        5, 10, 12, 11, 5, 6, 7, 8, 9, 15,
     };
     this->single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths);
 }
 
-TYPED_TEST(lstm_dynamic_single_layer_test, b16_seq20_is32_hs32_options)
+// DISABLED because it is very long
+TYPED_TEST(lstm_dynamic_single_layer_test, DISABLED_b16_seq20_is32_hs32_options)
 {
     auto dir = 1, batch = 16, max_seq_len = 20, input_size = 32, hidden_size = 32;
     std::vector<float> dynamic_lengths =
@@ -606,7 +722,6 @@ TYPED_TEST(lstm_dynamic_single_layer_test, b16_seq20_is32_hs32_options)
     }
 }
 
-
 /*
 ----------------------------------------------
               BIDIRECTIONAL TESTS
@@ -620,6 +735,26 @@ TYPED_TEST(lstm_dynamic_single_layer_test, bidir_b2_seq7_is3_hs4)
     this->single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths);
 }
 
+TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_dir_b1_seq1_is32_hs_512)
+{
+    auto dir = 2, batch = 1, max_seq_len = 1, input_size = 8, hidden_size = 128;
+    std::vector<float> dynamic_lengths =
+    {
+        1
+    };
+    this->input_single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths, true);
+}
+
+TYPED_TEST(lstm_dynamic_input_layer_test, dlstm_input_dir_b8_seq5_is32_hs_512)
+{
+    auto dir = 2, batch = 8, max_seq_len = 5, input_size = 8, hidden_size = 128;
+    std::vector<float> dynamic_lengths =
+    {
+        3, 4, 5, 1, 3, 2, 2, 3
+    };
+    this->input_single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths, true);
+}
+
 TYPED_TEST(lstm_dynamic_single_layer_test, bidir_b10_seq7_is3_hs4)
 {
     auto dir = 2, batch = 10, max_seq_len = 7, input_size = 3, hidden_size = 4;
@@ -718,19 +853,6 @@ TYPED_TEST(lstm_dynamic_single_layer_test, b16_seq20_is4_hs8_dirs_optional_outpu
         5, 10, 12, 11, 5, 6, 7, 8, 9, 15, 0, 0, 0, 0, 14, 18
     };
     this->single_layer_generic_test(1, batch, max_seq_len, input_size, hidden_size, dynamic_lengths, false, false, false, true, true, 1e-3f);
-    //auto dirs = { 1, 2 };
-    //auto opitonal_hidden_outputs = { false, true };
-    //auto opitonal_cell_outputs = { false, true };
-    //for (auto dir : dirs)
-    //{
-    //    for (auto o_h_o: opitonal_hidden_outputs)
-    //    {
-    //        for (auto o_c_o : opitonal_cell_outputs)
-    //        {
-    //            this->single_layer_generic_test(dir, batch, max_seq_len, input_size, hidden_size, dynamic_lengths, false, false, false, o_h_o, o_c_o);
-    //        }
-    //    }
-    //}
 }
 
 /*
@@ -839,7 +961,6 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_0) {
     ASSERT_ANY_THROW(network network(engine, topology));
 }
 
-
 TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) {
 
     auto batch_size = 50, max_sequence_len = 10, input_size = 16, hidden_size = 32, direction = 1;
@@ -866,4 +987,3 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) {
 }
 
 
-
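Note: the diff that follows (like the dynamic LSTM tests above) replaces the C-style cldnn_lstm_offset_order and cldnn_lstm_output enums with the scoped enums lstm_weights_order and lstm_output_selection. Scoped enum values do not convert implicitly to integers, which is why the logging statements gain an explicit static_cast<int>(...). A minimal illustration, using a sketch enum limited to the enumerators visible in this diff:

#include <iostream>

enum class lstm_output_selection_sketch { sequence, hidden, hidden_cell, sequence_cell };

int main() {
    const auto selection = lstm_output_selection_sketch::sequence_cell;
    // std::cout << selection;  // would not compile: enum class has no implicit int conversion
    std::cout << "Output selection: " << static_cast<int>(selection) << std::endl;
    return 0;
}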
index 41649a2..6ae2ee7 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/lstm.hpp"
-#include <api/CPP/split.hpp>
-#include <api/CPP/crop.hpp>
-#include <api/CPP/reshape.hpp>
-#include <api/CPP/concatenation.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/tensor.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/lstm.hpp"
+#include <api/split.hpp>
+#include <api/crop.hpp>
+#include <api/reshape.hpp>
+#include <api/concatenation.hpp>
+#include <api/topology.hpp>
+#include <api/tensor.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 #include "instrumentation.h"
 #include <test_utils/float16.h>
 
@@ -52,10 +52,10 @@ namespace {
 
 struct offset_order {
     size_t it, ot, ft, zt;
-    offset_order(size_t scale, const cldnn_lstm_offset_order& t = cldnn_lstm_offset_order_iofz) {
-        static const std::map<cldnn_lstm_offset_order, std::vector<size_t>> offset_map{
-            { cldnn_lstm_offset_order_iofz,{ 0, 1, 2, 3 } },
-            { cldnn_lstm_offset_order_ifoz,{ 0, 2, 1, 3 } }
+    offset_order(size_t scale, const lstm_weights_order& t = lstm_weights_order::iofz) {
+        static const std::map<lstm_weights_order, std::vector<size_t>> offset_map{
+            { lstm_weights_order::iofz,{ 0, 1, 2, 3 } },
+            { lstm_weights_order::ifoz,{ 0, 2, 1, 3 } }
         };
         std::vector<size_t> v = offset_map.at(t);
         it = v[0] * scale;
@@ -64,7 +64,7 @@ struct offset_order {
         zt = v[3] * scale;
     }
 };
-cldnn_lstm_offset_order default_offset_type = cldnn_lstm_offset_order_iofz;
+lstm_weights_order default_offset_type = lstm_weights_order::iofz;
 
 template<typename T>
 T clip(T val, T threshold) {
@@ -75,7 +75,6 @@ T clip(T val, T threshold) {
     return val;
 }
 
-
 template <typename T>
 VVVVF<T> lstm_gemm_reference(VVVVF<T>& input, VVVVF<T>& weights, VVVVF<T>& recurrent, VVVVF<T>& bias, VVVVF<T>& hidden,
     size_t seq, bool hasBias = true, bool hasHidden = true, size_t dir = 0, size_t input_dir = 0) {
@@ -213,8 +212,6 @@ void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell,
     last_cell = cell;
 }
 
-
-
 template<typename T>
 void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
     bool hasBias = true, bool hasHidden = true) {
@@ -413,7 +410,6 @@ void generate_lstm_topology(topology& t, memory& input, memory& hidden, memory&
     t.add(concatenation("concatenation", output_ids_offsets, concatenation::along_f));
 }
 
-
 template<typename T>
 void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
     bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true) {
@@ -609,13 +605,13 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
             topology.add(lstm(lstm_id, lstm_inputs, weights_id, recurrent_id,
                             hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? cell_id : "", "",
                             clip_threshold, input_forget, {}, {},
-                            cldnn_lstm_output::cldnn_lstm_output_sequence, default_offset_type));
+                            lstm_output_selection::sequence, default_offset_type));
         }
         else {
             topology.add(lstm(lstm_id, { prev_lstm_id }, weights_id, recurrent_id,
                             hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? cell_id : "", "",
                             clip_threshold, input_forget, {}, {},
-                            cldnn_lstm_output::cldnn_lstm_output_sequence, default_offset_type));
+                            lstm_output_selection::sequence, default_offset_type));
         }
         prev_lstm_id = lstm_id;
     }
@@ -662,7 +658,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
 
 // -------------------------------------------------------
 template<typename T>
-void lstm_gpu_output_test(const cldnn_lstm_output& output_selection, int directions) {
+void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions) {
     int layers = 1;
     int sequence_len = 4;
     int batch_size = 3;
@@ -671,7 +667,7 @@ void lstm_gpu_output_test(const cldnn_lstm_output& output_selection, int directi
 
     std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
             << " Sequence Len = " << sequence_len << " Directions = " << directions << " Batch Size = " << batch_size
-                       << " Output selection: " << output_selection << std::endl;
+                       << " Output selection: " << static_cast<int>(output_selection) << std::endl;
     int min_random = -2, max_random = 2;
 
     VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
@@ -712,10 +708,10 @@ void lstm_gpu_output_test(const cldnn_lstm_output& output_selection, int directi
     set_values(hidden, ref_hidden_vec);
     set_values(cell, ref_cell_vec);
 
-    bool emit_last_cell = output_selection == cldnn_lstm_output_hidden_cell ||
-                          output_selection == cldnn_lstm_output_sequence_cell;
-    bool emit_last_hidden = output_selection == cldnn_lstm_output_hidden ||
-                            output_selection == cldnn_lstm_output_hidden_cell;
+    bool emit_last_cell = output_selection == lstm_output_selection::hidden_cell ||
+                          output_selection == lstm_output_selection::sequence_cell;
+    bool emit_last_hidden = output_selection == lstm_output_selection::hidden ||
+                            output_selection == lstm_output_selection::hidden_cell;
 
     topology topology;
     std::vector<std::pair<primitive_id, tensor>> input_ids_offsets;
@@ -820,7 +816,6 @@ void lstm_gpu_output_test(const cldnn_lstm_output& output_selection, int directi
     }
 }
 
-
 // -------------------------------------------------------
 template<typename T>
 void lstm_gpu_format_test(const cldnn::format& format, int directions) {
@@ -830,11 +825,11 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
     int input_size = 4;
     int hidden_size = 5;
 
-    cldnn_lstm_output output_selection = cldnn_lstm_output::cldnn_lstm_output_sequence;
+    lstm_output_selection output_selection = lstm_output_selection::sequence;
 
     std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
             << " Sequence Len = " << sequence_len << " Directions = " << directions << " Batch Size = " << batch_size
-            << " Output selection: " << output_selection << std::endl;
+            << " Output selection: " << static_cast<int>(output_selection) << std::endl;
     int min_random = -2, max_random = 2;
 
     VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
@@ -875,10 +870,10 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
     set_values(hidden, ref_hidden_vec);
     set_values(cell, ref_cell_vec);
 
-    bool emit_last_cell = output_selection == cldnn_lstm_output_hidden_cell ||
-                          output_selection == cldnn_lstm_output_sequence_cell;
-    bool emit_last_hidden = output_selection == cldnn_lstm_output_hidden ||
-                            output_selection == cldnn_lstm_output_hidden_cell;
+    bool emit_last_cell = output_selection == lstm_output_selection::hidden_cell ||
+                          output_selection == lstm_output_selection::sequence_cell;
+    bool emit_last_hidden = output_selection == lstm_output_selection::hidden ||
+                            output_selection == lstm_output_selection::hidden_cell;
 
     topology topology;
     std::vector<std::pair<primitive_id, tensor>> input_ids_offsets;
@@ -1071,7 +1066,7 @@ void lstm_gpu_users_test() {
     topology.add(input_layout("cell", cell.get_layout()));
     topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent",
                       "biases", "hidden", "cell", "", 0, false, {}, {},
-                      cldnn_lstm_output::cldnn_lstm_output_hidden, default_offset_type));
+                      lstm_output_selection::hidden, default_offset_type));
     std::vector<primitive_id> output_ids_offsets {"lstm", "hidden"};
     topology.add(concatenation("concatenation", output_ids_offsets, concatenation::along_f));
 
@@ -1216,13 +1211,13 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
             topology.add(lstm(lstm_id, { "input" }, weights_id, recurrent_id,
                                has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? cell_id : "", "",
                                clip_threshold, input_forget, {}, {},
-                               cldnn_lstm_output::cldnn_lstm_output_sequence_cell, default_offset_type));
+                               lstm_output_selection::sequence_cell, default_offset_type));
                }
                else {
                        topology.add(lstm(lstm_id, { prev_node_id }, weights_id, recurrent_id,
                                has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? cell_id : "", "",
                                clip_threshold, input_forget, {}, {},
-                               cldnn_lstm_output::cldnn_lstm_output_sequence_cell, default_offset_type));
+                               lstm_output_selection::sequence_cell, default_offset_type));
                }
 
         // Crop out the whole output sequence element
@@ -1277,7 +1272,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
 template<typename T>
 void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
                          int directions, size_t layers, size_t chains, int sequence_len,
-                         const cldnn_lstm_output& output_selection)
+                         const lstm_output_selection& output_selection)
 {
     int min_random = -2, max_random = 2;
     bool has_bias = false;
@@ -1288,7 +1283,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
 
     std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
         << " Sequence Len = " << sequence_len << " Directions = " << directions << " Batch Size = " << batch_size
-        << " Output selection: " << output_selection << std::endl;
+        << " Output selection: " << static_cast<int>(output_selection) << std::endl;
 
     VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
     std::vector<std::vector< VVVVF<T>>> ref_weights;
@@ -1466,8 +1461,8 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
     }
     topology.add(split("inputSplit", "input", input_ids_offsets));
 
-    bool emit_last_hidden = output_selection == cldnn_lstm_output_hidden
-        || output_selection == cldnn_lstm_output_hidden_cell;
+    bool emit_last_hidden = output_selection == lstm_output_selection::hidden
+        || output_selection == lstm_output_selection::hidden_cell;
 
     std::vector<cldnn::primitive_id> output_sequence_ids;
     std::vector<cldnn::primitive_id> last_hidden_ids;
@@ -1500,7 +1495,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
 
             primitive_id initial_hidden_id;
             primitive_id initial_cell_id;
-            cldnn_lstm_output output_selection_per_layer;
+            lstm_output_selection output_selection_per_layer;
 
             topology.add(data(weights_id, weights[chain][layer]));
             topology.add(data(recurrent_id, recurrent[chain][layer]));
@@ -1528,7 +1523,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
             // last hidden and last cell
             if (layer < layers - 1)
             {
-                output_selection_per_layer = cldnn_lstm_output::cldnn_lstm_output_sequence_cell;
+                output_selection_per_layer = lstm_output_selection::sequence_cell;
             }
             else
             {
@@ -1651,7 +1646,6 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
     }
 }
 
-
 TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f32) {
     generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, true);
 }
@@ -1785,9 +1779,9 @@ TEST(lstm_gpu, generic_lstm_clip_input_forget_f32) {
 }
 
 TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32) {
-    default_offset_type = cldnn_lstm_offset_order_ifoz;
+    default_offset_type = lstm_weights_order::ifoz;
     generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
-    default_offset_type = cldnn_lstm_offset_order_iofz;
+    default_offset_type = lstm_weights_order::iofz;
 }
 
 TEST(lstm_gpu, generic_lstm_canonical_f32) {
@@ -1830,35 +1824,35 @@ TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32) {
 
 // optional outputs support
 TEST(lstm_gpu, output_test_sequence_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence, 1);
+    lstm_gpu_output_test<float>(lstm_output_selection::sequence, 1);
 }
 
 TEST(lstm_gpu, output_test_hidden_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden, 1);
+    lstm_gpu_output_test<float>(lstm_output_selection::hidden, 1);
 }
 
 TEST(lstm_gpu, output_test_hidden_cell_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden_cell, 1);
+    lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 1);
 }
 
 TEST(lstm_gpu, output_test_sequence_cell_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence_cell, 1);
+    lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 1);
 }
 
 TEST(lstm_gpu, output_test_sequence_bi_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence, 2);
+    lstm_gpu_output_test<float>(lstm_output_selection::sequence, 2);
 }
 
 TEST(lstm_gpu, output_test_hidden_bi_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden, 2);
+    lstm_gpu_output_test<float>(lstm_output_selection::hidden, 2);
 }
 
 TEST(lstm_gpu, output_test_hidden_cell_bi_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden_cell, 2);
+    lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 2);
 }
 
 TEST(lstm_gpu, output_test_sequence_cell_bi_f32) {
-    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence_cell, 2);
+    lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 2);
 }
 
 // format tests
@@ -1902,7 +1896,7 @@ TEST(lstm_gpu, generic_lstm_chained_unidirectional_f32) {
     // chains = 1
     // sequence length = 1
     // output selection = output sequence and cell
-    lstm_gpu_chain_test<float>(1, 2, 4, 1, 1, 2, 1, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+    lstm_gpu_chain_test<float>(1, 2, 4, 1, 1, 2, 1, lstm_output_selection::sequence_cell);
 }
 
 TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32) {
@@ -1914,7 +1908,7 @@ TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32) {
     // chains = 1
     // sequence length = 1
     // output selection = output sequence and cell
-    lstm_gpu_chain_test<float>(1, 2, 4, 2, 1, 1, 1, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+    lstm_gpu_chain_test<float>(1, 2, 4, 2, 1, 1, 1, lstm_output_selection::sequence_cell);
 }
 
 TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32) {
@@ -1926,7 +1920,7 @@ TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32) {
     // chains = 2
     // sequence length = 5
     // output selection = output sequence and cell
-    lstm_gpu_chain_test<float>(2, 2, 4, 2, 1, 2, 5, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+    lstm_gpu_chain_test<float>(2, 2, 4, 2, 1, 2, 5, lstm_output_selection::sequence_cell);
 }
 
 TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32) {
@@ -1938,7 +1932,7 @@ TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32) {
     // chains = 2
     // sequence length = 5
     // output selection = output sequence and cell
-    lstm_gpu_chain_test<float>(2, 2, 4, 2, 4, 2, 5, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+    lstm_gpu_chain_test<float>(2, 2, 4, 2, 4, 2, 5, lstm_output_selection::sequence_cell);
 }
 
 // FP16 Half precision tests
@@ -2023,9 +2017,9 @@ TEST(lstm_gpu, generic_lstm_clip_input_forget_f16) {
 }
 
 TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16) {
-    default_offset_type = cldnn_lstm_offset_order_ifoz;
+    default_offset_type = lstm_weights_order::ifoz;
     generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
-    default_offset_type = cldnn_lstm_offset_order_iofz;
+    default_offset_type = lstm_weights_order::iofz;
 }
 
 TEST(lstm_gpu, generic_lstm_canonical_f16) {
index afade14..6d9f7d2 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/max_unpooling.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/max_unpooling.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/mutable_data.hpp>
-#include <api/CPP/pooling.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
+#include <api/mutable_data.hpp>
+#include <api/pooling.hpp>
 #include "test_utils/float16.h"
 
 using namespace cldnn;
@@ -391,7 +391,6 @@ TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) {
     //  f1: b0:  0    0  0   b1:   0    0    0
     //  f1: b0:  0    8  16  b1:   12   0    17
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
index 75821bf..25a1884 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/activation.hpp>
-#include <api/CPP/pooling.hpp>
-#include <api/CPP/concatenation.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/reshape.hpp>
-#include <api/CPP/crop.hpp>
-#include <api/CPP/scale.hpp>
+#include <api/engine.hpp>
+#include <api/memory.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/input_layout.hpp>
+#include <api/activation.hpp>
+#include <api/pooling.hpp>
+#include <api/concatenation.hpp>
+#include <api/data.hpp>
+#include <api/reshape.hpp>
+#include <api/crop.hpp>
+#include <api/scale.hpp>
 
 #include "test_utils/test_utils.h"
 
@@ -44,7 +44,7 @@ TEST(memory_tests, DISABLED_execution_loop)
 
     topology tpl{
         input_layout("in", in.get_layout()),
-        activation("out", "in", activation_linear)
+        activation("out", "in", activation_func::linear)
     };
 
     network net(eng, tpl);
@@ -64,7 +64,7 @@ TEST(memory_tests, DISABLED_network_creation_loop)
 
     topology tpl{
         input_layout("in", in.get_layout()),
-        activation("out", "in", activation_linear)
+        activation("out", "in", activation_func::linear)
     };
 
     while (true)
@@ -85,12 +85,12 @@ TEST(memory_pool, basic_non_padded_relu_pipe) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu", "input", activation_relu));
-    topology.add(activation("relu1", "relu", activation_relu));
-    topology.add(activation("relu2", "relu1", activation_relu));
-    topology.add(activation("relu3", "relu2", activation_relu));
-    topology.add(activation("relu4", "relu3", activation_relu));
-    topology.add(activation("relu5", "relu4", activation_relu));
+    topology.add(activation("relu", "input", activation_func::relu));
+    topology.add(activation("relu1", "relu", activation_func::relu));
+    topology.add(activation("relu2", "relu1", activation_func::relu));
+    topology.add(activation("relu3", "relu2", activation_func::relu));
+    topology.add(activation("relu4", "relu3", activation_func::relu));
+    topology.add(activation("relu5", "relu4", activation_func::relu));
 
     std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
     set_values(input, input_vec);
@@ -101,10 +101,9 @@ TEST(memory_pool, basic_non_padded_relu_pipe) {
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 80);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 64);
  }
 
-
 TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
     // uncomment this line to disable memory pool
     /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
@@ -119,13 +118,13 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu", "input", activation_relu));
-    topology.add(activation("relu1", "relu", activation_relu));
+    topology.add(activation("relu", "input", activation_func::relu));
+    topology.add(activation("relu1", "relu", activation_func::relu));
     topology.add(pooling("pool1", "relu1",pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 }));
-    topology.add(activation("relu2", "pool1", activation_relu));
-    topology.add(activation("relu3", "relu2", activation_relu));
-    topology.add(activation("relu4", "relu3", activation_relu));
-    topology.add(activation("relu5", "relu4", activation_relu));
+    topology.add(activation("relu2", "pool1", activation_func::relu));
+    topology.add(activation("relu3", "relu2", activation_func::relu));
+    topology.add(activation("relu4", "relu3", activation_func::relu));
+    topology.add(activation("relu5", "relu4", activation_func::relu));
 
     build_options bo;
     bo.set_option(build_option::optimize_data(true));
@@ -134,10 +133,9 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1088);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)896);
 }
 
-
 TEST(memory_pool, multi_outputs_network) {
     //            -- relu -- relu1 -- relu4
     //     input<           
@@ -157,14 +155,14 @@ TEST(memory_pool, multi_outputs_network) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu", "input", activation_relu));
-    topology.add(activation("relu1", "relu", activation_relu));
-    topology.add(activation("relu2", "input", activation_relu));
-    topology.add(activation("relu3", "relu2", activation_relu));
-    topology.add(activation("relu4", "relu1", activation_relu));
-    topology.add(activation("relu5", "relu3", activation_relu));
-    topology.add(activation("relu6", "relu5", activation_relu));
-    topology.add(activation("relu7", "relu6", activation_relu));
+    topology.add(activation("relu", "input", activation_func::relu));
+    topology.add(activation("relu1", "relu", activation_func::relu));
+    topology.add(activation("relu2", "input", activation_func::relu));
+    topology.add(activation("relu3", "relu2", activation_func::relu));
+    topology.add(activation("relu4", "relu1", activation_func::relu));
+    topology.add(activation("relu5", "relu3", activation_func::relu));
+    topology.add(activation("relu6", "relu5", activation_func::relu));
+    topology.add(activation("relu7", "relu6", activation_func::relu));
 
     build_options bo;
     bo.set_option(build_option::optimize_data(true));
@@ -173,10 +171,9 @@ TEST(memory_pool, multi_outputs_network) {
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)2048);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1536);
 }
 
-
 TEST(memory_pool, oooq) {
     /*          -- relu1 - concat1- relu4 -- 
         input<  -- relu2 /                   >-- concat2 -- relu6
@@ -194,14 +191,14 @@ TEST(memory_pool, oooq) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu1", "input", activation_relu));
-    topology.add(activation("relu2", "input", activation_relu));
-    topology.add(activation("relu3", "input", activation_relu));
+    topology.add(activation("relu1", "input", activation_func::relu));
+    topology.add(activation("relu2", "input", activation_func::relu));
+    topology.add(activation("relu3", "input", activation_func::relu));
     topology.add(concatenation("concat1", { "relu1", "relu2"},concatenation::along_f));
-    topology.add(activation("relu4", "concat1", activation_relu));
-    topology.add(activation("relu5", "relu3", activation_relu));
+    topology.add(activation("relu4", "concat1", activation_func::relu));
+    topology.add(activation("relu5", "relu3", activation_func::relu));
     topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
-    topology.add(activation("relu6", "concat2", activation_relu));
+    topology.add(activation("relu6", "concat2", activation_func::relu));
 
     build_options bo;
     bo.set_option(build_option::optimize_data(true));
@@ -210,7 +207,7 @@ TEST(memory_pool, oooq) {
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2560);
 }
 
 TEST(memory_pool, shared_mem_pool_same_topology_twice) {
@@ -237,14 +234,14 @@ TEST(memory_pool, shared_mem_pool_same_topology_twice) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu1", "input", activation_relu));
-    topology.add(activation("relu2", "input", activation_sqrt));
-    topology.add(activation("relu3", "input", activation_square));
+    topology.add(activation("relu1", "input", activation_func::relu));
+    topology.add(activation("relu2", "input", activation_func::sqrt));
+    topology.add(activation("relu3", "input", activation_func::square));
     topology.add(concatenation("concat1", { "relu1", "relu2" }, concatenation::along_f));
-    topology.add(activation("relu4", "concat1", activation_relu));
-    topology.add(activation("relu5", "relu3", activation_relu));
+    topology.add(activation("relu4", "concat1", activation_func::relu));
+    topology.add(activation("relu5", "relu3", activation_func::relu));
     topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
-    topology.add(activation("relu6", "concat2", activation_linear, {1.0f, 0.5f}));
+    topology.add(activation("relu6", "concat2", activation_func::linear, {1.0f, 0.5f}));
 
     build_options bo;
     bo.set_option(build_option::optimize_data(true));
@@ -257,7 +254,7 @@ TEST(memory_pool, shared_mem_pool_same_topology_twice) {
     auto output_layout_first = output_memory_first.get_layout();
     auto output_ptr_first = output_memory_first.pointer<float>();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2560);
 
     network network_second(engine, topology, bo);
     network_second.set_input_data("input", input);
@@ -267,7 +264,7 @@ TEST(memory_pool, shared_mem_pool_same_topology_twice) {
     auto output_layout_second = output_memory_second.get_layout();
     auto output_ptr_second = output_memory_second.pointer<float>();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3584);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3328);
     EXPECT_EQ(output_layout_first, output_layout_second);
 
     int y_size = output_layout_first.size.spatial[1];
@@ -365,7 +362,6 @@ TEST(memory_pool, shared_mem_pool_same_topology_twice_weights) {
     }
 }
 
-
 TEST(memory_pool, shared_mem_pool_diff_batches) {
 
     engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
@@ -524,16 +520,15 @@ TEST(memory_pool, add_mem_dep_test) {
         5.0f, 6.0f, 7.0f, 8.0f});
     set_values(scale_memory, { 1.0f });
 
-
     auto input = cldnn::input_layout("input1", input_layout1);
-    auto actv1 = cldnn::activation("input_activ1", "input1", cldnn_activation_func::activation_abs);
-    auto actv2 = cldnn::activation("input_activ2", "input1", cldnn_activation_func::activation_abs);
+    auto actv1 = cldnn::activation("input_activ1", "input1", activation_func::abs);
+    auto actv2 = cldnn::activation("input_activ2", "input1", activation_func::abs);
     auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 });
     auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 });
     auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
     auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
-    auto actv3 = cldnn::activation("out3", "elt1", cldnn_activation_func::activation_abs);
-    auto actv4 = cldnn::activation("out4", "elt2", cldnn_activation_func::activation_abs);
+    auto actv3 = cldnn::activation("out3", "elt1", activation_func::abs);
+    auto actv4 = cldnn::activation("out4", "elt2", activation_func::abs);
 
     auto topology = cldnn::topology(
         input,
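The hunks above migrate the memory_pool tests from the old free-standing activation enums (activation_relu, activation_sqrt, activation_linear, ...) to the scoped activation_func enum and lower the expected peak device-memory figures after the pool change. Below is a minimal sketch of the new-style calls, using only constructs that appear in these tests (get_test_engine and set_values come from test_utils.h; the test name and input values are illustrative, and reading activation_func::linear with {1.0f, 0.5f} as a*x + b is an assumption):

#include <api/memory.hpp>
#include <api/input_layout.hpp>
#include <api/activation.hpp>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
#include "test_utils/test_utils.h"
#include <gtest/gtest.h>
#include <vector>

using namespace cldnn;
using namespace tests;

TEST(memory_pool_doc, scoped_activation_enum_sketch) {
    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
    set_values(input, { -1.0f, 0.5f, 2.0f, -3.0f });

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    // Scoped enum values replace activation_relu / activation_linear.
    topology.add(activation("relu", "input", activation_func::relu));
    topology.add(activation("lin", "relu", activation_func::linear, { 1.0f, 0.5f }));

    network network(engine, topology);
    network.set_input_data("input", input);
    auto outputs = network.execute();
    auto output = outputs.at("lin").get_memory();
    auto output_ptr = output.pointer<float>();

    // relu clamps negatives to zero, then linear applies a*x + b (assumed {a, b} order).
    std::vector<float> expected = { 0.5f, 1.0f, 2.5f, 0.5f };
    for (size_t i = 0; i < expected.size(); ++i) {
        EXPECT_FLOAT_EQ(output_ptr[i], expected[i]) << " index = " << i;
    }
}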
index da2cc38..5cf057b 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/mvn.hpp"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/memory.hpp>
+#include <api/input_layout.hpp>
+#include "api/mvn.hpp"
+#include "api/reorder.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include <iostream>
 #include "float16.h"
index 0e0a089..5bd22ac 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/one_hot.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/one_hot.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 #include "test_utils/uniform_quantized_real_distribution.hpp"
index 800469d..31fbc03 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/permute.hpp"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/permute.hpp"
+#include "api/reorder.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
-#include <api/CPP/fully_connected.hpp>
-#include <api/CPP/reshape.hpp>
-#include <api/CPP/crop.hpp>
+#include <api/data.hpp>
+#include <api/fully_connected.hpp>
+#include <api/reshape.hpp>
+#include <api/crop.hpp>
 #include <cmath>
 #include <gmock/gmock.h>
 #include <limits>
@@ -36,12 +36,10 @@ using namespace cldnn;
 using namespace tests;
 using namespace testing;
 
-
 TEST(permute_gpu_f32, output_ordering_test)
 {
     const auto& engine = get_test_engine();
 
-
     std::vector<std::vector<int32_t>> input_tensors =
     {
         { 10, 5, 15, 2 },{ 2, 4, 6, 8 },{ 2, 2, 3, 2 },{ 9, 8, 7, 4 }
@@ -108,7 +106,6 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3)
     //
     //  Output = input
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -143,7 +140,6 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3)
 
     auto output = outputs.begin()->second.get_memory();
 
-
     auto output_ptr = output.pointer<float>();
     for (int i = 0; i < 24; i++)
     {
index 8488477..e9b1487 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/pooling.hpp"
-#include "api/CPP/mutable_data.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/pooling.hpp"
+#include "api/mutable_data.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include "api/CPP/reorder.hpp"
-#include <api/CPP/data.hpp>
+#include "api/reorder.hpp"
+#include <api/data.hpp>
 #include "test_utils/float16.h"
 
 using namespace cldnn;
@@ -1100,7 +1100,6 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) {
     //  f0: b0:  4    4   b1:   15    13
     //  f1: b0:  10  11   b1:   21    23
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -1178,7 +1177,6 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) {
     //  f0: b0:  4    4   b1:   15    13
     //  f1: b0:  10  11   b1:   21    23
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 3, 2, 1 } });
@@ -1238,7 +1236,6 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) {
     }
 }
 
-
 TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
     //  Input  : 2x2x3x2
     //  Argmax : 2x2x2x1
@@ -1259,7 +1256,6 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
     //  f0: b0:  4    4   b1:   15    13
     //  f1: b0:  10  11   b1:   21    23
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -1339,7 +1335,6 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) {
     //  f0: b0:  4    4   b1:   15    13
     //  f1: b0:  10  11   b1:   21    23
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -1429,7 +1424,6 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) {
     //  f0: b0:  4    4   b1:   15    13
     //  f1: b0:  10  11   b1:   21    23
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -2006,8 +2000,6 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
     //  [0,    1, -0.5,  0,  0]
     //  [0,    0,    0,  0,  0]
 
-
-
         tensor input_tensor(1, 1, 3, 3);
         auto input_prim = memory::allocate(engine, { data_types::f16, format::bfyx, input_tensor });
 
@@ -2025,7 +2017,6 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
             FLOAT16(-1.00f), FLOAT16(-1.00f), FLOAT16(-0.50f)
             });
 
-
         network.set_input_data("input_prim", input_prim);
 
         std::vector<float> expected = {
@@ -2214,11 +2205,7 @@ public:
         {
             delete generic_params;
         }
-
-        for (auto layer_params : all_layer_params)
-        {
-            delete layer_params;
-        }
+        all_layer_params.clear();
     }
 
     static tensor generate_input_offset(int x, int y, const tensor& window_size)
@@ -2226,7 +2213,7 @@ public:
         return tensor(0, 0, -std::min(x, window_size.spatial[0] - 1), -std::min(y, window_size.spatial[1] - 1));
     }
 
-    static std::vector<cldnn::primitive*> generate_specific_test_params()
+    static std::vector<std::shared_ptr<cldnn::primitive>> generate_specific_test_params()
     {
         std::vector<pooling_mode> pooling_modes = { pooling_mode::max, pooling_mode::average, pooling_mode::average_no_padding };
 
@@ -2241,23 +2228,23 @@ public:
                 for (auto stride : strides)
                 {
                     // No padding
-                    all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode, size, stride));
-                    all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_input_offset(4, 3, size)));
+                    all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride));
+                    all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_input_offset(4, 3, size)));
 
                     // Input padding
-                    all_layer_params.push_back(new pooling("pooling", "reorder0", pooling_mode, size, stride));
+                    all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride));
 
                     // Output padding
-                    all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_input_offset(2, 3, size), { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
+                    all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_input_offset(2, 3, size), { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
 
                     // Input + output padding
-                    all_layer_params.push_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_input_offset(2, 3, size), { { 0, 0, 2, 1 },{ 0, 0, 3, 4 } }));
+                    all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_input_offset(2, 3, size), { { 0, 0, 2, 1 },{ 0, 0, 3, 4 } }));
                 }
             }
         }
 
         // This case tests the pooling_gpu_bfyx_average_opt kernel.
-        all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode::average, tensor(1, 1, 3, 3), tensor(1, 1, 1, 1), generate_input_offset(1, 1, tensor(1, 1, 3, 3))));
+        all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode::average, tensor(1, 1, 3, 3), tensor(1, 1, 1, 1), generate_input_offset(1, 1, tensor(1, 1, 3, 3))));
 
         return all_layer_params;
     }
@@ -2301,7 +2288,7 @@ public:
 
     virtual cldnn::tensor get_expected_output_tensor()
     {
-        const cldnn::pooling* pooling = (cldnn::pooling*)layer_params;
+        auto pooling = std::static_pointer_cast<cldnn::pooling>(layer_params);
 
         int batch = generic_params->input_layouts[0].size.batch[0];
         int feature = generic_params->input_layouts[0].size.feature[0];
@@ -2336,15 +2323,13 @@ public:
     template<typename Type>
     memory generate_reference_typed(const std::vector<cldnn::memory>& inputs)
     {
-        const cldnn::pooling* pooling = (cldnn::pooling*)layer_params;
+        auto pooling = std::static_pointer_cast<cldnn::pooling>(layer_params);
 
         int batch = inputs[0].get_layout().size.batch[0];
         int feature = inputs[0].get_layout().size.feature[0];
         int height = inputs[0].get_layout().size.spatial[1];
         int width = inputs[0].get_layout().size.spatial[0];
 
-
-
         cldnn::pooling_mode pooling_mode = pooling->mode;
 
         int input_offset_width = pooling->input_offset.spatial[0];
@@ -2518,11 +2503,11 @@ public:
 private:
 
     static std::vector<tests::test_params*> all_generic_params;
-    static std::vector<cldnn::primitive*> all_layer_params;
+    static std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
 
 };
 
-std::vector<cldnn::primitive*> pooling_test::all_layer_params = {};
+std::vector<std::shared_ptr<cldnn::primitive>> pooling_test::all_layer_params = {};
 std::vector<tests::test_params*> pooling_test::all_generic_params = {};
 
 TEST_P(pooling_test, POOLING)
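The pooling fixture above drops the manual delete loop in favour of std::vector<std::shared_ptr<cldnn::primitive>> storage, with std::static_pointer_cast replacing the old C-style casts. A condensed sketch of that ownership pattern follows (fill_params and use_params are illustrative names; the pooling constructor arguments are copied from the hunks above):

#include <memory>
#include <vector>
#include <api/pooling.hpp>
#include <api/tensor.hpp>

using namespace cldnn;

// Shared ownership: clearing the vector releases every primitive.
static std::vector<std::shared_ptr<primitive>> all_layer_params;

static void fill_params() {
    tensor size(1, 1, 3, 3);
    tensor stride(1, 1, 1, 1);
    // emplace_back takes ownership; no matching delete is needed anywhere.
    all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode::average, size, stride));
}

static void use_params() {
    for (const auto& p : all_layer_params) {
        // static_pointer_cast keeps the reference count, unlike the old (cldnn::pooling*) cast.
        auto pool = std::static_pointer_cast<pooling>(p);
        (void)pool->mode;
    }
    all_layer_params.clear();   // replaces: for (auto lp : all_layer_params) delete lp;
}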
index fee9d7f..8d6f1e9 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/concatenation.hpp>
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/reshape.hpp>
+#include <api/concatenation.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
+#include <api/reshape.hpp>
 
 using namespace cldnn;
 using namespace tests;
index c1b818d..2e40ec0 100644 (file)
 #include <fstream>
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/proposal.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include <api/proposal.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include "test_utils/float16.h"
 
index 2f2e083..3c138f3 100644 (file)
@@ -25,7 +25,6 @@
 // post nms topn: 150 -> 25
 // !!!!!!!!
 
-
 float cls_scores_data[] = {
     0.999760f, 0.997614f, 0.999854f, 0.996280f, 0.994689f, 0.999543f, 0.999865f, // 0
     0.999969f, 0.999885f, 0.999879f, 0.999758f, 0.999719f, 0.999626f, 0.999386f, // 7
@@ -859,7 +858,6 @@ float cls_scores_data[] = {
 
 size_t cls_scores_data_size = sizeof(cls_scores_data) / sizeof(cls_scores_data[0]);
 
-
 float bbox_pred_data[] = {
     0.006756f, 0.062491f, 0.113831f, 0.063944f, 0.024297f, 0.009997f, -0.043972f, // 0
     -0.051204f, -0.036587f, -0.048956f, -0.021944f, -0.011054f, -0.023826f, -0.003094f, // 7
@@ -2519,7 +2517,6 @@ float bbox_pred_data[] = {
     -0.032304f, -0.061007f, 0.021732f, 0.020398f, -0.115368f, -0.094854f, -0.119841f, // 11585
 };
 
-
 size_t bbox_pred_data_size = sizeof(bbox_pred_data) / sizeof(bbox_pred_data[0]);
 
 float proposal_ref[] = {
index db7a9d2..85cb3af 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/pyramid_roi_align.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/mutable_data.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/pyramid_roi_align.hpp>
+#include <api/memory.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/mutable_data.hpp>
 
 #include "test_utils/test_utils.h"
 
-
 using namespace cldnn;
 using namespace tests;
 
index 2cce341..b384a32 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/engine.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/quantize.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/engine.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/quantize.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include "test_utils/test_utils.h"
 
 #include <cstddef>
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 #include <src/include/to_string_utils.h>
 
-
 using namespace cldnn;
 using namespace ::tests;
 
-
 TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) {
     const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 16, 2, 2}});
@@ -69,29 +67,29 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) {
                              4.0f, 5.0f, 6.0f, 7.0f,
                              7.0f, 6.0f, 5.0f, 4.0f,
                              3.0f, 2.0f, 1.0f, 0.0f });
-    set_values(output_low,  { 0.0f });
-    set_values(output_high, { 1.0f });
+    set_values(output_low,  { -1.0f });
+    set_values(output_high, {  1.0f });
 
     // 0 1 1 0  0 0 0 0  0 0 0 0  0 1 1 1
     // 1 1 1 1  0 1 0 0  0 0 1 1  0 1 1 1
     // 1 1 1 0  0 0 0 0  0 0 0 0  0 1 0 1
     // 1 1 1 0  0 0 0 0  0 0 0 0  0 1 0 1
-    std::vector<float> ref_data = { 0, 1, 1, 1,
-                                    1, 1, 1, 1,
-                                    1, 1, 1, 1,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0,
-                                    1, 1, 1, 1,
-                                    1, 1, 0, 0,
-                                    1, 1, 1, 1 };
+    std::vector<float> ref_data = { -1,  1,  1,  1,
+                                     1,  1,  1,  1,
+                                     1,  1,  1,  1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                     1,  1,  1,  1,
+                                     1,  1, -1, -1,
+                                     1,  1,  1,  1 };
 
     topology topology;
     topology.add(
@@ -99,9 +97,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) {
         data("input_low", input_low),
         data("input_high", input_high),
         data("output_low", output_low),
-        data("output_high", output_high)
-    );
-    topology.add(
+        data("output_high", output_high),
         quantize("quantize", "input", "input_low", "input_high", "output_low", "output_high", 2)
     );
 
@@ -123,6 +119,136 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) {
     }
 }
 
+TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) {
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 8, 2, 2}});
+    auto input_thresh = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 8, 1, 1 } });
+    auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } });
+    auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f,
+                         5.0f, 2.0f, 2.0f, 3.0f,
+                         4.0f, 6.0f, 3.0f, 3.0f,
+                         3.0f, 5.0f, 1.0f, 1.0f,
+
+                         1.0f, 1.0f, 1.0f, 1.0f,
+                         4.0f, 6.0f, 3.0f, 3.0f,
+                         3.0f, 5.0f, 1.0f, 1.0f,
+                         1.0f, 1.0f, 1.0f, 1.0f });
+
+    set_values(input_thresh,  { 0.0f, 1.0f, 2.0f, 3.0f,
+                                4.0f, 5.0f, 6.0f, 7.0f });
+
+    set_values(output_low,  { -1.0f });
+    set_values(output_high, {  1.0f });
+
+    // Per-channel binarization with threshold = channel index (input_low == input_high):
+    // ch0..ch3: -1  1  1  1 |  1  1  1  1 |  1  1  1  1 | -1  1 -1 -1
+    // ch4..ch7: -1 -1 -1 -1 | -1  1 -1 -1 | -1 -1 -1 -1 | -1 -1 -1 -1
+    std::vector<float> ref_data = { -1,  1,  1,  1,
+                                     1,  1,  1,  1,
+                                     1,  1,  1,  1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1, -1, -1, -1 };
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout()),
+        data("input_low", input_thresh),
+        data("input_high", input_thresh),
+        data("output_low", output_low),
+        data("output_high", output_high),
+        quantize("quantize", "input", "input_low", "input_high", "output_low", "output_high", 2)
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("quantize").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    // Check that layout and memory contain the logical size of the tensor
+    ASSERT_EQ(output.count(), (size_t)32);
+    ASSERT_EQ(output.get_layout().count(), (size_t)32);
+
+    ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint32_t));
+
+    for (size_t i = 0; i < ref_data.size(); ++i) {
+        EXPECT_EQ(output_ptr[i], ref_data[i]) << " index = " << i;
+    }
+}
+
+TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) {
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 8, 2, 2}});
+    auto input_thresh = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 8, 1, 1 } });
+    auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } });
+    auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f,
+                         5.0f, 2.0f, 2.0f, 3.0f,
+                         4.0f, 6.0f, 3.0f, 3.0f,
+                         3.0f, 5.0f, 1.0f, 1.0f,
+
+                         1.0f, 1.0f, 1.0f, 1.0f,
+                         4.0f, 6.0f, 3.0f, 3.0f,
+                         3.0f, 5.0f, 1.0f, 1.0f,
+                         1.0f, 1.0f, 1.0f, 1.0f });
+
+    set_values(input_thresh,  { 0.0f, 1.0f, 2.0f, 3.0f,
+                                4.0f, 5.0f, 6.0f, 7.0f });
+    set_values(output_low,  { -1.0f });
+    set_values(output_high, {  1.0f });
+
+    // Per-channel binarization with threshold = channel index (input_low == input_high):
+    // ch0..ch3: -1  1  1  1 |  1  1  1  1 |  1  1  1  1 | -1  1 -1 -1
+    // ch4..ch7: -1 -1 -1 -1 | -1  1 -1 -1 | -1 -1 -1 -1 | -1 -1 -1 -1
+    std::vector<float> ref_data = { -1,  1,  1,  1,
+                                     1,  1,  1,  1,
+                                     1,  1,  1,  1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1, -1, -1, -1 };
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout()),
+        data("input_low", input_thresh),
+        data("input_high", input_thresh),
+        data("output_low", output_low),
+        data("output_high", output_high),
+        quantize("quantize", "input", "input_low", "input_high", "output_low", "output_high", 2),
+        reorder("reorder", "quantize", layout{data_types::f32, format::bfyx, tensor{1,8,2,2}})
+    );
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+    network network(engine, topology, bo);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reorder").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    // Check that layout and memory contain the logical size of the tensor
+    ASSERT_EQ(output.count(), (size_t)32);
+    ASSERT_EQ(output.get_layout().count(), (size_t)32);
+
+    ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint32_t));
+
+    for (size_t i = 0; i < ref_data.size(); ++i) {
+        EXPECT_EQ(output_ptr[i], ref_data[i]) << " index = " << i;
+    }
+}
+
 TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) {
     const cldnn::engine& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 16, 2, 2}});
@@ -153,25 +279,25 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) {
 
     set_values(input_low,  { 4.0f });
     set_values(input_high, { 4.0f });
-    set_values(output_low,  { 0.0f });
-    set_values(output_high, { 1.0f });
-
-    std::vector<float> ref_data = { 0, 0, 0, 0,
-                                    1, 0, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 0, 0, 0,
-                                    1, 0, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 1, 0, 0,
-                                    0, 0, 0, 0 };
+    set_values(output_low,  { -1.0f });
+    set_values(output_high, {  1.0f });
+
+    std::vector<float> ref_data = { -1, -1, -1, -1,
+                                     1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1, -1, -1, -1,
+                                     1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1,  1, -1, -1,
+                                    -1, -1, -1, -1 };
 
     topology topology;
     topology.add(
@@ -179,9 +305,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) {
         data("input_low", input_low),
         data("input_high", input_high),
         data("output_low", output_low),
-        data("output_high", output_high)
-    );
-    topology.add(
+        data("output_high", output_high),
         quantize("quantize", "input", "input_low", "input_high", "output_low", "output_high", 2)
     );
 
@@ -270,9 +394,7 @@ TEST(quantize_gpu, quantize_levels_3) {
         data("input_low", input_low),
         data("input_high", input_high),
         data("output_low", output_low),
-        data("output_high", output_high)
-    );
-    topology.add(
+        data("output_high", output_high),
         quantize("quantize", "input", "input_low", "input_high", "output_low", "output_high", 3)
     );
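The quantize hunks switch output_low/output_high from {0, 1} to {-1, 1} and fold the quantize primitive into a single topology.add() call. The updated ref_data vectors are consistent with a plain per-channel binarization; the sketch below is inferred from those reference values (it is not a quote of the GPU kernel), with the threshold taken per channel from input_low == input_high:

#include <cstddef>
#include <vector>

// levels == 2 with input_low == input_high per channel:
// out = in > threshold[channel] ? output_high : output_low
std::vector<float> binarize_reference(const std::vector<float>& input,      // channel-major, `spatial` values per channel
                                      const std::vector<float>& threshold,  // one value per channel
                                      size_t spatial,
                                      float output_low, float output_high) {
    std::vector<float> out(input.size());
    for (size_t c = 0; c < threshold.size(); ++c) {
        for (size_t s = 0; s < spatial; ++s) {
            const size_t i = c * spatial + s;
            out[i] = input[i] > threshold[c] ? output_high : output_low;
        }
    }
    return out;
}

// Example: in the ch8 test, channel 0 has threshold 0.0f, so { -1, 2, 3, 4 }
// maps to { -1, 1, 1, 1 }, which is the first row of ref_data above.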
 
index d8e77cd..0b65ef1 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/reduce.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/input_layout.hpp>
+#include "api/reduce.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 #include "test_utils/float16.h"
 
 using namespace cldnn;
index 2cf9b3e..c6c2577 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 
-#include <api/CPP/data.hpp>
-#include <api/CPP/reshape.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/shuffle_channels.hpp>
-#include <api/CPP/strided_slice.hpp>
+#include <api/data.hpp>
+#include <api/reshape.hpp>
+#include <api/input_layout.hpp>
+#include <api/shuffle_channels.hpp>
+#include <api/strided_slice.hpp>
 
 #include "test_utils/test_utils.h"
 
@@ -141,7 +141,7 @@ TEST(removing_output_node, output_node_optimization) {
     topology.add(input_layout("input", input.get_layout()));
     topology.add(data("weights", weights));
     topology.add(convolution("conv", "input", { "weights" }, { 1,1,1,2 }));
-    topology.add(activation("relu", "conv", activation_relu));
+    topology.add(activation("relu", "conv", activation_func::relu));
 
     network network(engine, topology);
     network.set_input_data("input", input);
index bd88346..1b07127 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/reorder.hpp"
-#include "api/CPP/crop.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/reshape.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/reorder.hpp"
+#include "api/crop.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/reshape.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
+#include <api/data.hpp>
 
 #include <cmath>
 #include <gmock/gmock.h>
@@ -560,7 +560,6 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) {
     }
 }
 
-
 TEST(reorder_gpu, basic_convert_int8) {
 
     const auto& engine = get_test_engine();
@@ -593,7 +592,7 @@ TEST(reorder_gpu, basic_convert_int8) {
         engine,
         topology,
         build_options{
-            build_option::outputs({ "reorder2"})
+            build_option::outputs({ "reorder_input", "reorder2"})
         });
 
     network.set_input_data("input", input_memory);
@@ -772,7 +771,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding)
 
     topology topology(
         input_layout("input", input.get_layout()),
-        reorder("reorder", "input", input.get_layout().format, input.get_layout().data_type, "", cldnn_reorder_mean_mode::mean_subtract, padding{ { 0, 0, 1, 2 }, 0 }),
+        reorder("reorder", "input", input.get_layout().format, input.get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 1, 2 }, 0 }),
         reorder("reorder2", "reorder", output_layout));
 
     network network(engine, topology);
@@ -851,7 +850,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding)
 
     topology topology(
         input_layout("input", input.get_layout()),
-        reorder("reorder", "input", input.get_layout().format, input.get_layout().data_type, "", cldnn_reorder_mean_mode::mean_subtract, padding{ { 0, 0, 2, 1 }, 0 }),
+        reorder("reorder", "input", input.get_layout().format, input.get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 2, 1 }, 0 }),
         reorder("reorder2", "reorder", output_layout));
 
     network network(engine, topology);
@@ -1137,7 +1136,7 @@ TEST(reorder_gpu_opt, remove_redundant_activation_fuse)
     topology tpl{
         input_layout("in", in.get_layout()),
         reorder("r1", "in", format::bfyx, data_types::f32),
-        activation("relu", "r1", cldnn_activation_func::activation_relu_negative_slope, {0.01f, 0.0f}),
+        activation("relu", "r1", activation_func::relu_negative_slope, {0.01f, 0.0f}),
         data("scale_data", scale_mem),
         scale("output", "relu", "scale_data")
     };
@@ -1266,7 +1265,6 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant)
     EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx);
 }
 
-
 TEST(reorder_gpu_opt, mean_mul)
 {
     engine eng;
@@ -1284,7 +1282,7 @@ TEST(reorder_gpu_opt, mean_mul)
     topology tpl{
         input_layout("in", in.get_layout()),
         data("mul",mul),
-        reorder("r1", "in", format::bfyx, data_types::f32,"mul", cldnn_reorder_mean_mode::mean_mul)
+        reorder("r1", "in", format::bfyx, data_types::f32,"mul", reorder_mean_mode::mul)
     };
 
     float answers[] = { 0.5f, 5.0f, -15.0f, 17.2f, 6.0f, -21.0f };
@@ -1302,7 +1300,6 @@ TEST(reorder_gpu_opt, mean_mul)
 
 }
 
-
 TEST(reorder_gpu_opt, mean_div)
 {
     engine eng;
@@ -1320,7 +1317,7 @@ TEST(reorder_gpu_opt, mean_div)
     topology tpl{
         input_layout("in", in.get_layout()),
         data("mul",mul),
-        reorder("r1", "in", format::bfyx, data_types::f32,"mul", cldnn_reorder_mean_mode::mean_div)
+        reorder("r1", "in", format::bfyx, data_types::f32,"mul", reorder_mean_mode::div)
     };
 
     float answers[] = { 2.0f, 1.0f, -1.0f, 0.5f, 4.0f, -2.0f };
@@ -1338,7 +1335,6 @@ TEST(reorder_gpu_opt, mean_div)
 
 }
 
-
 TEST(reorder_gpu_opt, mean_mul_val)
 {
     engine eng;
@@ -1352,7 +1348,7 @@ TEST(reorder_gpu_opt, mean_mul_val)
     std::vector<float> mul_val = { 2.0f, 0.5f, 10.0f };
     topology tpl{
         input_layout("in", in.get_layout()),
-        reorder("r1", "in", format::bfyx, data_types::f32, mul_val, cldnn_reorder_mean_mode::mean_mul)
+        reorder("r1", "in", format::bfyx, data_types::f32, mul_val, reorder_mean_mode::mul)
     };
 
     float answers[] = { 2.0f, 4.0f, 1.5f, 2.0f, 50.0f, 600.0f };
@@ -1369,7 +1365,6 @@ TEST(reorder_gpu_opt, mean_mul_val)
         EXPECT_FLOAT_EQ(*(a_ptr++), val);;
 }
 
-
 TEST(reorder_gpu_opt, mean_mul_val_float_to_int)
 {
     engine eng;
@@ -1383,7 +1378,7 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int)
     std::vector<float> mul_val = { 1.4f, 0.5f, 5.0f };
     topology tpl{
         input_layout("in", in.get_layout()),
-        reorder("r1", "in", format::bfyx, data_types::i8, mul_val, cldnn_reorder_mean_mode::mean_mul)
+        reorder("r1", "in", format::bfyx, data_types::i8, mul_val, reorder_mean_mode::mul)
     };
 
     char answers[] = { 0, 2, 1, 2, 25, 127 };
@@ -1482,7 +1477,7 @@ TEST(reorder_gpu_i64, basic)
         EXPECT_EQ(*(a_ptr++), val);
 }
 
-TEST(reorder_gpu_binary, basic)
+TEST(reorder_gpu_binary, binary_output)
 {
     const auto& engine = get_test_engine();
 
@@ -1530,6 +1525,53 @@ TEST(reorder_gpu_binary, basic)
     }
 }
 
+TEST(reorder_gpu_binary, binary_input)
+{
+    const auto& engine = get_test_engine();
+
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+
+    auto input = memory::allocate(engine, { data_types::bin, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } });
+    layout output_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 });
+
+    // Data is supposed to be quantized to {0,1} values
+    std::vector<float> answers = {
+            1.f, -1.f, 1.f, 1.f,
+            -1.f, 1.f, 1.f, -1.f,
+
+            1.f, 1.f, -1.f, 1.f,
+            -1.f, -1.f, -1.f, 1.f
+    };
+
+    set_values<int32_t>(input, { 1, 2, 3, 1,
+                                 1, 1, 0, 3 });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        reorder("reorder", "input", output_layout));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "reorder");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    // Check that layout and memory contain the logical size of the tensor
+    ASSERT_EQ(output.count(), input.get_layout().count());
+    ASSERT_EQ(output.get_layout().count(), input.get_layout().count());
+
+    ASSERT_EQ(output.size(), answers.size() * sizeof(float));
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]) << "index: " << i;
+    }
+}
+
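The new binary_input test feeds packed data_types::bin data through a reorder to f32. The expansion implied by its answers vector is sketched below: each 32-bit word of format::b_fs_yx_32fp holds one bit per feature for a single (batch, y, x) position, and a 0 bit becomes -1.f while a 1 bit becomes +1.f. This is an inference from the test values, not a quote of the kernel, and the helper name is illustrative:

#include <cstdint>
#include <vector>

std::vector<float> unpack_b_fs_yx_32fp_to_bfyx(const std::vector<int32_t>& words,
                                               int batches, int features, int spatial) {
    std::vector<float> out(static_cast<size_t>(batches) * features * spatial);
    for (int b = 0; b < batches; ++b) {
        for (int s = 0; s < spatial; ++s) {
            const int32_t word = words[b * spatial + s];   // one word per (b, y, x) position
            for (int f = 0; f < features; ++f) {
                const bool bit = (word >> f) & 1;
                // bfyx ordering: batch-major, then feature, then spatial
                out[(b * features + f) * spatial + s] = bit ? 1.f : -1.f;
            }
        }
    }
    return out;
}

// unpack_b_fs_yx_32fp_to_bfyx({ 1, 2, 3, 1, 1, 1, 0, 3 }, 2, 2, 4) reproduces
// the answers vector of the binary_input test above.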
 TEST(reorder_gpu_f32, bfwzyx_bfyx_chain)
 {
     // Topology:
@@ -1607,6 +1649,30 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain)
     }
 }
 
+TEST(reorder_gpu, any_format) {
+    auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, layout(data_types::f32, format::yxfb, tensor(5, 7, 13, 9)));
+
+    topology topo;
+    topo.add(input_layout("in", input.get_layout()));
+    topo.add(reorder("out", "in", format::any, data_types::f32));
+
+    network net(engine, topo);
+
+    auto data = generate_random_1d<float>(input.count(), -1, 1);
+    set_values(input, data);
+    net.set_input_data("in", input);
+
+    auto outputs = net.execute();
+    auto out_mem = outputs.at("out").get_memory();
+    auto output = out_mem.pointer<float>();
+
+    for (size_t i = 0; i < data.size(); ++i) {
+        EXPECT_EQ(output[i], data[i]) << "i = " << i;
+    }
+}
+
 using namespace cldnn;
 
 class reorder_test : public tests::generic_test
@@ -1620,15 +1686,10 @@ public:
         {
             delete generic_params;
         }
-        for (auto test_param : all_test_params)
-        {
-            auto primitive = std::get<1>(test_param);
-            delete primitive;
-        }
+        all_test_params.clear();
     }
 
-
-    static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_specific_test_params()
+    static std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> generate_specific_test_params()
     {
         generic_test::generate_generic_test_params(all_generic_params);
 
@@ -1655,7 +1716,7 @@ public:
             for (const auto& output_layout : output_layouts)
             {
                 //TODO: check input + output padding.
-                all_test_params.push_back(std::make_tuple(test_param, new reorder("reorder", "input0", output_layout, subtract)));
+                all_test_params.emplace_back(std::make_tuple(test_param, std::make_shared<reorder>("reorder", "input0", output_layout, subtract)));
 
             }
         }
@@ -1675,7 +1736,7 @@ public:
     template<typename InputType, typename OutputType>
     memory generate_reference_typed(const std::vector<cldnn::memory>& inputs)
     {
-        const cldnn::reorder* reorder = (cldnn::reorder*)layer_params;
+        auto reorder = std::static_pointer_cast<cldnn::reorder>(layer_params);
         primitive_id mean = reorder->mean;
         std::vector<float> subtract_per_feature = reorder->subtract_per_feature;
         assert(mean == "");
@@ -1725,12 +1786,12 @@ public:
 private:
 
     static std::vector<tests::test_params*> all_generic_params;
-    static std::vector<std::tuple<test_params*, cldnn::primitive*>> all_test_params;
+    static std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> all_test_params;
 
 };
 
 std::vector<tests::test_params*> reorder_test::all_generic_params = {};
-std::vector<std::tuple<test_params*, cldnn::primitive*>> reorder_test::all_test_params = {};
+std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> reorder_test::all_test_params = {};
 
 TEST_P(reorder_test, REORDER)
 {
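In the reorder file the C-style cldnn_reorder_mean_mode::mean_subtract/mean_mul/mean_div values become reorder_mean_mode::subtract/mul/div, and the generic test fixture moves to std::shared_ptr storage like the pooling one. Below is a minimal sketch of a per-feature mean multiply with the new enum, reusing the constructor overload from mean_mul_val (the test name, layout and values are illustrative):

#include <api/memory.hpp>
#include <api/input_layout.hpp>
#include <api/reorder.hpp>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
#include "test_utils/test_utils.h"
#include <gtest/gtest.h>
#include <vector>

using namespace cldnn;
using namespace tests;

TEST(reorder_doc, mean_mul_sketch) {
    const auto& engine = get_test_engine();
    auto in = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 2, 1 } });
    set_values(in, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });

    std::vector<float> mul_val = { 2.0f, 0.5f, 10.0f };   // one multiplier per feature
    topology tpl{
        input_layout("in", in.get_layout()),
        // Scoped enum replaces cldnn_reorder_mean_mode::mean_mul.
        reorder("r1", "in", format::bfyx, data_types::f32, mul_val, reorder_mean_mode::mul)
    };

    network net(engine, tpl);
    net.set_input_data("in", in);
    auto outputs = net.execute();
    auto output = outputs.at("r1").get_memory();
    auto out_ptr = output.pointer<float>();

    EXPECT_FLOAT_EQ(out_ptr[0], 2.0f);   // feature 0: 1.0f * 2.0f
    EXPECT_FLOAT_EQ(out_ptr[2], 1.5f);   // feature 1: 3.0f * 0.5f
}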
index 1538e53..d39a5b9 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 
-#include <api/CPP/data.hpp>
-#include <api/CPP/reshape.hpp>
-#include <api/CPP/input_layout.hpp>
+#include <api/data.hpp>
+#include <api/reshape.hpp>
+#include <api/input_layout.hpp>
 
 #include "test_utils/test_utils.h"
 
@@ -458,11 +458,11 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) {
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(activation("relu", "input", activation_relu));
+    topology.add(activation("relu", "input", activation_func::relu));
     topology.add(reshape("reshape", "relu", tensor(batch(4))));
     topology.add(reorder("reorder1", "reshape", format::yxfb, data_types::f32));
-    topology.add(activation("relu1", "reorder1", activation_relu));
-    topology.add(activation("relu2", "reshape", activation_relu));
+    topology.add(activation("relu1", "reorder1", activation_func::relu));
+    topology.add(activation("relu2", "reshape", activation_func::relu));
 
     std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
     std::vector<float> out1 = {0.f, 0.f, 2.f, 4.0f};
index 64d8b24..ddb5edf 100644 (file)
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/reverse_sequence.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/reverse_sequence.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include <cstddef>
 #include <tests/test_utils/test_utils.h>
index 78908b1..d1cc8ff 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/scale.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/scale.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include "api/CPP/reorder.hpp"
+#include "api/reorder.hpp"
 
 #include <iostream>
 
@@ -1193,7 +1193,6 @@ TEST(scale_gpu, basic_in2x2x2x3x2_scale_same_size_bfzyx) {
     //  Input  : 2x2x2x3x2
     //  Output : 2x2x2x3x2
 
-
     const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 2, 3, 2 } });
@@ -1507,14 +1506,14 @@ public:
     }
 
     //TODO: use an enum instead of int i
-    static std::vector<cldnn::primitive*> generate_specific_test_params(int variant)
+    static std::vector<std::shared_ptr<cldnn::primitive>> generate_specific_test_params(int variant)
     {
-        std::vector<cldnn::primitive*> all_layer_params;
+        std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
 
         switch(variant)
         {
-            case 0: all_layer_params.push_back(new scale("scale", "input0", "input1")); break;
-            case 1: all_layer_params.push_back(new scale("scale", "input0", "input1", "input2")); break;
+            case 0: all_layer_params.emplace_back(new scale("scale", "input0", "input1")); break;
+            case 1: all_layer_params.emplace_back(new scale("scale", "input0", "input1", "input2")); break;
                     //    case 3: all_layer_params.push_back(new scale("scale", "input0", "input1", true));    // This case should be checked by negative_scale_test
                     //    case 4: all_layer_params.push_back(new scale("scale", "input0", "input1", false));    // This case should be checked by negative_scale_test
             default: assert(0);
@@ -1570,9 +1569,9 @@ public:
         return all_generic_params;
     }
 
-    static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_all_test_params()
+    static std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> generate_all_test_params()
     {
-        std::vector<std::tuple<test_params*, cldnn::primitive*>> res;
+        std::vector<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>> res;
 
         for (int variant = 0; variant <= 1; ++variant)
         {
@@ -1702,7 +1701,7 @@ public:
         }
     }
 
-    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
+    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>>& info)
     {
         std::stringstream res;
 
@@ -1732,10 +1731,10 @@ public:
 
 private:
     static std::vector<std::unique_ptr<tests::test_params>> all_generic_params;
-    static std::vector<std::unique_ptr<cldnn::primitive>> all_layer_params;
+    static std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
 };
 
-std::vector<std::unique_ptr<cldnn::primitive>> scale_test::all_layer_params = {};
+std::vector<std::shared_ptr<cldnn::primitive>> scale_test::all_layer_params = {};
 std::vector<std::unique_ptr<tests::test_params>> scale_test::all_generic_params = {};
 
 TEST_P(scale_test, SCALE)
index c7057d5..c365ef5 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/scale_grad_input.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/scale_grad_input.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 #include <iostream>
index 680c68e..0c4521d 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/data.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/mutable_data.hpp>
-#include "api/CPP/scale_grad_weights.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/data.hpp>
+#include <api/input_layout.hpp>
+#include <api/mutable_data.hpp>
+#include "api/scale_grad_weights.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 #include <iostream>
index abd2cff..d57a66a 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/select.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/select.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
index 630b3b8..f78548f 100644 (file)
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
 
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/shuffle_channels.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/shuffle_channels.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
 
 #include <cstddef>
 #include <tests/test_utils/test_utils.h>
@@ -71,7 +69,6 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_ax1_g5) {
     }
 }
 
-
 TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) {
     engine engine;
 
index 850137c..3e6b58d 100644 (file)
 */
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/softmax.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/softmax.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
 using namespace std;
 using namespace tests;
 
-
 class softmax_gpu_xb_f32_test_fixture: public ::testing::Test {
 public:
     static const int32_t
@@ -36,7 +35,6 @@ public:
         in_size   = input_x*input_b,
         out_size  = output_x*output_b;
 
-
     float in_buffer[in_size];
     float out_buffer[out_size];
     float expected_buffer[out_size];
@@ -294,7 +292,6 @@ TEST(softmax_gpu_bfyx_f32, normalize_y) {
         0.999962831f,   //b=0, f=2, x=0
         0.993307149f,   //b=0, f=2, x=1
 
-
         0.98201379f,    //b=1, f=0, x=0
         0.99998987f,    //b=1, f=0, x=1
 
@@ -878,15 +875,12 @@ public:
             delete generic_params;
         }
 
-        for (auto layer_params : all_layer_params)
-        {
-            delete layer_params;
-        }
+        all_layer_params.clear();
     }
 
-    static std::vector<cldnn::primitive*> generate_specific_test_params()
+    static std::vector<std::shared_ptr<cldnn::primitive>> generate_specific_test_params()
     {
-        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));
+        all_layer_params.emplace_back(new softmax("softmax", "input0", softmax::normalize_f));
 
         //The test checks only valid combinations.
         //TODO: add more combinations.
@@ -986,7 +980,7 @@ public:
         }
     }
 
-    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
+    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>>& info)
     {
         std::stringstream res;
 
@@ -1015,11 +1009,11 @@ public:
 private:
 
     static std::vector<tests::test_params*> all_generic_params;
-    static std::vector<cldnn::primitive*> all_layer_params;
+    static std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
 
 };
 
-std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
+std::vector<std::shared_ptr<cldnn::primitive>> softmax_test::all_layer_params = {};
 std::vector<tests::test_params*> softmax_test::all_generic_params = {};
 
 TEST_P(softmax_test, SOFTMAX)
index 302ca0b..9ed37dd 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/softmax_loss_grad.hpp"
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/softmax_loss_grad.hpp"
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
index 3c84efa..861b3d2 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/concatenation.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/concatenation.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -281,7 +281,6 @@ TEST(spatial_concatenate_f32_gpu, inputs_3) {
     tpl.add(input_layout("in3", input3.get_layout()));
     tpl.add(concatenation("conc", { "in1", "in2", "in3" }, concatenation::along_x));
 
-
     network net(eng, tpl);
     net.set_input_data("in1", input1);
     net.set_input_data("in2", input2);
index 921c382..b4ba3e1 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/split.hpp"
-#include "api/CPP/scale.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/reorder.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/split.hpp"
+#include "api/scale.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/reorder.hpp>
 #include "test_utils/test_utils.h"
 
 #include <sstream>
@@ -113,7 +113,7 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec
         }
 
         // For all the other dimensions, copy from the split_input
-        for (int dimension = 0; dimension < CLDNN_TENSOR_DIM_MAX; dimension++)
+        for (int dimension = 0; dimension < cldnn::tensor_dim_max; dimension++)
         {
             size.raw[dimension]
                 = (size.raw[dimension] == 0) ? reference_input_size.raw[dimension] : size.raw[dimension];
@@ -198,7 +198,6 @@ TEST(split_gpu, split_1d_uneven_2_splits) {
     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 }
 
-
 TEST(split_gpu, basic_split_concat_optimization) {
 
     const auto& engine = get_test_engine();
index b397c09..827978a 100644 (file)
 */
 
 #include <gtest/gtest.h>
-#include <api/CPP/tensor.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/memory.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/data.hpp>
+#include <api/tensor.hpp>
+#include <api/engine.hpp>
+#include <api/memory.hpp>
+#include <api/input_layout.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -41,7 +41,6 @@ static engine _engine(engine_configuration(false,
 TEST(gpu_streams, can_allocate_memory_for_stream)
 {
 
-
     ASSERT_NO_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4})));
     ASSERT_NO_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 0));
     ASSERT_NO_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 1));
@@ -69,7 +68,7 @@ TEST(gpu_streams, can_create_networks_for_stream)
 
     topology topology(
             input_layout("input", input.get_layout()),
-            activation("relu", "input", activation_relu_negative_slope, cldnn_activation_additional_params{ 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 }));
+            activation("relu", "input", activation_func::relu_negative_slope, activation_additional_params{ 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 }));
     network network(_engine, topology, build_options(), 1);
 
     ASSERT_ANY_THROW(cldnn::network network(_engine, topology, build_options(), 2));
@@ -96,7 +95,6 @@ TEST(gpu_streams, can_create_networks_for_stream)
     EXPECT_EQ(f_size, 1);
     EXPECT_EQ(b_size, 1);
 
-
     for (size_t i = 0; i < output_vec.size(); ++i) {
         EXPECT_FLOAT_EQ(output_vec[i], output_ptr[i]);
     }
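The gpu_streams hunk replaces cldnn_activation_func::activation_relu_negative_slope plus cldnn_activation_additional_params with the scoped activation_func::relu_negative_slope and activation_additional_params. For reference, the parameterized form is ordinary leaky ReLU; the small helper below states that rule explicitly (taking the first parameter, 0.5f here, as the negative slope is an assumption drawn from the enum name, not from the kernel source):

#include <cstddef>
#include <vector>

// activation_func::relu_negative_slope with activation_additional_params{ slope, 0.f }
// (slope assumed to be the first field): f(x) = x for x >= 0, slope * x otherwise.
std::vector<float> relu_negative_slope_ref(const std::vector<float>& in, float slope) {
    std::vector<float> out(in.size());
    for (size_t i = 0; i < in.size(); ++i) {
        out[i] = in[i] >= 0.f ? in[i] : slope * in[i];
    }
    return out;
}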
index c673071..b5d7e18 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/strided_slice.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/input_layout.hpp>
+#include "api/strided_slice.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/data.hpp>
-
+#include <api/data.hpp>
 
 using namespace cldnn;
 using namespace tests;
 
-
 TEST(strided_slice_gpu_f32, test_2x2x2x2) {
     // Input (BFYX): 2x2x2x2
     // Begin (BFYX): 0x0x0x0
index 3c48962..7b3393f 100644 (file)
@@ -15,7 +15,7 @@
 */
 
 #include <gtest/gtest.h>
-#include <api/CPP/tensor.hpp>
+#include <api/tensor.hpp>
 
 TEST(tensor_api, order_new_notation)
 {
@@ -24,7 +24,7 @@ TEST(tensor_api, order_new_notation)
     //sizes
     EXPECT_EQ(test.batch.size(), size_t(1));
     EXPECT_EQ(test.feature.size(), size_t(1));
-    EXPECT_EQ(test.spatial.size(), size_t(CLDNN_TENSOR_SPATIAL_DIM_MAX));
+    EXPECT_EQ(test.spatial.size(), size_t(cldnn::tensor_spatial_dim_max));
 
     //passed values
     EXPECT_EQ(test.spatial[0], cldnn::tensor::value_type(2));
@@ -47,7 +47,7 @@ TEST(tensor_api, order_new_notation_feature_default)
     //sizes
     EXPECT_EQ(test.batch.size(), size_t(1));
     EXPECT_EQ(test.feature.size(), size_t(1));
-    EXPECT_EQ(test.spatial.size(), size_t(CLDNN_TENSOR_SPATIAL_DIM_MAX));
+    EXPECT_EQ(test.spatial.size(), size_t(cldnn::tensor_spatial_dim_max));
 
     //passed values
     EXPECT_EQ(test.spatial[0], cldnn::tensor::value_type(2));
@@ -70,7 +70,7 @@ TEST(tensor_api, order)
     //sizes
     EXPECT_EQ(test.batch.size(), size_t(1));
     EXPECT_EQ(test.feature.size(), size_t(1));
-    EXPECT_EQ(test.spatial.size(), size_t(CLDNN_TENSOR_SPATIAL_DIM_MAX));
+    EXPECT_EQ(test.spatial.size(), size_t(cldnn::tensor_spatial_dim_max));
 
     //passed values
     EXPECT_EQ(test.spatial[1], cldnn::tensor::value_type(4));
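The tensor_api and split tests replace the CLDNN_TENSOR_SPATIAL_DIM_MAX / CLDNN_TENSOR_DIM_MAX macros with the cldnn::tensor_spatial_dim_max and cldnn::tensor_dim_max constants. A short sketch of the constant in use, mirroring the raw-dimension loop from split_test above (the helper name is illustrative):

#include <api/tensor.hpp>

// Fill every unspecified dimension of `size` from `reference`, as split_test does.
// cldnn::tensor_dim_max replaces the old CLDNN_TENSOR_DIM_MAX macro.
static cldnn::tensor complete_size(cldnn::tensor size, const cldnn::tensor& reference) {
    for (int dimension = 0; dimension < cldnn::tensor_dim_max; dimension++) {
        size.raw[dimension] =
            (size.raw[dimension] == 0) ? reference.raw[dimension] : size.raw[dimension];
    }
    return size;
}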
index d3c55d6..897df97 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/tile.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/tile.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 
 #include <iostream>
@@ -87,7 +87,6 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
     network network(engine, topology);
     network.set_input_data("input", input);
 
-
     auto outputs = network.execute();
 
     auto output = outputs.at("tile").get_memory();
@@ -120,7 +119,6 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
     network network(engine, topology);
     network.set_input_data("input", input);
 
-
     auto outputs = network.execute();
 
     auto output = outputs.at("tile").get_memory();
@@ -153,7 +151,6 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
     network network(engine, topology);
     network.set_input_data("input", input);
 
-
     auto outputs = network.execute();
 
     auto output = outputs.at("tile").get_memory();
@@ -251,7 +248,6 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
     network network(engine, topology);
     network.set_input_data("input", input);
 
-
     auto outputs = network.execute();
 
     auto output = outputs.at("tile").get_memory();
index 3491933..277b93a 100644 (file)
@@ -1,22 +1,22 @@
 #include <gtest/gtest.h>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include <include/topology_impl.h>
 #include <iostream>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/lrn.hpp>
-#include <api/CPP/convolution.hpp>
-#include <api/CPP/fully_connected.hpp>
-#include <api/CPP/pooling.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/scale.hpp>
-#include <api/CPP/eltwise.hpp>
-#include <api/CPP/softmax.hpp>
-#include <api/CPP/activation.hpp>
-#include <api/CPP/concatenation.hpp>
+#include "api/memory.hpp"
+#include <api/lrn.hpp>
+#include <api/convolution.hpp>
+#include <api/fully_connected.hpp>
+#include <api/pooling.hpp>
+#include <api/data.hpp>
+#include <api/reorder.hpp>
+#include <api/scale.hpp>
+#include <api/eltwise.hpp>
+#include <api/softmax.hpp>
+#include <api/activation.hpp>
+#include <api/concatenation.hpp>
 #include <deque>
 #include <set>
 
@@ -167,7 +167,7 @@ protected:
                 float k = 1.0f;
                 float alpha = 0.0001f;
                 float beta = 0.75f;
-                cldnn_lrn_norm_region norm_type = cldnn_lrn_norm_region_across_channel;
+                cldnn::lrn_norm_region norm_type = cldnn::lrn_norm_region_across_channel;
                 topology.add(cldnn::lrn(id, input_id, size, k, alpha, beta, norm_type));
                 return true;
             }
@@ -240,7 +240,7 @@ protected:
                 // todo: randomize params
                 cldnn::primitive_id input_id = topology_generator::CreateLayerId();
                 input_layouts.push_back({ input_id, output_layout });
-                topology.add(cldnn::activation(id, input_id, activation_relu));
+                topology.add(cldnn::activation(id, input_id, cldnn::activation_func::relu));
                 return true;
             }
         };
index 428881f..99ca881 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/concatenation.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
-#include <api/CPP/data.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/concatenation.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
+#include <api/data.hpp>
 #include "test_utils/test_utils.h"
 
 using namespace cldnn;
@@ -33,7 +33,6 @@ using namespace tests;
     This set of tests has been designed to check the correctness of trim_to_outputs optimization pass
 */
 
-
 /*
    In this test we check if the convolution conv2 will be eliminated from the network. This is expected to be done in trim_to_outputs optimization pass
 
index 8adf590..c63fa4a 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include <gtest/gtest.h>
-#include "api/CPP/memory.hpp"
-#include <api/CPP/input_layout.hpp>
-#include "api/CPP/upsampling.hpp"
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/input_layout.hpp>
+#include "api/upsampling.hpp"
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils/test_utils.h"
-#include <api/CPP/reorder.hpp>
-#include <api/CPP/data.hpp>
+#include <api/reorder.hpp>
+#include <api/data.hpp>
 
 using namespace cldnn;
 using namespace tests;
@@ -45,11 +45,11 @@ TEST(upsampling_gpu, basic_in2x3x2x2_nearest) {
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
 
-    uint32_t scale = 2;
+    auto output_size = tensor(batch(2), feature(2), spatial(6, 4));
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(upsampling("upsampling", "input", scale, 0.0f, upsampling_sample_type::nearest));
+    topology.add(upsampling("upsampling", "input", output_size, 0.0f, upsampling_sample_type::nearest));
 
     set_values(input, {
         1.f, 2.f, -10.f,
@@ -116,11 +116,11 @@ TEST(upsampling_gpu, basic_in2x3x2x2_bilinear) {
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
 
-    uint32_t scale = 2;
+    auto output_size = tensor(batch(1), feature(1), spatial(4, 4));
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(upsampling("upsampling", "input", scale, 1.0f, upsampling_sample_type::bilinear));
+    topology.add(upsampling("upsampling", "input", output_size, 1.0f, upsampling_sample_type::bilinear));
 
     set_values(input, {
         1.f, 2.f,
@@ -151,3 +151,103 @@ TEST(upsampling_gpu, basic_in2x3x2x2_bilinear) {
         }
     }
 }
+
+TEST(upsampling_gpu, nearest_asymmetric) {
+    //  Input  : 1x1x2x2
+    //  Output : 1x1x5x4
+    //  Sample Type: Nearest
+
+    //  Input:
+    //  f0: b0:  1    2
+    //  f0: b0:  3    4
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
+
+    auto output_size = tensor(batch(1), feature(1), spatial(5, 4));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(upsampling("upsampling", "input", output_size, 1.0f, upsampling_sample_type::nearest));
+
+    set_values(input, {
+        1.f, 2.f,
+        3.f, 4.f,
+    });
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("upsampling").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    EXPECT_EQ(output.get_layout().get_linear_size(), (size_t)20);
+
+    float answers[20] = {
+        1.f, 1.f, 1.f, 2.f, 2.f,
+        1.f, 1.f, 1.f, 2.f, 2.f,
+        3.f, 3.f, 3.f, 4.f, 4.f,
+        3.f, 3.f, 3.f, 4.f, 4.f,
+    };
+
+    for (int k = 0; k < 4; ++k) { //Y
+        for (int l = 0; l < 5; ++l) { //X
+            auto linear_id = l + k * 5;
+            EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 1e-05F);
+        }
+    }
+}
+
+TEST(upsampling_gpu, bilinear_asymmetric) {
+    //  Input  : 1x1x2x2
+    //  Output : 1x1x6x4
+    //  Sample Type: Bilinear
+
+    //  Input:
+    //  f0: b0:  1    2
+    //  f0: b0:  3    4
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
+
+    auto output_size = tensor(batch(1), feature(1), spatial(6, 4));
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(upsampling("upsampling", "input", output_size, 1.0f, upsampling_sample_type::bilinear));
+
+    set_values(input, {
+        1.f, 2.f,
+        3.f, 4.f,
+    });
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("upsampling").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    EXPECT_EQ(output.get_layout().get_linear_size(), (size_t)24);
+
+    float answers[24] = {
+        0.5f, 0.75f, 1.f, 1.25f, 1.5f, 1.f,
+        1.f, 1.5f, 1.83f, 2.17f, 2.5f, 1.67f,
+        1.67f, 2.5f, 2.83f, 3.17f, 3.5f, 2.33f,
+        1.5f, 2.25f, 2.5f, 2.75f, 3.f, 2.f,
+    };
+
+    for (int k = 0; k < 4; ++k) { //Y
+        for (int l = 0; l < 6; ++l) { //X
+            auto linear_id = l + k * 6;
+            EXPECT_NEAR(answers[linear_id], output_ptr[linear_id], 5e-03F) << l << " " << k;
+        }
+    }
+}
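
The two tests above rely on the upsampling primitive now taking an explicit output tensor rather than a single uint32_t scale factor; that is what makes the asymmetric 2x2 -> 5x4 and 2x2 -> 6x4 cases expressible. A minimal sketch of deriving such an output tensor from per-axis factors, using the same clDNN test helpers as the tests above (the factors and primitive ids are illustrative):

    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });

    // Per-axis scale factors no longer need to be equal.
    cldnn::tensor::value_type scale_x = 3, scale_y = 2;
    auto in_size = input.get_layout().size;
    auto output_size = tensor(batch(in_size.batch[0]), feature(in_size.feature[0]),
                              spatial(in_size.spatial[0] * scale_x, in_size.spatial[1] * scale_y));

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(upsampling("upsampling", "input", output_size, 1.0f, upsampling_sample_type::nearest));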
index 8288a38..45eb159 100644 (file)
@@ -17,7 +17,6 @@
 #pragma once
 #include "include/math_utils.h"
 
-
 struct FLOAT16
 {
     struct representation
@@ -79,7 +78,6 @@ struct FLOAT16
     }
 };
 
-
 inline FLOAT16 operator +(const FLOAT16 &v1, const FLOAT16 &v2)
 {
     return (float)v1 + (float)v2;
index 179bdbd..35cbc8c 100644 (file)
 #include <vector>
 #include <iostream>
 
-
 namespace instrumentation {
     // initialize dumping directory for the whole run
     const std::string logger::dump_dir = DUMP_DIRECTORY;
 
-    static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false)
+    static float convert_half_to_float(cldnn::half_t val, bool flush_denorm_to_zero = false)
     {
 #if defined HALF_HALF_HPP
         return val;
@@ -82,7 +81,7 @@ namespace instrumentation {
         return f;
     }
 
-    float convert_element(half_t h)
+    float convert_element(cldnn::half_t h)
     {
         return convert_half_to_float(h);
     }
@@ -412,7 +411,7 @@ namespace instrumentation {
         if (mem.get_layout().data_type == cldnn::data_types::f32)
             dump<float>(mem, dump_strings);
         else
-            dump<half_t>(mem, dump_strings);
+            dump<cldnn::half_t>(mem, dump_strings);
 
         for (cldnn::tensor::value_type b = 0; b < batch; b++)
             for (cldnn::tensor::value_type f = 0; f < feature; f++)
@@ -434,7 +433,7 @@ namespace instrumentation {
         if (mem.get_layout().data_type == cldnn::data_types::f32)
             dump<float>(mem, stream);
         else
-            dump<half_t>(mem, stream);
+            dump<cldnn::half_t>(mem, stream);
 
         std::string filename((dump_dir + "/" + prefix + ".txt"));
         std::ofstream file_stream(filename);
index 85a791d..9c46782 100644 (file)
@@ -18,7 +18,7 @@
 #include <chrono>
 #include <sstream>
 #include <iomanip>
-#include "api/CPP/memory.hpp"
+#include "api/memory.hpp"
 
 #define DUMP_DIRECTORY "./"
 
index 6e23969..c7209f0 100644 (file)
@@ -66,7 +66,6 @@ namespace rnd_generators
     static_assert(number_caps<float>::inv_exp2(7) == 0.0078125f, "1/exp2(7)");
     static_assert(number_caps<float>::inv_exp2(8) == 0.00390625f, "1/exp2(8)");
 
-
     template <>
     struct number_caps<double>
     {
@@ -155,7 +154,6 @@ namespace rnd_generators
         }
     };
 
-
     template <typename NumberTy, typename RndEngineTy>
     auto gen_number(RndEngineTy& rnd_engine,
         const unsigned significand_rnd_bits = number_caps<NumberTy>::significand_bits,
index 79aa8a0..9437b72 100644 (file)
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "api/CPP/memory.hpp"
-#include <api/CPP/primitive.hpp>
-#include <api/CPP/input_layout.hpp>
-#include <api/CPP/data.hpp>
-#include <api/CPP/topology.hpp>
-#include <api/CPP/network.hpp>
-#include <api/CPP/engine.hpp>
+#include "api/memory.hpp"
+#include <api/primitive.hpp>
+#include <api/input_layout.hpp>
+#include <api/data.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/engine.hpp>
 #include "test_utils.h"
 #include "float16.h"
 #include "instrumentation.h"
@@ -45,7 +45,7 @@ namespace tests
             generic_params->network_build_options.set_option(cldnn::build_option::graph_dumps_dir(DUMP_DIRECTORY));
         }
         topology topology;               
-        topology.add(*layer_params);
+        topology.add_primitive(layer_params);
 
         std::vector<memory> input_mems;
         std::vector<std::string> input_layouts_names = {};
@@ -427,4 +427,4 @@ namespace tests
     std::vector<int32_t> generic_test::test_feature_sizes = { 1, 2 };// , 3, 15};
     std::vector<tensor> generic_test::test_input_sizes = { { 1, 1, 100, 100 } ,{ 1, 1, 277, 277 } ,{ 1, 1, 400, 600 } };
 
-}
\ No newline at end of file
+}
index f2c45db..d881cf4 100644 (file)
 
 #pragma once
 
-#include "api/CPP/memory.hpp"
-#include "api/CPP/tensor.hpp"
-#include "api/CPP/program.hpp"
-#include "api/CPP/network.hpp"
+#include "api/memory.hpp"
+#include "api/tensor.hpp"
+#include "api/program.hpp"
+#include "api/network.hpp"
 #include <iostream>
 #include <limits>
 #include <random>
 #include <algorithm>
+#include <memory>
 #include <gtest/gtest.h>
-#include <api/CPP/primitive.hpp>
+#include <api/primitive.hpp>
 #include "float16.h"
 #include "random_gen.h"
-#include "api/CPP/concatenation.hpp"
-#include "api/CPP/lrn.hpp"
-#include "api/CPP/roi_pooling.hpp"
-#include "api/CPP/scale.hpp"
-#include "api/CPP/softmax.hpp"
-#include "api/CPP/reorder.hpp"
-#include "api/CPP/normalize.hpp"
-#include "api/CPP/convolution.hpp"
-#include "api/CPP/activation.hpp"
-#include "api/CPP/pooling.hpp"
+#include "api/concatenation.hpp"
+#include "api/lrn.hpp"
+#include "api/roi_pooling.hpp"
+#include "api/scale.hpp"
+#include "api/softmax.hpp"
+#include "api/reorder.hpp"
+#include "api/normalize.hpp"
+#include "api/convolution.hpp"
+#include "api/activation.hpp"
+#include "api/pooling.hpp"
 
 #include <chrono>
 
@@ -214,7 +215,6 @@ void set_values_per_batch_and_feature(const cldnn::memory& mem, std::vector<T> a
         }
     }
 
-
 }
 
 template<typename T>
@@ -229,7 +229,6 @@ void set_random_values(const cldnn::memory& mem, bool sign = false, unsigned sig
     }
 }
 
-
 // Tries to construct a network, checking if an expected error appears
 inline void check_exception_massage(const cldnn::engine& engine, cldnn::topology& topology, std::string msg_to_find)
 {
@@ -247,7 +246,6 @@ inline void check_exception_massage(const cldnn::engine& engine, cldnn::topology
     }
 }
 
-
 // Checks equality of floats.
 // For values less than the absolute error limit, absolute error will be counted;
 // for others, the relative error will be counted.
@@ -305,7 +303,6 @@ inline bool floating_point_equal(float x, float y, int max_ulps_diff = 4) {
     }
 }
 
-
 class test_params 
 {
 public:
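
The ULP-based comparison performed by floating_point_equal above can be summarized as follows; this is an illustrative, self-contained sketch of the idea rather than the library's exact implementation: two same-sign floats are treated as equal when their integer bit patterns differ by at most max_ulps.

#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>

static bool almost_equal_ulps(float x, float y, int max_ulps = 4) {
    if (std::signbit(x) != std::signbit(y))
        return x == y;                       // only +0.0f vs -0.0f compare equal across signs
    std::int32_t ix, iy;
    std::memcpy(&ix, &x, sizeof(ix));        // reinterpret the IEEE-754 bit patterns
    std::memcpy(&iy, &y, sizeof(iy));
    return std::abs(ix - iy) <= max_ulps;    // distance in representable values (ULPs)
}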
@@ -358,7 +355,7 @@ private:
     const std::string name_str = ::testing::UnitTest::GetInstance()->current_test_info()->name();
 };
 
-class generic_test : public ::testing::TestWithParam<std::tuple<test_params*, cldnn::primitive*>>
+class generic_test : public ::testing::TestWithParam<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>>
 {
 
 public:
@@ -384,7 +381,7 @@ public:
     virtual cldnn::tensor get_expected_output_tensor();
 
     struct custom_param_name_functor {
-            std::string operator()(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info) {
+            std::string operator()(const ::testing::TestParamInfo<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>>& info) {
                     return std::to_string(info.index);
             }
     };
@@ -393,7 +390,7 @@ protected:
     const cldnn::engine& engine = get_test_engine();
     test_params* generic_params;
     test_dump test_info;
-    cldnn::primitive* layer_params;
+    std::shared_ptr<cldnn::primitive> layer_params;
     int max_ulps_diff_allowed; //Max number of ulps allowed between 2 values when comparing the output buffer and the reference buffer.
     bool random_values; // if set memory buffers will be filled with random values
     bool dump_graphs; // if set tests will dump graphs to file   
@@ -414,7 +411,7 @@ protected:
 // When a test assertion such as EXPECT_EQ fails, Google-Test prints the argument values to help with debugging.
 // It does this using a user - extensible value printer.
 // This function will be used to print the test params in case of an error.
-inline void PrintTupleTo(const std::tuple<tests::test_params*, cldnn::primitive*>& t, ::std::ostream* os)
+inline void PrintTupleTo(const std::tuple<tests::test_params*, std::shared_ptr<cldnn::primitive>>& t, ::std::ostream* os)
 {
     std::stringstream str;
 
@@ -431,13 +428,13 @@ inline void PrintTupleTo(const std::tuple<tests::test_params*, cldnn::primitive*
 
     if (primitive->type == cldnn::concatenation::type_id())
     {
-        auto dc = static_cast<cldnn::concatenation*>(primitive);
+        auto dc = std::static_pointer_cast<cldnn::concatenation>(primitive);
         (void)dc;
     }
     else if(primitive->type == cldnn::lrn::type_id())
     {
-        auto lrn = static_cast<cldnn::lrn *>(primitive);
-        std::string norm_region = (lrn->norm_region == cldnn_lrn_norm_region_across_channel) ? "across channel" : "within channel";
+        auto lrn = std::static_pointer_cast<cldnn::lrn >(primitive);
+        std::string norm_region = (lrn->norm_region == cldnn::lrn_norm_region_across_channel) ? "across channel" : "within channel";
         str << "Norm region: " << norm_region
             << " Size: " << lrn->size
             << " Alpha: " << lrn->alpha
@@ -446,7 +443,7 @@ inline void PrintTupleTo(const std::tuple<tests::test_params*, cldnn::primitive*
     }
     else if(primitive->type == cldnn::roi_pooling::type_id())
     {
-        auto p = static_cast<cldnn::roi_pooling *>(primitive);
+        auto p = std::static_pointer_cast<cldnn::roi_pooling >(primitive);
         str << "Pooling mode: " << (p->mode == cldnn::pooling_mode::max ? "MAX" : "AVG")
             << " Pooled width: " << p->pooled_width
             << " Pooled height: " << p->pooled_height
@@ -457,41 +454,40 @@ inline void PrintTupleTo(const std::tuple<tests::test_params*, cldnn::primitive*
     }
     else if(primitive->type == cldnn::scale::type_id())
     {
-        auto s = static_cast<cldnn::scale *>(primitive);
+        auto s = std::static_pointer_cast<cldnn::scale >(primitive);
         (void)s;
     }
     else if(primitive->type == cldnn::softmax::type_id())
     {
-        auto sm = static_cast<cldnn::softmax *>(primitive);
+        auto sm = std::static_pointer_cast<cldnn::softmax >(primitive);
         (void)sm;
     }
     else if (primitive->type == cldnn::reorder::type_id())
     {
-        auto reorder = static_cast<cldnn::reorder*>(primitive);
+        auto reorder = std::static_pointer_cast<cldnn::reorder>(primitive);
         str << "Output data type: " << cldnn::data_type_traits::name(*reorder->output_data_type) << " Mean: " << reorder->mean << "Subtract per feature: " << "TODO" /*std::vector<float> subtract_per_feature*/;
     }
     else if (primitive->type == cldnn::normalize::type_id())
     {
-        auto normalize = static_cast<cldnn::normalize*>(primitive);
+        auto normalize = std::static_pointer_cast<cldnn::normalize>(primitive);
         std::string norm_region = normalize->across_spatial ? "across_spatial" : "within_spatial";
         str << "Norm region: " << norm_region << " Epsilon: " << normalize->epsilon << " Scale input id: " << normalize->scale_input;
     }
     else if (primitive->type == cldnn::convolution::type_id()) 
     {
-        auto convolution = static_cast<cldnn::convolution*>(primitive);
+        auto convolution = std::static_pointer_cast<cldnn::convolution>(primitive);
         str << "Stride x: " << convolution->stride.spatial[0] << " Stride y: " << convolution->stride.spatial[1]
             << " Dilation x: " << convolution->dilation.spatial[0] << " Dilation y: " << convolution->dilation.spatial[1]
-            << " Input offset x: " << convolution->input_offset.spatial[0] << " Input offset y: " << convolution->input_offset.spatial[1]
-            << " Activation: " << convolution->with_activation << " Activation slope: " << convolution->activation_negative_slope;
+            << " Input offset x: " << convolution->input_offset.spatial[0] << " Input offset y: " << convolution->input_offset.spatial[1];
     }
     else if (primitive->type == cldnn::activation::type_id())
     {
-        auto activation = static_cast<cldnn::activation*>(primitive);
+        auto activation = std::static_pointer_cast<cldnn::activation>(primitive);
         str << "Negative slope: " << activation->additional_params.a << " Negative slope input id: " << activation->additional_params_input;
     }
     else if (primitive->type == cldnn::pooling::type_id())
     {
-        auto pooling = static_cast<cldnn::pooling*>(primitive);
+        auto pooling = std::static_pointer_cast<cldnn::pooling>(primitive);
         std::string pooling_mode = (pooling->mode == cldnn::pooling_mode::max) ? "max" : "average";
         str << "Pooling mode: " << pooling_mode
             << " Input offset x: " << pooling->input_offset.spatial[0] << " Input offset y: " << pooling->input_offset.spatial[1]
index 2c90ad2..953ea69 100644 (file)
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #pragma once
 
 #include <algorithm>
@@ -31,7 +30,6 @@
 #include <string>
 #include <utility>
 
-
 namespace cldnn { namespace tests { namespace distributions {
 
 /// @cond PRIVATE
@@ -115,7 +113,6 @@ private:
             std::numeric_limits<DataType>::has_signaling_NaN) +
         static_cast<std::size_t>(std::numeric_limits<DataType>::has_denorm != std::float_denorm_style::denorm_absent);
 
-
 public:
     /// @brief Estimated size of representation in bits.
     static constexpr std::size_t value = round_up_to_multiply(
@@ -133,7 +130,6 @@ struct estimate_repr_size_helper2_<DataType, false, true>
         std::numeric_limits<DataType>::digits, CHAR_BIT);
 };
 
-
 template <typename DataType,
           bool IsArithmetic = std::is_arithmetic<DataType>::value>
 struct estimate_repr_size_helper1_
@@ -148,7 +144,6 @@ template <typename DataType>
 struct estimate_repr_size_helper1_<DataType, true>
     : estimate_repr_size_helper2_<DataType> {};
 
-
 /// @brief Estimates size in bits (rounded to byte) of representation of specified DataType.
 ///
 /// @tparam DataType Data type which size of representation will be estimated.
@@ -220,7 +215,6 @@ public:
         _stream_ios.fill(_old_fill);
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Data members.
     // ----------------------------------------------------------------------------------------------------------------
@@ -232,7 +226,6 @@ private:
     char_type         _old_fill;
 };
 
-
 /// @brief Creates guard that saves / restores upon destruction format state of stream.
 ///
 /// @tparam CharType   Character type use in stream.
@@ -335,7 +328,6 @@ private:
         /// @brief Number of parts needed to store representation of data_type.
         static constexpr auto data_repr_parts_count = data_type_mask::data_repr_parts_count;
 
-
         // ------------------------------------------------------------------------------------------------------------
         // data_parts: Constructors, special functions, destructors.
         // ------------------------------------------------------------------------------------------------------------
@@ -346,7 +338,6 @@ private:
         constexpr data_parts(data_parts_zero_mark)
             : parts{} {}
 
-
         // ------------------------------------------------------------------------------------------------------------
         // data_parts: Properties, Accessors.
         // ------------------------------------------------------------------------------------------------------------
@@ -377,7 +368,6 @@ private:
             (little_endian ? parts[data_repr_parts_count - 1U - index] : parts[index]) = val;
         }
 
-
         /// @brief Gets part by index (ordered from low-to-high mask bits).
         ///
         /// @param index Index of the part (@c 0-based). Must be lower than data_repr_parts_count.
@@ -405,7 +395,6 @@ private:
             (little_endian ? parts[index] : parts[data_repr_parts_count - 1U - index]) = val;
         }
 
-
         // ------------------------------------------------------------------------------------------------------------
         // data_parts: Functions.
         // ------------------------------------------------------------------------------------------------------------
@@ -442,7 +431,6 @@ private:
             return *this;
         }
 
-
         // ------------------------------------------------------------------------------------------------------------
         // data_parts: Operators.
         // ------------------------------------------------------------------------------------------------------------
@@ -559,7 +547,6 @@ private:
             return *this;
         }
 
-
         // ------------------------------------------------------------------------------------------------------------
         // data_parts: EqualityComparable implementation.
         // ------------------------------------------------------------------------------------------------------------
@@ -587,7 +574,6 @@ private:
             return !(lhs == rhs);
         }
 
-
         // ------------------------------------------------------------------------------------------------------------
         // data_parts: Data members.
         // ------------------------------------------------------------------------------------------------------------
@@ -595,7 +581,6 @@ private:
         part_type parts[data_repr_parts_count];
     };
 
-
 public:
     /// @brief Marker type that allows to select correct candidate constructor (that creates mask by position
     ///        or position and length).
@@ -604,7 +589,6 @@ public:
     ///        integral value of data type).
     static constexpr create_by_pos_mark create_by_pos{}; // "{}": WA for clang 3.6-3.9
 
-
 private:
     // ----------------------------------------------------------------------------------------------------------------
     // Constructors, special functions, destructors.
@@ -801,7 +785,6 @@ public:
         return *this;
     }
 
-
     /// @brief Serialize to stream.
     ///
     /// @tparam CharType   Stream character type.
@@ -921,7 +904,6 @@ public:
         return is;
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // EqualityComparable implementation.
     // ----------------------------------------------------------------------------------------------------------------
@@ -944,7 +926,6 @@ public:
         return !(lhs == rhs);
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Data members.
     // ----------------------------------------------------------------------------------------------------------------
@@ -956,11 +937,9 @@ private:
     #pragma warning(pop)
 #endif
 
-
 } // namespace detail
 /// @endcond
 
-
 /// @brief Uniform real distribution that generates values in range <tt>[a; b]</tt>.
 ///
 /// @details Uniform distribution with following properties:
@@ -1021,7 +1000,6 @@ public:
     static constexpr unsigned significand_max_bits = ((std::numeric_limits<result_type>::digits - 1) > 0) ?
                                                        std::numeric_limits<result_type>::digits - 1 : 0;
 
-
     /// @brief Type of parameters used by distribution.
     ///
     /// @details The type meets @a CopyConstructible, @a CopyAssignable, @a MoveConstructable, @a MoveAssignable,
@@ -1034,7 +1012,6 @@ public:
         /// @brief Type of distribution for which current parameters are used.
         using distribution_type = uniform_quantized_real_distribution;
 
-
         // ------------------------------------------------------------------------------------------------------------
         // param_type: Constructors, special functions, destructors.
         // ------------------------------------------------------------------------------------------------------------
@@ -1089,7 +1066,6 @@ public:
             : param_type(result_zero, result_one, significand_rand_bits)
         {}
 
-
         // ------------------------------------------------------------------------------------------------------------
         // param_type: Properties, Accessors.
         // ------------------------------------------------------------------------------------------------------------
@@ -1140,7 +1116,6 @@ public:
             return !(lhs == rhs);
         }
 
-
         // ------------------------------------------------------------------------------------------------------------
         // param_type: Data members.
         // ------------------------------------------------------------------------------------------------------------
@@ -1169,7 +1144,6 @@ private:
         /// @brief Type of parameter set swapped and restored by current guard.
         using param_type        = typename distribution_type::param_type;
 
-
         // ------------------------------------------------------------------------------------------------------------
         // param_swap_guard: Constructors, special functions, destructors.
         // ------------------------------------------------------------------------------------------------------------
@@ -1197,7 +1171,6 @@ private:
             _distibution.param(_old_param);
         }
 
-
         // ------------------------------------------------------------------------------------------------------------
         // param_swap_guard: Data members.
         // ------------------------------------------------------------------------------------------------------------
@@ -1223,7 +1196,6 @@ private:
         return param_swap_guard<Distribution>(distribution, new_param);
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Helper functions.
     // ----------------------------------------------------------------------------------------------------------------
@@ -1253,7 +1225,6 @@ private:
         return underlying_dist_type(result_one, upper_bound).param();
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Constructors, special functions, destructors.
     // ----------------------------------------------------------------------------------------------------------------
@@ -1320,7 +1291,6 @@ public:
           _base_distribution(create_underlying_dist_param(_mask))
     {}
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // State and generators.
     // ----------------------------------------------------------------------------------------------------------------
@@ -1365,7 +1335,6 @@ public:
         return (*this)(generator);
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Properties, Accessors.
     // ----------------------------------------------------------------------------------------------------------------
@@ -1429,7 +1398,6 @@ public:
         return _param.significand_rand_bits();
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Operators.
     // ----------------------------------------------------------------------------------------------------------------
@@ -1503,7 +1471,6 @@ public:
         // Although the serialized members are named, the order must be preserved.
         auto guard = detail::create_format_guard(is);
 
-
         mask_type param_helper_a, param_helper_b, param_helper_m;
         unsigned param_helper_srb;
         underlying_dist_type param_helper_bd;
@@ -1535,19 +1502,16 @@ public:
         rhs.skip_to_char(is, '}');
         rhs.skip_to_char(is, '}');
 
-
         if (mask != param_helper_m)
             is.setstate(std::ios::failbit);
         if (!is) { return is; }
 
-
         rhs._param             = param;
         rhs._mask              = mask;
         rhs._base_distribution = param_helper_bd;
         return is;
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // EqualityComparable implementation.
     // ----------------------------------------------------------------------------------------------------------------
@@ -1573,7 +1537,6 @@ public:
         return !(lhs == rhs);
     }
 
-
     // ----------------------------------------------------------------------------------------------------------------
     // Data members.
     // ----------------------------------------------------------------------------------------------------------------
index 04bf5d2..3367fd1 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# ====================================== Helper constant variables =====================================
-
-# Order of scan for special capabilities files (.inc files with capabilities description).
-set(CLDNN__CAPS_SCAN_ORDER
-    "private"
-    "internal"
-    "public"
-  )
-
 # ========================================= Name / Output settings =====================================
 
 set(CLDNN_BUILD__PROJ             "clDNN_tests_core_internal")
@@ -52,67 +43,8 @@ endif()
 
 # ================================== Compiler preprocessor definitions =================================
 
-set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
-    CLDNN_EXPORTS
-    EXPORT_NEURAL_SYMBOLS
-    "CLDNN_VERSION_MAJOR=${CLDNN__VERSION_MAJOR}"
-    "CLDNN_VERSION_MINOR=${CLDNN__VERSION_MINOR}"
-    "CLDNN_VERSION_BUILD=${CLDNN__VERSION_BUILD}"
-    "CLDNN_VERSION_REVISION=${CLDNN__VERSION_REVISION}"
-  )
-
-
 # ========================================= Source/Header files ========================================
 
-set(__CLDNN_Directory__clDNN_copy             "${CMAKE_CURRENT_SOURCE_DIR}/../src")
-set(__CLDNN_Label__clDNN_copy                 "clDNN")
-file(GLOB __CLDNN_Sources__clDNN_copy
-    "${__CLDNN_Directory__clDNN_copy}/*.h"
-    "${__CLDNN_Directory__clDNN_copy}/*.hpp"
-    "${__CLDNN_Directory__clDNN_copy}/*.cpp"
-    "${__CLDNN_Directory__clDNN_copy}/*.inc"
-  )
-
-set(__CLDNN_Label__api                 "${__CLDNN_Label__clDNN_copy}\\api")
-file(GLOB __CLDNN_Headers__api
-    "${CLDNN__API_DIR}/*.h"
-    "${CLDNN__API_DIR}/*.hpp"
-  )
-
-set(__CLDNN_Directory__api__cpp "${CLDNN__API_DIR}/CPP")
-set(__CLDNN_Label__api__cpp     "${__CLDNN_Label__api}\\CPP")
-file(GLOB __CLDNN_Headers__api__cpp
-    "${__CLDNN_Directory__api__cpp}/*.h"
-    "${__CLDNN_Directory__api__cpp}/*.hpp"
-  )
-
-set(__CLDNN_Directory__api__c "${CLDNN__API_DIR}/C")
-set(__CLDNN_Label__api__c     "${__CLDNN_Label__api}\\C")
-file(GLOB __CLDNN_Headers__api__c
-    "${__CLDNN_Directory__api__c}/*.h"
-    "${__CLDNN_Directory__api__c}/*.hpp"
-  )
-
-set(__CLDNN_Label__api_extension       "${__CLDNN_Label__clDNN_copy}\\api_extension")
-file(GLOB __CLDNN_Headers__api_extension
-    "${CLDNN__API_EXTENSION_DIR}/*.h"
-    "${CLDNN__API_EXTENSION_DIR}/*.hpp"
-  )
-
-set(__CLDNN_Directory__api_extension__cpp "${CLDNN__API_EXTENSION_DIR}/CPP")
-set(__CLDNN_Label__api_extension__cpp     "${__CLDNN_Label__api_extension}\\CPP")
-file(GLOB __CLDNN_Headers__api_extension__cpp
-    "${__CLDNN_Directory__api_extension__cpp}/*.h"
-    "${__CLDNN_Directory__api_extension__cpp}/*.hpp"
-  )
-
-set(__CLDNN_Directory__api_extension__c "${CLDNN__API_EXTENSION_DIR}/C")
-set(__CLDNN_Label__api_extension__c     "${__CLDNN_Label__api_extension}\\C")
-file(GLOB __CLDNN_Headers__api_extension__c
-    "${__CLDNN_Directory__api_extension__c}/*.h"
-    "${__CLDNN_Directory__api_extension__c}/*.hpp"
-  )
-
 set(__CLDNN_Label__main                "")
 file(GLOB __CLDNN_Sources__main
     "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
@@ -120,21 +52,6 @@ file(GLOB __CLDNN_Sources__main
     "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
   )
 
-set(__CLDNN_Directory__graph_opt       "${CMAKE_CURRENT_SOURCE_DIR}/../src/graph_optimizer")
-set(__CLDNN_Label__graph_opt           "${__CLDNN_Label__clDNN_copy}\\graph_optimizer")
-file(GLOB __CLDNN_Sources__graph_opt
-    "${__CLDNN_Directory__graph_opt}/*.h"
-    "${__CLDNN_Directory__graph_opt}/*.hpp"
-    "${__CLDNN_Directory__graph_opt}/*.cpp"
-  )
-
-set(__CLDNN_Directory__include         "${CMAKE_CURRENT_SOURCE_DIR}/../src/include")
-set(__CLDNN_Label__include             "${__CLDNN_Label__clDNN_copy}\\include")
-file(GLOB __CLDNN_Headers__include
-    "${__CLDNN_Directory__include}/*.h"
-    "${__CLDNN_Directory__include}/*.hpp"
-  )
-
 set(__CLDNN_Directory__test_cases      "${CMAKE_CURRENT_SOURCE_DIR}/test_cases")
 set(__CLDNN_Label__test_cases          "test cases")
 file(GLOB __CLDNN_Sources__test_cases
@@ -157,76 +74,8 @@ file(GLOB __CLDNN_Sources__gtest
     "${__CLDNN_Directory__gtest}/*.cc"
   )
 
-# Special handling of capabilities files.
-set(__CLDNN_Directory__caps            "${CMAKE_CURRENT_SOURCE_DIR}/../src/caps")
-set(__CLDNN_Label__caps                "${__CLDNN_Label__clDNN_copy}\\caps")
-foreach(__CLDNN_CapsScanDir ${CLDNN__CAPS_SCAN_ORDER})
-  string(REPLACE ";" "\;" __CLDNN_CapsScanDir "${__CLDNN_CapsScanDir}") # [WA#1] Must escape ; again if occurred in item.
-  file(GLOB __CLDNN_Sources__caps "${__CLDNN_Directory__caps}/${__CLDNN_CapsScanDir}/*.inc")
-  list(LENGTH __CLDNN_Sources__caps __CLDNN_CapsScanDirFileCount)
-  if(__CLDNN_CapsScanDirFileCount GREATER 0)
-    set(__CLDNN_IncDirectory__caps "${__CLDNN_Directory__caps}/${__CLDNN_CapsScanDir}")
-    message(STATUS "[clDNN] Selected capabilities: ${__CLDNN_CapsScanDir}")
-    break()
-  endif()
-endforeach()
-if(NOT (__CLDNN_CapsScanDirFileCount GREATER 0))
-  message(FATAL_ERROR "[clDNN] Cannot locate any capabilities files in \"${__CLDNN_Directory__caps}\" subdirectories.")
-endif()
-unset(__CLDNN_CapsScanDir)
-unset(__CLDNN_CapsScanDirFileCount)
-
-set(__CLDNN_Directory__gpu             "${CMAKE_CURRENT_SOURCE_DIR}/../src/gpu")
-set(__CLDNN_Label__gpu                 "${__CLDNN_Label__clDNN_copy}\\gpu")
-file(GLOB __CLDNN_Sources__gpu
-    "${__CLDNN_Directory__gpu}/*.h"
-    "${__CLDNN_Directory__gpu}/*.hpp"
-    "${__CLDNN_Directory__gpu}/*.cpp"
-    "${__CLDNN_Directory__gpu}/*.inc"
-  )
-
-set(__CLDNN_Directory__cache           "${__CLDNN_Directory__gpu}/cache")
-set(__CLDNN_Label__cache               "${__CLDNN_Label__gpu}\\cache")
-file(GLOB __CLDNN_Sources__cache
-    "${__CLDNN_Directory__cache}/*.h"
-    "${__CLDNN_Directory__cache}/*.hpp"
-    "${__CLDNN_Directory__cache}/*.cpp"
-  )
-
-set(__CLDNN_Directory__ch_kernels      "${__CLDNN_Directory__cache}/kernels")
-set(__CLDNN_Label__ch_kernels          "${__CLDNN_Label__cache}\\kernels")
-file(GLOB __CLDNN_Sources__ch_kernels
-    "${__CLDNN_Directory__ch_kernels}/*.cl"
-  )
-
-set(__CLDNN_Directory__cg_cache        "${CLDNN__CODEGEN_INCDIR}")
-set(__CLDNN_CGDirectory__cg_cache      "${CLDNN__CODEGEN_DIR}/cache")
-set(__CLDNN_Label__cg_cache            "${__CLDNN_Label__cache}\\codegen")
-
-set(__CLDNN_Directory__ks_main           "${CLDNN__KERNEL_SELECTOR_DIR}")
-set(__CLDNN_Directory__ks_core           "${CLDNN__KERNEL_SELECTOR_DIR}/core")
-set(__CLDNN_Directory__ks_common         "${CLDNN__KERNEL_SELECTOR_DIR}/common")
-set(__CLDNN_Directory__ks_core_common    "${__CLDNN_Directory__ks_core}/common")
-set(__CLDNN_Directory__ks_actual_kernels "${__CLDNN_Directory__ks_core}/actual_kernels")
-set(__CLDNN_Directory__ks_cache          "${__CLDNN_Directory__ks_core}/cache")
-
-
 set(__CLDNN_AllSources
-    ${__CLDNN_Sources__clDNN_copy}
-    ${__CLDNN_Headers__api}
-    ${__CLDNN_Sources__graph_opt}
-    ${__CLDNN_Headers__include}
-    ${__CLDNN_Sources__caps}
-    ${__CLDNN_Headers__api__cpp}
-    ${__CLDNN_Headers__api__c}
-    ${__CLDNN_Headers__api_extension}
-    ${__CLDNN_Headers__api_extension__c}
-    ${__CLDNN_Headers__api_extension__cpp}
     ${__CLDNN_Sources__main}
-    ${__CLDNN_Sources__gpu}
-    ${__CLDNN_Sources__cache}
-    ${__CLDNN_Sources__ch_kernels}
-    ${__CLDNN_Sources__cg_cache}
     ${__CLDNN_Sources__test_cases}
     ${__CLDNN_Sources__test_utils}
     ${__CLDNN_Sources__gtest}
@@ -237,40 +86,22 @@ set_property(SOURCE ${__CLDNN_Sources__cg_cache} PROPERTY GENERATED TRUE)
 
 # =============================================== Filters ==============================================
 
-source_group("${__CLDNN_Label__api}"                  FILES ${__CLDNN_Headers__api})
-source_group("${__CLDNN_Label__api__cpp}"             FILES ${__CLDNN_Headers__api__cpp})
-source_group("${__CLDNN_Label__api__c}"               FILES ${__CLDNN_Headers__api__c})
-source_group("${__CLDNN_Label__api_extension}"        FILES ${__CLDNN_Headers__api_extension})
-source_group("${__CLDNN_Label__api_extension__cpp}"   FILES ${__CLDNN_Headers__api_extension__cpp})
-source_group("${__CLDNN_Label__api_extension__c}"     FILES ${__CLDNN_Headers__api_extension__c})
-source_group("${__CLDNN_Label__include}"              FILES ${__CLDNN_Headers__include})
-source_group("${__CLDNN_Label__graph_opt}"            FILES ${__CLDNN_Sources__graph_opt})
-source_group("${__CLDNN_Label__caps}"                 FILES ${__CLDNN_Sources__caps})
 source_group("${__CLDNN_Label__main}"                 FILES ${__CLDNN_Sources__main})
-source_group("${__CLDNN_Label__gpu}"                  FILES ${__CLDNN_Sources__gpu})
-source_group("${__CLDNN_Label__cache}"                FILES ${__CLDNN_Sources__cache})
-source_group("${__CLDNN_Label__ch_kernels}"           FILES ${__CLDNN_Sources__ch_kernels})
-source_group("${__CLDNN_Label__cg_cache}"             FILES ${__CLDNN_Sources__cg_cache})
 source_group("${__CLDNN_Label__test_cases}"           FILES ${__CLDNN_Sources__test_cases})
 source_group("${__CLDNN_Label__test_utils}"           FILES ${__CLDNN_Sources__test_utils})
-source_group("${__CLDNN_Label__gtest}"                FILES ${__CLDNN_Sources__gtest})
-
 
 # ===================================== Include/Link directories =======================================
 
 include_directories(
     "${CLDNN__MAIN_DIR}"
     "${CLDNN__MAIN_DIR}/src"
+    "${CLDNN__MAIN_DIR}/src/include"
+    "${CLDNN__KERNEL_SELECTOR_DIR}/core"
+    "${CLDNN__KERNEL_SELECTOR_DIR}/core/common"
+    "${CLDNN__KERNEL_SELECTOR_DIR}/common"
     "${CLDNN__GTEST_DIR}"
     "${__CLDNN_Directory__test_utils}"
     "${CMAKE_CURRENT_SOURCE_DIR}"
-    "${__CLDNN_Directory__include}"
-    "${__CLDNN_IncDirectory__caps}"
-    "${__CLDNN_Directory__ks_core}"
-    "${__CLDNN_Directory__ks_core}/common"
-    "${__CLDNN_Directory__ks_actual_kernels}"
-    "${__CLDNN_Directory__ks_common}"
-    "${__CLDNN_Directory__gpu}"
   )
 
 # =================================== Link targets and dependencies ====================================
@@ -283,13 +114,8 @@ add_executable("${CLDNN_BUILD__PROJ}"
 set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}")
 set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME   "${CLDNN_BUILD__PROJ_OUTPUT_NAME}")
 
-
 # Set library dependencies
-target_link_libraries("${CLDNN_BUILD__PROJ}"
-    # "${CLDNN_BUILD__PROJ__clDNN}"
-    OpenCL
-    cldnn_kernel_selector
-  )
+target_link_libraries("${CLDNN_BUILD__PROJ}" "${CLDNN_BUILD__PROJ__clDNN}")
 
 if(WIN32)
   target_link_libraries("${CLDNN_BUILD__PROJ}" setupapi)
index c022b79..7b2fe96 100644 (file)
@@ -19,7 +19,6 @@
 #include <gtest/gtest.h>
 
 #include "program_impl.h"
-#include "api_impl.h"
 #include "topology_impl.h"
 #include "engine_impl.h"
 #include "memory_impl.h"
@@ -62,9 +61,9 @@ TEST(basic, test1) {
     topology.add(concatenation("concat", { "reorder1", "weights2" }, concatenation::along_x));
     topology.add(convolution("conv2", { "reorder2" }, { "concat" }));
 
-    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false);
-    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog, 0);
-    network network = (cldnn::network) api_cast(net.get());
+    program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false);
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = engine.get()->allocate_network(*prog, 0);
+    network network = (cldnn::network) net.get();
 
     network.set_input_data("input", input);
 
@@ -109,14 +108,14 @@ TEST(add_intermediate_gpu, test1)
     topology.add(cldnn::convolution("conv1b", { "input" }, { "weights" }));
     topology.add(cldnn::convolution("conv2a", { "conv1a" }, { "weights2" }));
     auto new_reorder = std::make_shared<reorder>("reorder","nothing", input.get_layout());
-    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
+    program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false, true);
     prog->add_intermediate(new_reorder, prog->get_node("conv1a"), 0);
     prog->dump_program("custom_dump", true);
 
     program_impl_wrapper::run_graph_compilation(*prog);
 
-    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog, 0);
-    network network = (cldnn::network) api_cast(net.get());
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = engine.get()->allocate_network(*prog, 0);
+    network network = (cldnn::network) net.get();
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
@@ -170,7 +169,7 @@ TEST(add_intermediate_gpu, test2)
     w_vec.push_back("weights");
     auto new_conv = std::make_shared<convolution>("conv1a", "input", w_vec);
     auto weights_node = std::make_shared<data>("weights", weights);
-    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
+    program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false, true);
 
     prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true);
     program_impl_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv));
@@ -178,8 +177,8 @@ TEST(add_intermediate_gpu, test2)
 
     program_impl_wrapper::run_graph_compilation(*prog);
 
-    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog, 0);
-    network network = (cldnn::network) api_cast(net.get());
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = engine.get()->allocate_network(*prog, 0);
+    network network = (cldnn::network) net.get();
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
index 694530e..ff3d074 100644 (file)
@@ -55,11 +55,13 @@ std::map<primitive_id, network_output> test_prepare_conv_eltw_fusing(bool eltw1,
     topology.add(convolution("conv2", { "input" }, { "weights2" }));
     if (eltw1)
     {
-        topology.add(eltwise("eltw1", "conv1", "conv2", cldnn::eltwise_mode::sum, true));
+        topology.add(eltwise("eltw1_no_relu", "conv1", "conv2", cldnn::eltwise_mode::sum));
+        topology.add(activation("eltw1", "eltw1_no_relu", activation_func::relu));
     }
     if (eltw2)
     {
-        topology.add(eltwise("eltw2", "conv2", "conv1", cldnn::eltwise_mode::sum, true));
+        topology.add(eltwise("eltw2_no_relu", "conv2", "conv1", cldnn::eltwise_mode::sum));
+        topology.add(activation("eltw2", "eltw2_no_relu", activation_func::relu));
     }
     if (eltw1 && eltw2)
     {
@@ -77,15 +79,15 @@ std::map<primitive_id, network_output> test_prepare_conv_eltw_fusing(bool eltw1,
     {
         topology.add(eltwise("eltw3", "conv1", "conv2", cldnn::eltwise_mode::sum));
     }
-    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
+    program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false, true);
 
     layout_optimizer lo;
     program_impl_wrapper::apply_opt_pass<prepare_conv_eltw_fusing>(*prog, lo);
 
     program_impl_wrapper::run_graph_compilation(*prog);
     program_impl_wrapper::prepare_memory_dependencies(*prog);
-    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog, 0);
-    network network = (cldnn::network) api_cast(net.get());
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = engine.get()->allocate_network(*prog, 0);
+    network network = (cldnn::network) net.get();
     network.set_input_data("input", input);
 
     return network.execute();
index 613c87f..c801bee 100644 (file)
@@ -44,6 +44,7 @@ set(gapi_srcs
     src/api/operators.cpp
     src/api/kernels_core.cpp
     src/api/kernels_imgproc.cpp
+    src/api/render.cpp
 
     # Compiler part
     src/compiler/gmodel.cpp
index 9dd5540..e8dbcaa 100644 (file)
@@ -37,6 +37,7 @@ set_property(TARGET ${FLUID_TARGET} PROPERTY CXX_STANDARD 11)
 
 if(MSVC)
   target_compile_options(${FLUID_TARGET} PUBLIC "/wd4251")
+  target_compile_options(${FLUID_TARGET} PUBLIC "/wd4275")
   target_compile_definitions(${FLUID_TARGET} PRIVATE _CRT_SECURE_NO_DEPRECATE)
 endif()
 
index a043a83..0600e24 100644 (file)
 @}
  */
 
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/garray.hpp"
-#include "opencv2/gapi/gcomputation.hpp"
-#include "opencv2/gapi/gcompiled.hpp"
-#include "opencv2/gapi/gtyped.hpp"
-#include "opencv2/gapi/gkernel.hpp"
-#include "opencv2/gapi/operators.hpp"
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gcomputation.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
+#include <opencv2/gapi/gtyped.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/operators.hpp>
 
 #endif // OPENCV_GAPI_HPP
index 6455429..a38d747 100644 (file)
@@ -12,9 +12,9 @@
 
 #include <opencv2/imgproc.hpp>
 
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/gkernel.hpp>
 
 /** \defgroup gapi_core G-API core (basic) functionality
 @{
@@ -398,6 +398,16 @@ namespace core {
         }
     };
 
+    G_TYPED_KERNEL(GResizeP, <GMatP(GMatP,Size,int)>, "org.opencv.core.transform.resizeP") {
+        static GMatDesc outMeta(GMatDesc in, Size sz, int interp) {
+            GAPI_Assert(in.depth == CV_8U);
+            GAPI_Assert(in.chan == 3);
+            GAPI_Assert(in.planar);
+            GAPI_Assert(interp == cv::INTER_LINEAR);
+            return in.withSize(sz);
+        }
+    };
+
     G_TYPED_KERNEL(GMerge3, <GMat(GMat,GMat,GMat)>, "org.opencv.core.transform.merge3") {
         static GMatDesc outMeta(GMatDesc in, GMatDesc, GMatDesc) {
             // Preserve depth and add channel component
@@ -1342,10 +1352,28 @@ enlarge an image, it will generally look best with cv::INTER_CUBIC (slow) or cv:
 \f[\texttt{(double)dsize.height/src.rows}\f]
 @param interpolation interpolation method, see cv::InterpolationFlags
 
-@sa  warpAffine, warpPerspective, remap
+@sa  warpAffine, warpPerspective, remap, resizeP
  */
 GAPI_EXPORTS GMat resize(const GMat& src, const Size& dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 
+/** @brief Resizes a planar image.
+
+The function resizes the image src down to or up to the specified size.
+Planar image memory layout is three planes lying contiguously in memory, so the
+image height should be plane_height*plane_number and the image type is @ref CV_8UC1.
+
+The output image will have the size dsize; its depth is the same as that of src.
+
+@note Function textual ID is "org.opencv.core.transform.resizeP"
+
+@param src input image, must be of @ref CV_8UC1 type;
+@param dsize output image size;
+@param interpolation interpolation method, only cv::INTER_LINEAR is supported at the moment
+
+@sa  warpAffine, warpPerspective, remap, resize
+ */
+GAPI_EXPORTS GMatP resizeP(const GMatP& src, const Size& dsize, int interpolation = cv::INTER_LINEAR);
+
 /** @brief Creates one 3-channel (4-channel) matrix out of 3(4) single-channel ones.
 
 The function merges several matrices to make a single multi-channel matrix. That is, each
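
The resizeP overload declared above only builds a planar-resize node into a G-API expression; a construction-only sketch under that assumption (the target size is illustrative, and actually running the compiled graph additionally requires a backend that provides planar kernels, which is outside the scope of this header):

    cv::GMatP in;
    cv::GMatP out = cv::gapi::resizeP(in, cv::Size(224, 224));   // cv::INTER_LINEAR is the only supported mode
    cv::GComputation resize_graph(cv::GIn(in), cv::GOut(out));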
@@ -1564,25 +1592,6 @@ number of channels as in the input matrix.
 */
 GAPI_EXPORTS GMat LUT(const GMat& src, const Mat& lut);
 
-/** @brief Performs a 3D look-up table transform of a multi-channel matrix.
-
-The function LUT3D fills the output matrix with values from the look-up table. Indices of the entries
-are taken from the input matrix. Interpolation is applied for mapping 0-255 range values to 0-16 range of 3DLUT table.
-The function processes each element of src as follows:
-@code{.cpp}
-    dst[i][j][k] = lut3D[~src_r][~src_g][~src_b];
-@endcode
-where ~ means approximation.
-Output is a matrix of of @ref CV_8UC3.
-
-@note Function textual ID is "org.opencv.core.transform.LUT3D"
-
-@param src input matrix of @ref CV_8UC3.
-@param lut3D look-up table 17x17x17 3-channel elements.
-@param interpolation The depth of interpoolation to be used.
-*/
-GAPI_EXPORTS GMat LUT3D(const GMat& src, const GMat& lut3D, int interpolation = INTER_NEAREST);
-
 /** @brief Converts a matrix to another data depth with optional scaling.
 
 The method converts source pixel values to the target data depth. saturate_cast\<\> is applied at
index ec76fe5..ffd3596 100644 (file)
@@ -9,7 +9,7 @@
 #define OPENCV_GAPI_CPU_CORE_API_HPP
 
 #include <opencv2/gapi/gkernel.hpp> // GKernelPackage
-#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS
+#include <opencv2/gapi/own/exports.hpp> // GAPI_EXPORTS
 
 namespace cv {
 namespace gapi {
index 80392fe..4205776 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCPUKERNEL_HPP
@@ -19,6 +19,7 @@
 #include <opencv2/gapi/garg.hpp>
 #include <opencv2/gapi/own/convert.hpp> //to_ocv
 #include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
+#include <opencv2/gapi/util/util.hpp>
 
 // FIXME: namespace scheme for backends?
 namespace cv {
@@ -43,13 +44,12 @@ namespace cpu
      * stack. Every backend is hardware-oriented and thus can run its
      * kernels efficiently on the target platform.
      *
-     * Backends are usually "back boxes" for G-API users -- on the API
+     * Backends are usually "black boxes" for G-API users -- on the API
      * side, all backends are represented as different objects of the
-     * same class cv::gapi::GBackend. User can manipulate with backends
-     * mainly by specifying which kernels to use or where to look up
-     * for kernels first.
+     * same class cv::gapi::GBackend.
+     * The user can work with backends by specifying which kernels to use.
      *
-     * @sa @ref gapi_hld, cv::gapi::lookup_order()
+     * @sa @ref gapi_hld
      */
 
     /**
@@ -259,7 +259,8 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
 } // namespace detail
 
 template<class Impl, class K>
-class GCPUKernelImpl: public detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>
+class GCPUKernelImpl: public cv::detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                      public cv::detail::KernelTag
 {
     using P = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
 
index 8a72312..a5dd4a6 100644 (file)
@@ -17,9 +17,9 @@
 #include <opencv2/gapi/own/mat.hpp>
 #include <opencv2/gapi/gmat.hpp>
 
-#include "opencv2/gapi/util/optional.hpp"
-#include "opencv2/gapi/own/scalar.hpp"
-#include "opencv2/gapi/own/mat.hpp"
+#include <opencv2/gapi/util/optional.hpp>
+#include <opencv2/gapi/own/scalar.hpp>
+#include <opencv2/gapi/own/mat.hpp>
 
 namespace cv {
 namespace gapi {
index b6adf9e..79c5c5f 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_KERNEL_HPP
@@ -99,12 +99,33 @@ struct GFluidOutputRois
     std::vector<cv::gapi::own::Rect> rois;
 };
 
+struct GFluidParallelOutputRois
+{
+    std::vector<GFluidOutputRois> parallel_rois;
+};
+
+struct GFluidParallelFor
+{
+    std::function<void(std::size_t, std::function<void(std::size_t)>)> parallel_for;
+};
+
 namespace detail
 {
 template<> struct CompileArgTag<GFluidOutputRois>
 {
     static const char* tag() { return "gapi.fluid.outputRois"; }
 };
+
+template<> struct CompileArgTag<GFluidParallelFor>
+{
+    static const char* tag() { return "gapi.fluid.parallelFor"; }
+};
+
+template<> struct CompileArgTag<GFluidParallelOutputRois>
+{
+    static const char* tag() { return "gapi.fluid.parallelOutputRois"; }
+};
+
 } // namespace detail
 
 namespace detail
@@ -275,7 +296,7 @@ struct FluidCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>, UseScratch
 
 
 template<class Impl, class K, bool UseScratch>
-class GFluidKernelImpl
+class GFluidKernelImpl : public cv::detail::KernelTag
 {
     static const int LPI = 1;
     static const auto Kind = GFluidKernel::Kind::Filter;
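The two compile arguments introduced above (GFluidParallelOutputRois and GFluidParallelFor) let a Fluid graph be split into independent output regions and driven by a caller-supplied parallel-for. Below is a minimal sketch of how they could be passed to the compiler; the strip split, the std::thread-based parallel_for and the header path are illustrative assumptions, not part of this patch, and the actual scheduling is up to the Fluid backend.

```cpp
#include <functional>
#include <thread>
#include <vector>

#include <opencv2/gapi.hpp>
#include <opencv2/gapi/fluid/gfluidkernel.hpp>

// Build compile args that ask the Fluid backend to process two horizontal
// strips of the output independently, one std::thread per strip.
static cv::GCompileArgs parallelFluidArgs(int w, int h)
{
    cv::GFluidOutputRois top{{cv::gapi::own::Rect{0, 0,     w, h / 2}}};
    cv::GFluidOutputRois bot{{cv::gapi::own::Rect{0, h / 2, w, h / 2}}};
    cv::GFluidParallelOutputRois rois{{top, bot}};

    cv::GFluidParallelFor pfor;
    pfor.parallel_for = [](std::size_t count, std::function<void(std::size_t)> body)
    {
        std::vector<std::thread> pool;
        for (std::size_t i = 0; i < count; ++i) pool.emplace_back(body, i);
        for (auto &t : pool) t.join();
    };
    return cv::compile_args(rois, pfor);
}
```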
index f8a3170..8f912aa 100644 (file)
 #include <type_traits>
 
 #include <opencv2/gapi/opencv_includes.hpp>
-#include "opencv2/gapi/own/mat.hpp"
+#include <opencv2/gapi/own/mat.hpp>
 
-#include "opencv2/gapi/util/any.hpp"
-#include "opencv2/gapi/util/variant.hpp"
+#include <opencv2/gapi/util/any.hpp>
+#include <opencv2/gapi/util/variant.hpp>
 
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/garray.hpp"
-#include "opencv2/gapi/gtype_traits.hpp"
-#include "opencv2/gapi/gmetaarg.hpp"
-#include "opencv2/gapi/own/scalar.hpp"
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
+#include <opencv2/gapi/gmetaarg.hpp>
+#include <opencv2/gapi/own/scalar.hpp>
 
 namespace cv {
 
index 87d0015..b69fb5d 100644 (file)
@@ -18,7 +18,7 @@
 
 #include <opencv2/gapi/util/variant.hpp>
 #include <opencv2/gapi/util/throw.hpp>
-#include "opencv2/gapi/own/assert.hpp"
+#include <opencv2/gapi/own/assert.hpp>
 
 namespace cv
 {
@@ -55,6 +55,12 @@ namespace detail
     class VectorRef;
     using ConstructVec = std::function<void(VectorRef&)>;
 
+    // This is the base struct for GArrayU type holder
+    struct TypeHintBase{virtual ~TypeHintBase() = default;};
+
+    // This class holds type of initial GArray to be checked from GArrayU
+    template <typename T>
+    struct TypeHint final : public TypeHintBase{};
 
     // This class strips type information from GArray<T> and makes it usable
     // in the G-API graph compiler (expression unrolling, graph generation, etc).
@@ -64,6 +70,9 @@ namespace detail
     public:
         GArrayU(const GNode &n, std::size_t out); // Operation result constructor
 
+        template <typename T>
+        bool holds() const;                       // Check if was created from GArray<T>
+
         GOrigin& priv();                          // Internal use only
         const GOrigin& priv() const;              // Internal use only
 
@@ -73,7 +82,23 @@ namespace detail
 
         void setConstructFcn(ConstructVec &&cv);  // Store T-aware constructor
 
+        template <typename T>
+        void specifyType();                       // Store type of initial GArray<T>
+
         std::shared_ptr<GOrigin> m_priv;
+        std::shared_ptr<TypeHintBase> m_hint;
+    };
+
+    template <typename T>
+    bool GArrayU::holds() const{
+        GAPI_Assert(m_hint != nullptr);
+        using U = typename std::decay<T>::type;
+        return dynamic_cast<TypeHint<U>*>(m_hint.get()) != nullptr;
+    };
+
+    template <typename T>
+    void GArrayU::specifyType(){
+        m_hint.reset(new TypeHint<typename std::decay<T>::type>);
     };
 
     // This class represents a typed STL vector reference.
@@ -239,7 +264,10 @@ public:
 
 private:
     static void VCTor(detail::VectorRef& vref) { vref.reset<T>(); }
-    void putDetails() {m_ref.setConstructFcn(&VCTor); }
+    void putDetails() {
+        m_ref.setConstructFcn(&VCTor);
+        m_ref.specifyType<T>();
+    }
 
     detail::GArrayU m_ref;
 };
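The holds<T>()/specifyType<T>() pair added above is a small type-erasure idiom: the typed GArray<T> plants a TypeHint<T> marker when putDetails() runs, and the untyped GArrayU can later probe it with dynamic_cast. A standalone sketch of the same idiom (illustrative names, not G-API code):

```cpp
#include <cassert>
#include <memory>
#include <type_traits>

struct TypeHintBase { virtual ~TypeHintBase() = default; };
template <typename T> struct TypeHint final : TypeHintBase {};

class ErasedArray {                       // stand-in for GArrayU
public:
    template <typename T> void specifyType() {
        m_hint.reset(new TypeHint<typename std::decay<T>::type>);
    }
    template <typename T> bool holds() const {
        using U = typename std::decay<T>::type;
        return dynamic_cast<TypeHint<U>*>(m_hint.get()) != nullptr;
    }
private:
    std::shared_ptr<TypeHintBase> m_hint;
};

int main() {
    ErasedArray a;
    a.specifyType<int>();                 // what GArray<int>::putDetails() now does
    assert(a.holds<int>());               // what GArrayU::holds<int>() now reports
    assert(!a.holds<float>());
    return 0;
}
```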
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gasync_context.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gasync_context.hpp
new file mode 100644 (file)
index 0000000..3e01577
--- /dev/null
@@ -0,0 +1,38 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_GASYNC_CONTEXT_HPP
+#define OPENCV_GAPI_GASYNC_CONTEXT_HPP
+
+#if !defined(GAPI_STANDALONE)
+#  include <opencv2/core/cvdef.h>
+#else   // Without OpenCV
+#  include <opencv2/gapi/own/cvdefs.hpp>
+#endif // !defined(GAPI_STANDALONE)
+
+#include <opencv2/gapi/own/exports.hpp>
+
+namespace cv {
+namespace gapi{
+namespace wip {
+
+class GAPI_EXPORTS GAsyncContext{
+    std::atomic<bool> cancelation_requested = {false};
+public:
+    //returns true if it was a first request to cancel the context
+    bool cancel();
+    bool isCanceled() const;
+};
+
+class GAPI_EXPORTS GAsyncCanceled : public std::exception {
+public:
+    virtual const char* what() const noexcept CV_OVERRIDE;
+};
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif //OPENCV_GAPI_GASYNC_CONTEXT_HPP
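The comment on cancel() above fixes its return value: only the first cancellation request reports true. A minimal illustration of that contract (assuming the header is installed as <opencv2/gapi/gasync_context.hpp>):

```cpp
#include <cassert>
#include <opencv2/gapi/gasync_context.hpp>

void cancel_twice()
{
    cv::gapi::wip::GAsyncContext ctx;
    bool first  = ctx.cancel();   // true: this call raised the cancellation flag
    bool second = ctx.cancel();   // false: cancellation was already requested
    assert(first && !second && ctx.isCanceled());
}
```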
index 50223ce..87cba52 100644 (file)
@@ -8,10 +8,10 @@
 #ifndef OPENCV_GAPI_GCALL_HPP
 #define OPENCV_GAPI_GCALL_HPP
 
-#include "opencv2/gapi/garg.hpp"      // GArg
-#include "opencv2/gapi/gmat.hpp"      // GMat
-#include "opencv2/gapi/gscalar.hpp"   // GScalar
-#include "opencv2/gapi/garray.hpp"    // GArray<T>
+#include <opencv2/gapi/garg.hpp>      // GArg
+#include <opencv2/gapi/gmat.hpp>      // GMat
+#include <opencv2/gapi/gscalar.hpp>   // GScalar
+#include <opencv2/gapi/garray.hpp>    // GArray<T>
 
 namespace cv {
 
index 6a3f51f..dac640a 100644 (file)
@@ -14,9 +14,9 @@
 
 #include <opencv2/gapi/opencv_includes.hpp>
 
-#include "opencv2/gapi/util/any.hpp"
-#include "opencv2/gapi/own/exports.hpp"
-#include "opencv2/gapi/own/assert.hpp"
+#include <opencv2/gapi/util/any.hpp>
+#include <opencv2/gapi/own/exports.hpp>
+#include <opencv2/gapi/own/assert.hpp>
 
 namespace cv {
 
@@ -29,6 +29,12 @@ namespace detail
     {
         static const char* tag() { return ""; };
     };
+
+    // These structures are tags used to distinguish kernels from transformations
+    struct KernelTag
+    {};
+    struct TransformTag
+    {};
 }
 
 // This definition is here because it is reused by both public(?) and internal
index ad491b7..c825edf 100644 (file)
@@ -10,9 +10,9 @@
 
 #include <vector>
 
-#include "opencv2/gapi/opencv_includes.hpp"
-#include "opencv2/gapi/own/assert.hpp"
-#include "opencv2/gapi/garg.hpp"
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+#include <opencv2/gapi/garg.hpp>
 
 namespace cv {
 
index a9d946b..96da0d7 100644 (file)
@@ -11,7 +11,8 @@
 #include <future>           //for std::future
 #include <exception>        //for std::exception_ptr
 #include <functional>       //for std::function
-#include "opencv2/gapi/garg.hpp"
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/own/exports.hpp>
 
 namespace cv {
     //fwd declaration
@@ -19,13 +20,22 @@ namespace cv {
 
 namespace gapi{
 namespace wip {
+    class GAsyncContext;
     //These functions asynchronously (i.e. probably on a separate thread of execution) call the operator() member function of their first argument with copies of the rest of the arguments (except the callback) passed in.
     //The difference between the functions is the way the completion notification is delivered (via a callback or by waiting on a std::future object)
     //If an exception occurs during the execution it is transferred to the callback (via a function parameter) or stored in the future (and will be thrown on the call to std::future::get)
+
+    //N.B.:
+    //Input arguments are copied on the call to the async function (actually on the call to cv::gin) and thus do not have to outlive the completion of the asynchronous activity.
+    //Output arguments, however, are "captured" by reference (pointer) and therefore _must_ outlive the asynchronous activity
+    //(i.e. live at least until the callback is called or the future is unblocked)
     GAPI_EXPORTS void                async(GCompiled& gcmpld, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs);
+    GAPI_EXPORTS void                async(GCompiled& gcmpld, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx);
+
     GAPI_EXPORTS std::future<void>   async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs);
-} // namespace gapi
+    GAPI_EXPORTS std::future<void>   async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx);
 } // namespace wip
+} // namespace gapi
 } // namespace cv
 
 #endif // OPENCV_GAPI_GCOMPILED_ASYNC_HPP
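Putting the pieces together for the callback-based overload: the input Mat is copied by cv::gin() while the output Mat is captured by pointer via cv::gout(), so only the latter has to outlive the run. A minimal sketch, assuming gcmpld is a cv::GCompiled built elsewhere and the usual OpenCV/G-API headers are available at the paths shown:

```cpp
#include <exception>

#include <opencv2/gapi.hpp>
#include <opencv2/gapi/gasync_context.hpp>
#include <opencv2/gapi/gcompiled_async.hpp>

void launch(cv::GCompiled& gcmpld, const cv::Mat& in, cv::Mat& out,
            cv::gapi::wip::GAsyncContext& ctx)
{
    // `in` is copied right here (by cv::gin); `out` must stay alive until the
    // callback below has fired.
    cv::gapi::wip::async(gcmpld,
                         [](std::exception_ptr e)
                         {
                             if (e) { /* the run failed -- report it somewhere */ }
                         },
                         cv::gin(in), cv::gout(out), ctx);
}
```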
index f4c0234..7d960cd 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPOUNDKERNEL_HPP
@@ -65,22 +65,6 @@ template<typename U> struct get_compound_in<cv::GArray<U>>
     }
 };
 
-// Kernel may return one object(GMat, GScalar) or a tuple of objects.
-// This helper is needed to cast return value to the same form(tuple)
-template<typename>
-struct tuple_wrap_helper;
-
-template<typename T> struct tuple_wrap_helper
-{
-    static std::tuple<T> get(T&& obj) { return std::make_tuple(std::move(obj)); }
-};
-
-template<typename... Objs>
-struct tuple_wrap_helper<std::tuple<Objs...>>
-{
-    static std::tuple<Objs...> get(std::tuple<Objs...>&& objs) { return std::forward<std::tuple<Objs...>>(objs); }
-};
-
 template<typename, typename, typename>
 struct GCompoundCallHelper;
 
@@ -104,7 +88,8 @@ struct GCompoundCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
 };
 
 template<class Impl, class K>
-class GCompoundKernelImpl: public cv::detail::GCompoundCallHelper<Impl, typename K::InArgs, typename K::OutArgs>
+class GCompoundKernelImpl: public cv::detail::GCompoundCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                           public cv::detail::KernelTag
 {
     using P = cv::detail::GCompoundCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
 
index 439b349..fdeef86 100644 (file)
 
 #include <functional>
 
-#include "opencv2/gapi/util/util.hpp"
-#include "opencv2/gapi/gcommon.hpp"
-#include "opencv2/gapi/gproto.hpp"
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gcompiled.hpp"
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
 
 namespace cv {
 
@@ -315,7 +315,7 @@ public:
      * inputs/outputs which were used to define this GComputation.
      */
     void apply(const std::vector<cv::Mat>& ins,         // Compatibility overload
-               const std::vector<cv::Mat>& outs,
+                     std::vector<cv::Mat>& outs,
                GCompileArgs &&args = {});
 #endif // !defined(GAPI_STANDALONE)
     // Various versions of compile(): //////////////////////////////////////////
index f2a6d8d..661e097 100644 (file)
@@ -8,24 +8,35 @@
 #define OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP
 
 
-#include <future>
+#include <future>                           //for std::future
 #include <exception>                        //for std::exception_ptr
 #include <functional>                       //for std::function
-#include "opencv2/gapi/garg.hpp"            //for GRunArgs, GRunArgsP
-#include "opencv2/gapi/gcommon.hpp"         //for GCompileArgs
+#include <opencv2/gapi/garg.hpp>            //for GRunArgs, GRunArgsP
+#include <opencv2/gapi/gcommon.hpp>         //for GCompileArgs
+#include <opencv2/gapi/own/exports.hpp>
+
 
 namespace cv {
     //fwd declaration
     class GComputation;
 namespace gapi {
 namespace wip  {
+    class GAsyncContext;
     //These functions asynchronously (i.e. probably on a separate thread of execution) call the apply member function of their first argument with copies of the rest of the arguments (except the callback) passed in.
     //The difference between the functions is the way the completion notification is delivered (via a callback or by waiting on a std::future object)
     //If an exception occurs during the execution it is transferred to the callback (via a function parameter) or stored in the future (and will be thrown on the call to std::future::get)
+
+    //N.B.:
+    //Input arguments are copied on the call to the async function (actually on the call to cv::gin) and thus do not have to outlive the completion of the asynchronous activity.
+    //Output arguments, however, are "captured" by reference (pointer) and therefore _must_ outlive the asynchronous activity
+    //(i.e. live at least until the callback is called or the future is unblocked)
     GAPI_EXPORTS void                async_apply(GComputation& gcomp, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {});
+    GAPI_EXPORTS void                async_apply(GComputation& gcomp, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx);
+
     GAPI_EXPORTS std::future<void>   async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {});
-} // nmaepspace gapi
+    GAPI_EXPORTS std::future<void>   async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args,  GAsyncContext& ctx);
 } // namespace wip
+} // namespace gapi
 } // namespace cv
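The same lifetime notes apply to the GComputation-level entry points; the future-returning overload of async_apply() is often the simplest to use. A minimal sketch (an uncompiled computation gcomp with a single input and output is assumed):

```cpp
#include <future>

#include <opencv2/gapi.hpp>
#include <opencv2/gapi/gcomputation_async.hpp>

cv::Mat process(cv::GComputation& gcomp, const cv::Mat& in)
{
    cv::Mat out;                                   // captured by pointer: must outlive the run
    std::future<void> done =
        cv::gapi::wip::async_apply(gcomp, cv::gin(in), cv::gout(out));
    done.get();                                    // rethrows whatever apply() threw, if anything
    return out;
}
```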
 
 
index af18424..b8d1dbb 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GKERNEL_HPP
@@ -14,7 +14,6 @@
 #include <type_traits> // false_type, true_type
 #include <unordered_map> // map (for GKernelPackage)
 #include <utility> // tuple
-#include <vector>  // lookup order
 
 #include <opencv2/gapi/gcommon.hpp> // CompileArgTag
 #include <opencv2/gapi/util/util.hpp> // Seq
@@ -23,7 +22,7 @@
 #include <opencv2/gapi/gmetaarg.hpp>  // GMetaArg
 #include <opencv2/gapi/gtype_traits.hpp> // GTypeTraits
 #include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
-
+#include <opencv2/gapi/gtransform.hpp>
 
 namespace cv {
 
@@ -57,7 +56,6 @@ namespace detail
     //
     namespace
     {
-
         template<typename T> struct Yield;
         template<> struct Yield<cv::GMat>
         {
@@ -173,12 +171,12 @@ namespace detail
 // GKernelType and GKernelTypeM are base classes which implement typed ::on()
 // method based on kernel signature. GKernelTypeM stands for multiple-return-value kernels
 //
-// G_TYPED_KERNEL and G_TYPED_KERNEK_M macros inherit user classes from GKernelType and
+// G_TYPED_KERNEL and G_TYPED_KERNEL_M macros inherit user classes from GKernelType and
 // GKernelTypeM respectively.
 
 template<typename K, typename... R, typename... Args>
 class GKernelTypeM<K, std::function<std::tuple<R...>(Args...)> >:
-        public detail::MetaHelper<K, std::tuple<Args...>, std::tuple<R...> >
+        public detail::MetaHelper<K, std::tuple<Args...>, std::tuple<R...>>
 {
     template<int... IIs>
     static std::tuple<R...> yield(cv::GCall &call, detail::Seq<IIs...>)
@@ -202,7 +200,7 @@ template<typename, typename> class GKernelType;
 
 template<typename K, typename R, typename... Args>
 class GKernelType<K, std::function<R(Args...)> >:
-        public detail::MetaHelper<K, std::tuple<Args...>, R >
+        public detail::MetaHelper<K, std::tuple<Args...>, R>
 {
 public:
     using InArgs  = std::tuple<Args...>;
@@ -243,18 +241,11 @@ public:
 #define G_TYPED_KERNEL_M(Class, API, Id)                                    \
     G_ID_HELPER_BODY(Class, Id)                                             \
     struct Class final: public cv::GKernelTypeM<Class, std::function API >, \
-                        public detail::G_ID_HELPER_CLASS(Class)             \
+                        public detail::G_ID_HELPER_CLASS(Class)
 // {body} is to be defined by user
 
 namespace cv
 {
-// Declare <unite> in cv:: namespace
-enum class unite_policy
-{
-    REPLACE,
-    KEEP
-};
-
 namespace gapi
 {
     // Prework: model "Device" API before it gets to G-API headers.
@@ -303,42 +294,16 @@ namespace gapi {
      * @{
      */
 
-    // Lookup order is in fact a vector of Backends to traverse during look-up
-    /**
-     * @brief Priority list of backends to use during kernel
-     *   resolution process.
-     *
-     * Priority is descending -- the first backend in the list has the
-     * top priority, and the last one has the lowest priority.
-     *
-     * If there's multiple implementations available for a kernel at
-     * the moment of graph compilation, a kernel (and thus a backend)
-     * will be selected according to this order (if the parameter is passed).
-     *
-     * Default order is not specified (and by default, only
-     * CPU(OpenCV) backend is involved in graph compilation).
-     */
-    using GLookupOrder = std::vector<GBackend>;
-    /**
-     * @brief Create a backend lookup order -- priority list of
-     * backends to use during graph compilation process.
-     *
-     * @sa GLookupOrder, @ref gapi_std_backends
-     */
-    inline GLookupOrder lookup_order(std::initializer_list<GBackend> &&list)
-    {
-        return GLookupOrder(std::move(list));
-    }
-
     // FIXME: Hide implementation
     /**
      * @brief A container class for heterogeneous kernel
-     * implementation collections.
+     * implementation collections and graph transformations.
      *
      * GKernelPackage is a special container class which stores kernel
-     * _implementations_. Objects of this class are created and passed
-     * to cv::GComputation::compile() to specify which kernels to use
-     * in the compiled graph. GKernelPackage may contain kernels of
+     * _implementations_ and graph _transformations_. Objects of this class
+     * are created and passed to cv::GComputation::compile() to specify
+     * which kernels to use and which transformations to apply in the
+     * compiled graph. GKernelPackage may contain kernels of
      * different backends, e.g. be heterogeneous.
      *
      * The most easy way to create a kernel package is to use function
@@ -350,23 +315,23 @@ namespace gapi {
      * with an empty package (created with the default constructor)
      * and then by populating it with kernels via call to
      * GKernelPackage::include(). Note this method is also a template
-     * one since G-API kernel implementations are _types_, not objects.
+     * one since G-API kernel and transformation implementations are _types_,
+     * not objects.
      *
      * Finally, two kernel packages can be combined into a new one
-     * with function cv::gapi::combine(). There are different rules
-     * apply to this process, see also cv::gapi::unite_policy for
-     * details.
+     * with function cv::gapi::combine().
      */
     class GAPI_EXPORTS GKernelPackage
     {
+
         /// @private
-        using S = std::unordered_map<std::string, GKernelImpl>;
+        using M = std::unordered_map<std::string, std::pair<GBackend, GKernelImpl>>;
 
         /// @private
-        using M = std::unordered_map<GBackend, S>;
+        M m_id_kernels;
 
         /// @private
-        M m_backend_kernels;
+        std::vector<GTransform> m_transformations;
 
     protected:
         /// @private
@@ -378,30 +343,64 @@ namespace gapi {
         // Remove ALL implementations of the given API (identified by ID)
         void removeAPI(const std::string &id);
 
+        /// @private
+        // Partial include() specialization for kernels
+        template <typename KImpl>
+        typename std::enable_if<(std::is_base_of<detail::KernelTag, KImpl>::value), void>::type
+        includeHelper()
+        {
+            auto backend     = KImpl::backend();
+            auto kernel_id   = KImpl::API::id();
+            auto kernel_impl = GKernelImpl{KImpl::kernel()};
+            removeAPI(kernel_id);
+
+            m_id_kernels[kernel_id] = std::make_pair(backend, kernel_impl);
+        }
+
+        /// @private
+        // Partial include() specialization for transformations
+        template <typename TImpl>
+        typename std::enable_if<(std::is_base_of<detail::TransformTag, TImpl>::value), void>::type
+        includeHelper()
+        {
+            m_transformations.emplace_back(TImpl::transformation());
+        }
+
     public:
         /**
-         * @brief Returns total number of kernels in the package
-         * (across all backends included)
+         * @brief Returns total number of kernels
+         * in the package (across all backends included)
          *
          * @return a number of kernels in the package
          */
         std::size_t size() const;
 
         /**
+         * @brief Returns vector of transformations included in the package
+         *
+         * @return vector of transformations included in the package
+         */
+        const std::vector<GTransform>& get_transformations() const;
+
+        /**
          * @brief Test if a particular kernel _implementation_ KImpl is
          * included in this kernel package.
          *
          * @sa includesAPI()
          *
+         * @note cannot be applied to transformations
+         *
          * @return true if there is such kernel, false otherwise.
          */
         template<typename KImpl>
         bool includes() const
         {
-            const auto set_iter = m_backend_kernels.find(KImpl::backend());
-            return (set_iter != m_backend_kernels.end())
-                ? (set_iter->second.count(KImpl::API::id()) > 0)
-                : false;
+            static_assert(std::is_base_of<detail::KernelTag, KImpl>::value,
+                          "includes() can be applied to kernels only");
+
+            auto kernel_it = m_id_kernels.find(KImpl::API::id());
+            return kernel_it != m_id_kernels.end() &&
+                   kernel_it->second.first == KImpl::backend();
         }
 
         /**
@@ -439,47 +438,33 @@ namespace gapi {
         }
 
         /**
-         * @brief Find a kernel (by its API), given the look-up order.
+         * @brief Find a kernel (by its API)
          *
-         * If order is empty, returns first suitable implementation.
+         * Returns the implementation corresponding to the given API id.
          * Throws if nothing found.
          *
          * @return Backend which hosts matching kernel implementation.
          *
-         * @sa cv::gapi::lookup_order
          */
         template<typename KAPI>
-        GBackend lookup(const GLookupOrder &order = {}) const
+        GBackend lookup() const
         {
-            return lookup(KAPI::id(), order).first;
+            return lookup(KAPI::id()).first;
         }
 
         /// @private
         std::pair<cv::gapi::GBackend, cv::GKernelImpl>
-        lookup(const std::string &id, const GLookupOrder &order = {}) const;
+        lookup(const std::string &id) const;
 
         // FIXME: No overwrites allowed?
         /**
-         * @brief Put a new kernel implementation KImpl into package.
-         *
-         * @param up unite policy to use. If the package has already
-         * implementation for this kernel (probably from another
-         * backend), and cv::unite_policy::KEEP is passed, the
-         * existing implementation remains in package; on
-         * cv::unite_policy::REPLACE all other existing
-         * implementations are first dropped from the package.
+         * @brief Put a new kernel implementation or a new transformation
+         * KImpl into the package.
          */
         template<typename KImpl>
-        void include(const cv::unite_policy up = cv::unite_policy::KEEP)
+        void include()
         {
-            auto backend     = KImpl::backend();
-            auto kernel_id   = KImpl::API::id();
-            auto kernel_impl = GKernelImpl{KImpl::kernel()};
-            if (up == cv::unite_policy::REPLACE) removeAPI(kernel_id);
-            else GAPI_Assert(up == cv::unite_policy::KEEP);
-
-            // Regardless of the policy, store new impl in its storage slot.
-            m_backend_kernels[backend][kernel_id] = std::move(kernel_impl);
+            includeHelper<KImpl>();
         }
 
         /**
@@ -492,36 +477,27 @@ namespace gapi {
         // TODO: Doxygen bug -- it wants me to place this comment
         // here, not below.
         /**
-         * @brief Create a new package based on `lhs` and `rhs`,
-         * with unity policy defined by `policy`.
+         * @brief Create a new package based on `lhs` and `rhs`.
          *
          * @param lhs "Left-hand-side" package in the process
          * @param rhs "Right-hand-side" package in the process
-         * @param policy Unite policy which is used in case of conflicts
-         * -- when the same kernel API is implemented in both packages by
-         * different backends; cv::unite_policy::KEEP keeps both
-         * implementation in the resulting package, while
-         * cv::unite_policy::REPLACE gives precedence two kernels from
-         * "Right-hand-side".
-         *
          * @return a new kernel package.
          */
         friend GAPI_EXPORTS GKernelPackage combine(const GKernelPackage  &lhs,
-                                                   const GKernelPackage  &rhs,
-                                                   const cv::unite_policy policy);
+                                                   const GKernelPackage  &rhs);
     };
 
     /**
      * @brief Create a kernel package object containing kernels
-     * specified in variadic template argument.
+     * and transformations specified in variadic template argument.
      *
-     * In G-API, kernel implementations are _types_. Every backend has
-     * its own kernel API (like GAPI_OCV_KERNEL() and
+     * In G-API, kernel implementations and transformations are _types_.
+     * Every backend has its own kernel API (like GAPI_OCV_KERNEL() and
      * GAPI_FLUID_KERNEL()) but all of that APIs define a new type for
      * each kernel implementation.
      *
      * Use this function to pass kernel implementations (defined in
-     * either way) to the system. Example:
+     * either way) and transformations to the system. Example:
      *
      * @snippet modules/gapi/samples/api_ref_snippets.cpp kernels_snippet
      *
@@ -531,6 +507,10 @@ namespace gapi {
      */
     template<typename... KK> GKernelPackage kernels()
     {
+        // FIXME: currently there is no check that transformations' signatures are unique
+        // and that there will be no intersection at the graph compilation stage
+        static_assert(detail::all_unique<typename KK::API...>::value, "Kernels API must be unique");
+
         GKernelPackage pkg;
 
         // For those who wonder - below is a trick to call a number of
@@ -539,7 +519,6 @@ namespace gapi {
         // Just note that `f(),a` always equals to `a` (with f() called!)
         // and parentheses are used to hide function call in the expanded sequence.
         // Leading 0 helps to handle case when KK is an empty list (kernels<>()).
-
         int unused[] = { 0, (pkg.include<KK>(), 0)... };
         cv::util::suppress_unused_warning(unused);
         return pkg;
@@ -548,8 +527,17 @@ namespace gapi {
     /** @} */
 
     GAPI_EXPORTS GKernelPackage combine(const GKernelPackage  &lhs,
-                                        const GKernelPackage  &rhs,
-                                        const cv::unite_policy policy);
+                                        const GKernelPackage  &rhs);
+    /**
+     * @brief cv::gapi::use_only is a special combinator which tells G-API to use only
+     * the kernels specified in cv::GComputation::compile() (and not to extend the kernels
+     * available by default with that package).
+     */
+    struct GAPI_EXPORTS use_only
+    {
+        GKernelPackage pkg;
+    };
+
 } // namespace gapi
 
 namespace detail
@@ -558,9 +546,10 @@ namespace detail
     {
         static const char* tag() { return "gapi.kernel_package"; }
     };
-    template<> struct CompileArgTag<cv::gapi::GLookupOrder>
+
+    template<> struct CompileArgTag<cv::gapi::use_only>
     {
-        static const char* tag() { return "gapi.lookup_order"; }
+        static const char* tag() { return "gapi.use_only"; }
     };
 } // namespace detail
 } // namespace cv
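With unite_policy and lookup orders removed, kernel resolution is now driven purely by the package contents: kernels (and transformations) are listed via cv::gapi::kernels<>(), merged with combine(), and optionally made exclusive with cv::gapi::use_only. A minimal sketch; MyFluidResize and MyOCVSobel are assumed user-defined kernel implementations and comp an existing computation, none of which are part of this patch:

```cpp
#include <opencv2/gapi.hpp>

// MyFluidResize / MyOCVSobel: assumed GAPI_FLUID_KERNEL / GAPI_OCV_KERNEL types.
void run_with_exact_kernels(cv::GComputation& comp, const cv::Mat& in_mat, cv::Mat& out_mat)
{
    auto fluid_pkg = cv::gapi::kernels<MyFluidResize>();    // implementations are types
    auto ocv_pkg   = cv::gapi::kernels<MyOCVSobel>();
    auto pkg       = cv::gapi::combine(fluid_pkg, ocv_pkg); // no unite_policy argument anymore

    comp.apply(cv::gin(in_mat), cv::gout(out_mat),
               cv::compile_args(cv::gapi::use_only{pkg}));  // use exactly these kernels
}
```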
index f0ce26b..d6b6e72 100644 (file)
@@ -14,9 +14,9 @@
 #include <opencv2/gapi/opencv_includes.hpp>
 #include <opencv2/gapi/gcommon.hpp> // GShape
 
-#include "opencv2/gapi/own/types.hpp" // cv::gapi::own::Size
-#include "opencv2/gapi/own/convert.hpp" // to_own
-#include "opencv2/gapi/own/assert.hpp"
+#include <opencv2/gapi/own/types.hpp> // cv::gapi::own::Size
+#include <opencv2/gapi/own/convert.hpp> // to_own
+#include <opencv2/gapi/own/assert.hpp>
 
 // TODO GAPI_EXPORTS or so
 namespace cv
index abdea75..5b6e2ad 100644 (file)
 #include <vector>
 #include <type_traits>
 
-#include "opencv2/gapi/util/util.hpp"
-#include "opencv2/gapi/util/variant.hpp"
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/util/variant.hpp>
 
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/garray.hpp"
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
 
 namespace cv
 {
index a3bce1a..b9e206a 100644 (file)
 #include <vector>
 #include <ostream>
 
-#include "opencv2/gapi/util/variant.hpp"
+#include <opencv2/gapi/util/variant.hpp>
 
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/garray.hpp"
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gmetaarg.hpp"
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gmetaarg.hpp>
 
 namespace cv {
 
index f780545..a7ee595 100644 (file)
@@ -8,10 +8,10 @@
 #ifndef OPENCV_GAPI_GPU_CORE_API_HPP
 #define OPENCV_GAPI_GPU_CORE_API_HPP
 /** @file
-* @deprecated Use "opencv2/gapi/ocl/core.hpp" instead.
+* @deprecated Use <opencv2/gapi/ocl/core.hpp> instead.
 */
 
-#include "opencv2/gapi/ocl/core.hpp"
+#include <opencv2/gapi/ocl/core.hpp>
 
 namespace cv {
 namespace gapi {
index f41cf13..b52c21d 100644 (file)
@@ -8,10 +8,10 @@
 #ifndef OPENCV_GAPI_GGPUKERNEL_HPP
 #define OPENCV_GAPI_GGPUKERNEL_HPP
 /** @file
-* @deprecated Use "opencv2/gapi/ocl/goclkernel.hpp" instead.
+* @deprecated Use <opencv2/gapi/ocl/goclkernel.hpp> instead.
 */
 
-#include "opencv2/gapi/ocl/goclkernel.hpp"
+#include <opencv2/gapi/ocl/goclkernel.hpp>
 #define GAPI_GPU_KERNEL GAPI_OCL_KERNEL
 
 
index 81ae0cb..b0df7ae 100644 (file)
@@ -8,10 +8,10 @@
 #ifndef OPENCV_GAPI_GPU_IMGPROC_API_HPP
 #define OPENCV_GAPI_GPU_IMGPROC_API_HPP
 /** @file
-* @deprecated Use "opencv2/gapi/ocl/imgproc.hpp" instead.
+* @deprecated Use <opencv2/gapi/ocl/imgproc.hpp> instead.
 */
 
-#include "opencv2/gapi/ocl/imgproc.hpp"
+#include <opencv2/gapi/ocl/imgproc.hpp>
 
 
 namespace cv {
index dd1205b..f65741e 100644 (file)
@@ -14,7 +14,7 @@
 #include <opencv2/gapi/opencv_includes.hpp>
 #include <opencv2/gapi/gcommon.hpp> // GShape
 #include <opencv2/gapi/util/optional.hpp>
-#include "opencv2/gapi/own/scalar.hpp"
+#include <opencv2/gapi/own/scalar.hpp>
 
 namespace cv
 {
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtransform.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtransform.hpp
new file mode 100644 (file)
index 0000000..5d1b91b
--- /dev/null
@@ -0,0 +1,103 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_GTRANSFORM_HPP
+#define OPENCV_GAPI_GTRANSFORM_HPP
+
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
+#include <opencv2/gapi/util/compiler_hints.hpp>
+#include <opencv2/gapi/gcomputation.hpp>
+
+namespace cv
+{
+
+struct GAPI_EXPORTS GTransform
+{
+    // FIXME: consider another simplified
+    // class instead of GComputation
+    using F = std::function<GComputation()>;
+
+    std::string description;
+    F pattern;
+    F substitute;
+
+    GTransform(const std::string& d, const F &p, const F &s) : description(d), pattern(p), substitute(s){};
+};
+
+namespace detail
+{
+
+template <typename, typename, typename>
+struct TransHelper;
+
+template <typename K, typename... Ins, typename Out>
+struct TransHelper<K, std::tuple<Ins...>, Out>
+{
+    template <typename Callable, int... IIs, int... OIs>
+    static GComputation invoke(Callable f, Seq<IIs...>, Seq<OIs...>)
+    {
+        const std::tuple<Ins...> ins;
+        const auto r = tuple_wrap_helper<Out>::get(f(std::get<IIs>(ins)...));
+        return GComputation(cv::GIn(std::get<IIs>(ins)...),
+                            cv::GOut(std::get<OIs>(r)...));
+    }
+
+    static GComputation get_pattern()
+    {
+        return invoke(K::pattern, typename MkSeq<sizeof...(Ins)>::type(),
+                      typename MkSeq<std::tuple_size<typename tuple_wrap_helper<Out>::type>::value>::type());
+    }
+    static GComputation get_substitute()
+    {
+        return invoke(K::substitute, typename MkSeq<sizeof...(Ins)>::type(),
+                      typename MkSeq<std::tuple_size<typename tuple_wrap_helper<Out>::type>::value>::type());
+    }
+};
+} // namespace detail
+
+template <typename, typename>
+class GTransformImpl;
+
+template <typename K, typename R, typename... Args>
+class GTransformImpl<K, std::function<R(Args...)>> : public cv::detail::TransHelper<K, std::tuple<Args...>, R>,
+                                                     public cv::detail::TransformTag
+{
+public:
+    // FIXME: currently there is no check that transformations' signatures are unique
+    // and that there will be no intersection at the graph compilation stage
+    using API = K;
+
+    static GTransform transformation()
+    {
+        return GTransform(K::descr(), &K::get_pattern, &K::get_substitute);
+    }
+};
+} // namespace cv
+
+#define G_DESCR_HELPER_CLASS(Class) Class##DescrHelper
+
+#define G_DESCR_HELPER_BODY(Class, Descr)                       \
+    namespace detail                                            \
+    {                                                           \
+    struct G_DESCR_HELPER_CLASS(Class)                          \
+    {                                                           \
+        static constexpr const char *descr() { return Descr; }; \
+    };                                                          \
+    }
+
+#define GAPI_TRANSFORM(Class, API, Descr)                                     \
+    G_DESCR_HELPER_BODY(Class, Descr)                                         \
+    struct Class final : public cv::GTransformImpl<Class, std::function API>, \
+                         public detail::G_DESCR_HELPER_CLASS(Class)
+
+#endif // OPENCV_GAPI_GTRANSFORM_HPP
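A transformation declared with GAPI_TRANSFORM is a type, just like a kernel, and goes into a package through the same cv::gapi::kernels<>() call. A minimal sketch; the idea of folding two consecutive bitwise_not calls is purely illustrative, and bitwise_and(in, in) serves only as an identity-like placeholder expression:

```cpp
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/gtransform.hpp>

GAPI_TRANSFORM(DoubleNotIsIdentity, <cv::GMat(cv::GMat)>, "Fold not(not(x)) into x")
{
    static cv::GMat pattern(const cv::GMat& in)
    {
        return cv::gapi::bitwise_not(cv::gapi::bitwise_not(in));
    }
    static cv::GMat substitute(const cv::GMat& in)
    {
        return cv::gapi::bitwise_and(in, in);   // placeholder for an identity expression
    }
};

// Registered exactly like a kernel implementation:
static const auto pkg = cv::gapi::kernels<DoubleNotIsIdentity>();
```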
index a966f26..8e48e91 100644 (file)
 
 #include <vector>
 
-#include "opencv2/gapi/gcomputation.hpp"
-#include "opencv2/gapi/gcompiled.hpp"
-#include "opencv2/gapi/gproto.hpp"
-#include "opencv2/gapi/gcommon.hpp"
+#include <opencv2/gapi/gcomputation.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/gcommon.hpp>
 
 namespace cv {
 
index db9ac46..ad3f7c1 100644 (file)
@@ -8,13 +8,13 @@
 #ifndef OPENCV_GAPI_IMGPROC_HPP
 #define OPENCV_GAPI_IMGPROC_HPP
 
-#include "opencv2/imgproc.hpp"
+#include <opencv2/imgproc.hpp>
 
 #include <utility> // std::tuple
 
-#include "opencv2/gapi/gkernel.hpp"
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/gscalar.hpp"
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
 
 
 /** \defgroup gapi_imgproc G-API image processing functionality
@@ -187,6 +187,55 @@ namespace imgproc {
             return in.withType(CV_8U, 1);
         }
     };
+
+    G_TYPED_KERNEL(GBayerGR2RGB, <cv::GMat(cv::GMat)>, "org.opencv.imgproc.colorconvert.bayergr2rgb") {
+        static cv::GMatDesc outMeta(cv::GMatDesc in) {
+            return in.withType(CV_8U, 3);
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2HSV, <cv::GMat(cv::GMat)>, "org.opencv.imgproc.colorconvert.rgb2hsv") {
+        static cv::GMatDesc outMeta(cv::GMatDesc in) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2YUV422, <cv::GMat(cv::GMat)>, "org.opencv.imgproc.colorconvert.rgb2yuv422") {
+        static cv::GMatDesc outMeta(cv::GMatDesc in) {
+            GAPI_Assert(in.depth == CV_8U);
+            GAPI_Assert(in.chan == 3);
+            return in.withType(in.depth, 2);
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toRGBp, <GMatP(GMat,GMat)>, "org.opencv.colorconvert.imgproc.nv12torgbp") {
+        static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) {
+            GAPI_Assert(inY.depth == CV_8U);
+            GAPI_Assert(inUV.depth == CV_8U);
+            GAPI_Assert(inY.chan == 1);
+            GAPI_Assert(inY.planar == false);
+            GAPI_Assert(inUV.chan == 2);
+            GAPI_Assert(inUV.planar == false);
+            GAPI_Assert(inY.size.width  == 2 * inUV.size.width);
+            GAPI_Assert(inY.size.height == 2 * inUV.size.height);
+            return inY.withType(CV_8U, 3).asPlanar();
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toBGRp, <GMatP(GMat,GMat)>, "org.opencv.colorconvert.imgproc.nv12tobgrp") {
+        static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) {
+            GAPI_Assert(inY.depth == CV_8U);
+            GAPI_Assert(inUV.depth == CV_8U);
+            GAPI_Assert(inY.chan == 1);
+            GAPI_Assert(inY.planar == false);
+            GAPI_Assert(inUV.chan == 2);
+            GAPI_Assert(inUV.planar == false);
+            GAPI_Assert(inY.size.width  == 2 * inUV.size.width);
+            GAPI_Assert(inY.size.height == 2 * inUV.size.height);
+            return inY.withType(CV_8U, 3).asPlanar();
+        }
+    };
+
 }
 
 
@@ -784,6 +833,85 @@ Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
 @sa YUV2BGR, NV12toRGB
 */
 GAPI_EXPORTS GMat NV12toBGR(const GMat& src_y, const GMat& src_uv);
+
+/** @brief Converts an image from BayerGR color space to RGB.
+The function converts an input image from BayerGR color space to RGB.
+The conventional ranges for G, R, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.bayergr2rgb"
+
+@param src_gr input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat BayerGR2RGB(const GMat& src_gr);
+
+/** @brief Converts an image from RGB color space to HSV.
+The function converts an input image from RGB color space to HSV.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2hsv"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat RGB2HSV(const GMat& src);
+
+/** @brief Converts an image from RGB color space to YUV422.
+The function converts an input image from RGB color space to YUV422.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2yuv422"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat RGB2YUV422(const GMat& src);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to RGB.
+The function converts an input image from NV12 color space to RGB.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned planar 3-channel image @ref CV_8UC1.
+Planar image memory layout is three planes laid out contiguously in memory,
+so the image height should be plane_height*plane_number
+and the image type is @ref CV_8UC1.
+
+@note Function textual ID is "org.opencv.colorconvert.imgproc.nv12torgbp"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2RGB, NV12toBGRp, NV12toRGB
+*/
+GAPI_EXPORTS GMatP NV12toRGBp(const GMat &src_y, const GMat &src_uv);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to BGR.
+The function converts an input image from NV12 color space to BGR.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned planar 3-channel image @ref CV_8UC1.
+Planar image memory layout is three planes laid out contiguously in memory,
+so the image height should be plane_height*plane_number
+and the image type is @ref CV_8UC1.
+
+@note Function textual ID is "org.opencv.colorconvert.imgproc.nv12tobgrp"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2RGB, NV12toRGBp, NV12toBGR
+*/
+GAPI_EXPORTS GMatP NV12toBGRp(const GMat &src_y, const GMat &src_uv);
+
 //! @} gapi_colorconvert
 } //namespace gapi
 } //namespace cv
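The new conversions are used like any other imgproc call when building a graph. A minimal sketch chaining two of them; the dummy 640x480 input is only there to keep the example self-contained:

```cpp
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/imgproc.hpp>

int main()
{
    cv::GMat in;
    cv::GMat hsv    = cv::gapi::RGB2HSV(in);       // CV_8UC3 -> CV_8UC3
    cv::GMat yuv422 = cv::gapi::RGB2YUV422(in);    // CV_8UC3 -> CV_8UC2
    cv::GComputation conv(cv::GIn(in), cv::GOut(hsv, yuv422));

    cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(127)), out_hsv, out_yuv;
    conv.apply(cv::gin(src), cv::gout(out_hsv, out_yuv));
    return 0;
}
```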
index ea2cda0..6927492 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GOCLKERNEL_HPP
@@ -226,7 +226,8 @@ struct OCLCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
 } // namespace detail
 
 template<class Impl, class K>
-class GOCLKernelImpl: public detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>
+class GOCLKernelImpl: public cv::detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                      public cv::detail::KernelTag
 {
     using P = detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
 
index 27a1d80..b20062c 100644 (file)
@@ -8,8 +8,8 @@
 #ifndef OPENCV_GAPI_OPERATORS_HPP
 #define OPENCV_GAPI_OPERATORS_HPP
 
-#include "opencv2/gapi/gmat.hpp"
-#include "opencv2/gapi/gscalar.hpp"
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
 
 GAPI_EXPORTS cv::GMat operator+(const cv::GMat&    lhs, const cv::GMat&    rhs);
 
index 8c1feb4..4a7394a 100644 (file)
@@ -13,7 +13,7 @@
 #include <opencv2/gapi/opencv_includes.hpp>
 #include <opencv2/gapi/own/types.hpp>
 #include <opencv2/gapi/own/mat.hpp>
-#include "opencv2/gapi/own/scalar.hpp"
+#include <opencv2/gapi/own/scalar.hpp>
 
 namespace cv
 {
index e110536..71c2aa8 100644 (file)
@@ -108,6 +108,10 @@ typedef unsigned short ushort;
 #define CV_ELEM_SIZE(type) \
     (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
 
+#ifndef CV_OVERRIDE
+#  define CV_OVERRIDE override
+#endif
+
 // base.h:
 namespace cv
 {
index 0d955d0..53bff2a 100644 (file)
@@ -8,11 +8,13 @@
 #ifndef OPENCV_GAPI_OWN_TYPES_HPP
 #define OPENCV_GAPI_OWN_TYPES_HPP
 
-#   if 0
+#   if defined(__OPENCV_BUILD)
 #       include <opencv2/core/base.hpp>
 #       define GAPI_EXPORTS CV_EXPORTS
-
 #   else
+#       define GAPI_EXPORTS
+
+#if 0  // Note: the following version currently is not needed for non-OpenCV build
 #       if defined _WIN32
 #           define GAPI_EXPORTS __declspec(dllexport)
 #       elif defined __GNUC__ && __GNUC__ >= 4
@@ -22,6 +24,7 @@
 #       ifndef GAPI_EXPORTS
 #           define GAPI_EXPORTS
 #       endif
+#endif
 
 #   endif
 
index 73f3afc..20f5b55 100644 (file)
@@ -8,15 +8,15 @@
 #ifndef OPENCV_GAPI_OWN_MAT_HPP
 #define OPENCV_GAPI_OWN_MAT_HPP
 
-#include "opencv2/gapi/opencv_includes.hpp"
-#include "opencv2/gapi/own/types.hpp"
-#include "opencv2/gapi/own/scalar.hpp"
-#include "opencv2/gapi/own/saturate.hpp"
-#include "opencv2/gapi/own/assert.hpp"
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/types.hpp>
+#include <opencv2/gapi/own/scalar.hpp>
+#include <opencv2/gapi/own/saturate.hpp>
+#include <opencv2/gapi/own/assert.hpp>
 
 #include <memory>                   //std::shared_ptr
 #include <cstring>                  //std::memcpy
-#include "opencv2/gapi/util/throw.hpp"
+#include <opencv2/gapi/util/throw.hpp>
 
 namespace cv { namespace gapi { namespace own {
     namespace detail {
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/render.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/render.hpp
new file mode 100644 (file)
index 0000000..2dd60da
--- /dev/null
@@ -0,0 +1,113 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_RENDER_HPP
+#define OPENCV_GAPI_RENDER_HPP
+
+#include <string>
+#include <vector>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/util/variant.hpp>
+#include <opencv2/gapi/own/exports.hpp>
+#include <opencv2/gapi/own/scalar.hpp>
+
+namespace cv
+{
+namespace gapi
+{
+namespace wip
+{
+namespace draw
+{
+
+/**
+ * A structure to represent parameters for drawing a text string.
+ */
+struct Text
+{
+    /*@{*/
+    std::string text;               //!< The text string to be drawn
+    cv::Point   org;                //!< The bottom-left corner of the text string in the image
+    int         ff;                 //!< The font type, see #HersheyFonts
+    double      fs;                 //!< The font scale factor that is multiplied by the font-specific base size
+    cv::Scalar  color;              //!< The text color
+    int         thick;              //!< The thickness of the lines used to draw a text
+    int         lt;                 //!< The line type. See #LineTypes
+    bool        bottom_left_origin; //!< When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner
+    /*@}*/
+};
+
+/**
+ * A structure to represent parameters for drawing a rectangle
+ */
+struct Rect
+{
+    cv::Rect   rect;  //!< Coordinates of the rectangle
+    cv::Scalar color; //!< The rectangle color or brightness (grayscale image)
+    int        thick; //!< The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle
+    int        lt;    //!< The type of the line. See #LineTypes
+    int        shift; //!< The number of fractional bits in the point coordinates
+};
+
+/**
+ * A structure to represent parameters for drawing a circle
+ */
+struct Circle
+{
+    cv::Point  center; //!< The center of the circle
+    int        radius; //!< The radius of the circle
+    cv::Scalar color;  //!< The color of the  circle
+    int        thick;  //!< The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn
+    int        lt;     //!< The type of the circle boundary. See #LineTypes
+    int        shift;  //!< The number of fractional bits in the coordinates of the center and in the radius value
+};
+
+/**
+ * A structure to represent parameters for drawing a line
+ */
+struct Line
+{
+    cv::Point  pt1;    //!< The first point of the line segment
+    cv::Point  pt2;    //!< The second point of the line segment
+    cv::Scalar color;  //!< The line color
+    int        thick;  //!< The thickness of line
+    int        lt;     //!< The type of the line. See #LineTypes
+    int        shift;  //!< The number of fractional bits in the point coordinates
+
+};
+
+using Prim  = util::variant
+    < Text
+    , Rect
+    , Circle
+    , Line
+    >;
+
+using Prims = std::vector<Prim>;
+
+/** @brief The function renders the passed drawing primitives on the input image
+
+@param bgr input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@param prims vector of drawing primitives
+*/
+GAPI_EXPORTS void render(cv::Mat& bgr, const Prims& prims);
+
+/** @brief The function renders the passed drawing primitives on two NV12 planes
+
+@param y_plane input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param uv_plane input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+@param prims vector of drawing primitives
+*/
+GAPI_EXPORTS void render(cv::Mat& y_plane, cv::Mat& uv_plane, const Prims& prims);
+
+} // namespace draw
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_RENDER_HPP
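The render() entry points take a flat list of primitive variants and draw them in place. A minimal sketch for the BGR overload; the concrete coordinates, colors and fonts are arbitrary choices for illustration:

```cpp
#include <opencv2/gapi/render.hpp>
#include <opencv2/imgproc.hpp>   // HersheyFonts, LineTypes

void annotate(cv::Mat& bgr)      // 8-bit 3-channel image, modified in place
{
    namespace draw = cv::gapi::wip::draw;

    draw::Prims prims;
    prims.emplace_back(draw::Rect{ {10, 10, 100, 50}, {0, 255, 0}, 2, cv::LINE_8, 0 });
    prims.emplace_back(draw::Text{ "label", {15, 40}, cv::FONT_HERSHEY_SIMPLEX,
                                   0.7, {255, 255, 255}, 1, cv::LINE_8, false });
    draw::render(bgr, prims);
}
```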
index 3146cb6..5f97e95 100644 (file)
@@ -13,7 +13,7 @@
 #include <typeinfo>
 #include <utility>
 
-#include "opencv2/gapi/util/throw.hpp"
+#include <opencv2/gapi/util/throw.hpp>
 
 #if defined(_MSC_VER)
    // disable MSVC warning on "multiple copy constructors specified"
index 54126d6..1aa2b26 100644 (file)
@@ -8,7 +8,7 @@
 #ifndef OPENCV_GAPI_UTIL_OPTIONAL_HPP
 #define OPENCV_GAPI_UTIL_OPTIONAL_HPP
 
-#include "opencv2/gapi/util/variant.hpp"
+#include <opencv2/gapi/util/variant.hpp>
 
 // A poor man's `optional` implementation, incompletely modeled against C++17 spec.
 namespace cv
index d0378e0..afcf559 100644 (file)
@@ -2,13 +2,13 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_UTIL_HPP
 #define OPENCV_GAPI_UTIL_HPP
 
-#include <utility> // std::tuple
+#include <tuple>
 
 // \cond HIDDEN_SYMBOLS
 // This header file contains some generic utility functions which are
@@ -84,6 +84,38 @@ namespace detail
     {
         static constexpr const std::size_t value = S;
     };
+
+    template <typename...>
+    struct contains : std::false_type{};
+
+    template <typename T1, typename T2, typename... Ts>
+    struct contains<T1, T2, Ts...> : std::integral_constant<bool, std::is_same<T1, T2>::value ||
+                                                                  contains<T1, Ts...>::value> {};
+    template<typename T, typename... Types>
+    struct contains<T, std::tuple<Types...>> : std::integral_constant<bool, contains<T, Types...>::value> {};
+
+    template <typename...>
+    struct all_unique : std::true_type{};
+
+    template <typename T1, typename... Ts>
+    struct all_unique<T1, Ts...> : std::integral_constant<bool, !contains<T1, Ts...>::value &&
+                                                                 all_unique<Ts...>::value> {};
+
+    template<typename>
+    struct tuple_wrap_helper;
+
+    template<typename T> struct tuple_wrap_helper
+    {
+        using type = std::tuple<T>;
+        static type get(T&& obj) { return std::make_tuple(std::move(obj)); }
+    };
+
+    template<typename... Objs>
+    struct tuple_wrap_helper<std::tuple<Objs...>>
+    {
+        using type = std::tuple<Objs...>;
+        static type get(std::tuple<Objs...>&& objs) { return std::forward<std::tuple<Objs...>>(objs); }
+    };
 } // namespace detail
 } // namespace cv
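The traits moved into util.hpp here (contains, all_unique, tuple_wrap_helper) back the new static_assert in cv::gapi::kernels<>() and the transformation helpers. Their behaviour is easiest to show with a few compile-time checks (a standalone sketch; the header path is assumed to be <opencv2/gapi/util/util.hpp>):

```cpp
#include <tuple>
#include <type_traits>

#include <opencv2/gapi/util/util.hpp>

using cv::detail::contains;
using cv::detail::all_unique;
using cv::detail::tuple_wrap_helper;

static_assert(contains<int, float, int, char>::value,  "int occurs in the list");
static_assert(!all_unique<int, float, int>::value,     "duplicate int is detected");
static_assert(all_unique<int, float, char>::value,     "distinct types pass");
static_assert(std::is_same<tuple_wrap_helper<int>::type, std::tuple<int>>::value,
              "a single value is wrapped into a one-element tuple");
static_assert(std::is_same<tuple_wrap_helper<std::tuple<int, char>>::type,
                           std::tuple<int, char>>::value,
              "tuples pass through unchanged");
```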
 
index a7e43c5..134ba66 100644 (file)
@@ -11,8 +11,8 @@
 #include <array>
 #include <type_traits>
 
-#include "opencv2/gapi/util/throw.hpp"
-#include "opencv2/gapi/util/util.hpp" // max_of_t
+#include <opencv2/gapi/util/throw.hpp>
+#include <opencv2/gapi/util/util.hpp> // max_of_t
 
 // A poor man's `variant` implementation, incompletely modeled against C++17 spec.
 namespace cv
index 154f7d3..c4e5bc6 100644 (file)
@@ -11,7 +11,7 @@
 
 
 #include "../../test/common/gapi_tests_common.hpp"
-#include "opencv2/gapi/imgproc.hpp"
+#include <opencv2/gapi/imgproc.hpp>
 
 namespace opencv_test
 {
@@ -43,5 +43,8 @@ class BGR2LUVPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCo
 class LUV2BGRPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
 class BGR2YUVPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
 class YUV2BGRPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
+class RGB2HSVPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
+class BayerGR2RGBPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
+class RGB2YUV422PerfTest  : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {};
 }
 #endif //OPENCV_GAPI_IMGPROC_PERF_TESTS_HPP
index e23bbd2..3fea552 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_PERF_TESTS_INL_HPP
@@ -18,6 +18,41 @@ namespace opencv_test
 
   using namespace perf;
 
+  namespace
+  {
+      void rgb2yuyv(const uchar* rgb_line, uchar* yuv422_line, int width)
+      {
+          CV_Assert(width % 2 == 0);
+          for (int i = 0; i < width; i += 2)
+          {
+              uchar r = rgb_line[i * 3    ];
+              uchar g = rgb_line[i * 3 + 1];
+              uchar b = rgb_line[i * 3 + 2];
+
+              yuv422_line[i * 2    ] = cv::saturate_cast<uchar>(-0.14713 * r - 0.28886 * g + 0.436   * b + 128.f);  // U0
+              yuv422_line[i * 2 + 1] = cv::saturate_cast<uchar>( 0.299   * r + 0.587   * g + 0.114   * b        );  // Y0
+              yuv422_line[i * 2 + 2] = cv::saturate_cast<uchar>(0.615    * r - 0.51499 * g - 0.10001 * b + 128.f);  // V0
+
+              r = rgb_line[i * 3 + 3];
+              g = rgb_line[i * 3 + 4];
+              b = rgb_line[i * 3 + 5];
+
+              yuv422_line[i * 2 + 3] = cv::saturate_cast<uchar>(0.299 * r + 0.587   * g + 0.114   * b);   // Y1
+          }
+      }
+
+      void convertRGB2YUV422Ref(const cv::Mat& in, cv::Mat &out)
+      {
+          out.create(in.size(), CV_8UC2);
+
+          for (int i = 0; i < in.rows; ++i)
+          {
+              const uchar* in_line_p  = in.ptr<uchar>(i);
+              uchar* out_line_p = out.ptr<uchar>(i);
+              rgb2yuyv(in_line_p, out_line_p, in.cols);
+          }
+      }
+  }
 //------------------------------------------------------------------------------
 
 PERF_TEST_P_(SepFilterPerfTest, TestPerformance)
@@ -33,7 +68,7 @@ PERF_TEST_P_(SepFilterPerfTest, TestPerformance)
     cv::Mat kernelY(kernSize, 1, CV_32F);
     randu(kernelX, -1, 1);
     randu(kernelY, -1, 1);
-    initMatsRandN(type, sz, dtype, false);
+    initMatrixRandN(type, sz, dtype, false);
 
     cv::Point anchor = cv::Point(-1, -1);
 
@@ -75,7 +110,7 @@ PERF_TEST_P_(Filter2DPerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, kernSize, sz, borderType, dtype, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, dtype, false);
+    initMatrixRandN(type, sz, dtype, false);
 
     cv::Point anchor = {-1, -1};
     double delta = 0;
@@ -125,7 +160,7 @@ PERF_TEST_P_(BoxFilterPerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, filterSize, sz, borderType, dtype, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, dtype, false);
+    initMatrixRandN(type, sz, dtype, false);
 
     cv::Point anchor = {-1, -1};
     bool normalize = true;
@@ -169,7 +204,7 @@ PERF_TEST_P_(BlurPerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, filterSize, sz, borderType, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     cv::Point anchor = {-1, -1};
 
@@ -215,7 +250,7 @@ PERF_TEST_P_(GaussianBlurPerfTest, TestPerformance)
     cv::Size kSize = cv::Size(kernSize, kernSize);
     auto& rng = cv::theRNG();
     double sigmaX = rng();
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     // OpenCV code ///////////////////////////////////////////////////////////
     cv::GaussianBlur(in_mat1, out_mat_ocv, kSize, sigmaX);
@@ -254,7 +289,7 @@ PERF_TEST_P_(MedianBlurPerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, kernSize, sz, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -295,7 +330,7 @@ PERF_TEST_P_(ErodePerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, kernSize, sz, kernType,  compile_args) = GetParam();
 
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     cv::Mat kernel = cv::getStructuringElement(kernType, cv::Size(kernSize, kernSize));
 
@@ -338,7 +373,7 @@ PERF_TEST_P_(Erode3x3PerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, sz, numIters, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     cv::Mat kernel = cv::getStructuringElement(cv::MorphShapes::MORPH_RECT, cv::Size(3, 3));
 
@@ -381,7 +416,7 @@ PERF_TEST_P_(DilatePerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, kernSize, sz, kernType, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     cv::Mat kernel = cv::getStructuringElement(kernType, cv::Size(kernSize, kernSize));
 
@@ -424,7 +459,7 @@ PERF_TEST_P_(Dilate3x3PerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, sz, numIters, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, type, false);
+    initMatrixRandN(type, sz, type, false);
 
     cv::Mat kernel = cv::getStructuringElement(cv::MorphShapes::MORPH_RECT, cv::Size(3, 3));
 
@@ -467,7 +502,7 @@ PERF_TEST_P_(SobelPerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, kernSize, sz, dtype, dx, dy, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, dtype, false);
+    initMatrixRandN(type, sz, dtype, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -510,7 +545,7 @@ PERF_TEST_P_(SobelXYPerfTest, TestPerformance)
     cv::Mat out_mat_ocv2;
     cv::Mat out_mat_gapi2;
 
-    initMatsRandN(type, sz, dtype, false);
+    initMatrixRandN(type, sz, dtype, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -555,7 +590,7 @@ PERF_TEST_P_(CannyPerfTest, TestPerformance)
     cv::GCompileArgs compile_args;
     std::tie(cmpF, type, sz, thrLow, thrUp, apSize, l2gr, compile_args) = GetParam();
 
-    initMatsRandN(type, sz, CV_8UC1, false);
+    initMatrixRandN(type, sz, CV_8UC1, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -593,7 +628,7 @@ PERF_TEST_P_(EqHistPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC1, sz, CV_8UC1, false);
+    initMatrixRandN(CV_8UC1, sz, CV_8UC1, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -631,7 +666,7 @@ PERF_TEST_P_(RGB2GrayPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC1, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC1, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -669,7 +704,7 @@ PERF_TEST_P_(BGR2GrayPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC1, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC1, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -707,7 +742,7 @@ PERF_TEST_P_(RGB2YUVPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -745,7 +780,7 @@ PERF_TEST_P_(YUV2RGBPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -783,7 +818,7 @@ PERF_TEST_P_(RGB2LabPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -821,7 +856,7 @@ PERF_TEST_P_(BGR2LUVPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -859,7 +894,7 @@ PERF_TEST_P_(LUV2BGRPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -897,7 +932,7 @@ PERF_TEST_P_(BGR2YUVPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BGR2YUV);
 
@@ -927,7 +962,7 @@ PERF_TEST_P_(YUV2BGRPerfTest, TestPerformance)
     Size sz = get<1>(GetParam());
     cv::GCompileArgs compile_args = get<2>(GetParam());
 
-    initMatsRandN(CV_8UC3, sz, CV_8UC3, false);
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
 
     cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_YUV2BGR);
 
@@ -949,6 +984,92 @@ PERF_TEST_P_(YUV2BGRPerfTest, TestPerformance)
     SANITY_CHECK_NOTHING();
 }
 
+PERF_TEST_P_(BayerGR2RGBPerfTest, TestPerformance)
+{
+    compare_f cmpF = get<0>(GetParam());
+    Size sz = get<1>(GetParam());
+    cv::GCompileArgs compile_args = get<2>(GetParam());
+
+    initMatrixRandN(CV_8UC1, sz, CV_8UC3, false);
+
+    cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BayerGR2RGB);
+
+    cv::GMat in;
+    auto out = cv::gapi::BayerGR2RGB(in);
+    cv::GComputation c(in, out);
+
+    // Warm-up graph engine:
+    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+
+    TEST_CYCLE()
+    {
+        c.apply(in_mat1, out_mat_gapi);
+    }
+
+    EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    EXPECT_EQ(out_mat_gapi.size(), sz);
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P_(RGB2HSVPerfTest, TestPerformance)
+{
+    compare_f cmpF = get<0>(GetParam());
+    Size sz = get<1>(GetParam());
+    cv::GCompileArgs compile_args = get<2>(GetParam());
+
+    initMatrixRandN(CV_8UC3, sz, CV_8UC3, false);
+    cv::cvtColor(in_mat1, in_mat1, cv::COLOR_BGR2RGB);
+
+    cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_RGB2HSV);
+
+    cv::GMat in;
+    auto out = cv::gapi::RGB2HSV(in);
+    cv::GComputation c(in, out);
+
+    // Warm-up graph engine:
+    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+
+    TEST_CYCLE()
+    {
+        c.apply(in_mat1, out_mat_gapi);
+    }
+
+    EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    EXPECT_EQ(out_mat_gapi.size(), sz);
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P_(RGB2YUV422PerfTest, TestPerformance)
+{
+    compare_f cmpF = get<0>(GetParam());
+    Size sz = get<1>(GetParam());
+    cv::GCompileArgs compile_args = get<2>(GetParam());
+
+    initMatrixRandN(CV_8UC3, sz, CV_8UC2, false);
+    cv::cvtColor(in_mat1, in_mat1, cv::COLOR_BGR2RGB);
+
+    convertRGB2YUV422Ref(in_mat1, out_mat_ocv);
+
+    cv::GMat in;
+    auto out = cv::gapi::RGB2YUV422(in);
+    cv::GComputation c(in, out);
+
+    // Warm-up graph engine:
+    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+
+    TEST_CYCLE()
+    {
+        c.apply(in_mat1, out_mat_gapi);
+    }
+
+    EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    EXPECT_EQ(out_mat_gapi.size(), sz);
+
+    SANITY_CHECK_NOTHING();
+}
+
 //------------------------------------------------------------------------------
 
 }
index 82fabfd..4a681f9 100644 (file)
@@ -7,7 +7,7 @@
 
 #include "../perf_precomp.hpp"
 #include "../common/gapi_core_perf_tests.hpp"
-#include "opencv2/gapi/cpu/core.hpp"
+#include <opencv2/gapi/cpu/core.hpp>
 
 #define CORE_CPU cv::gapi::core::cpu::kernels()
 
index ea3d753..b2e5b3d 100644 (file)
@@ -7,8 +7,7 @@
 
 #include "../perf_precomp.hpp"
 #include "../common/gapi_imgproc_perf_tests.hpp"
-#include "opencv2/gapi/cpu/imgproc.hpp"
-
+#include <opencv2/gapi/cpu/imgproc.hpp>
 
 #define IMGPROC_CPU cv::gapi::imgproc::cpu::kernels()
 
@@ -185,4 +184,18 @@ INSTANTIATE_TEST_CASE_P(YUV2BGRPerfTestCPU, YUV2BGRPerfTest,
         Values(szVGA, sz720p, sz1080p),
         Values(cv::compile_args(IMGPROC_CPU))));
 
+INSTANTIATE_TEST_CASE_P(RGB2HSVPerfTestCPU, RGB2HSVPerfTest,
+        Combine(Values(AbsExact().to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_CPU))));
+
+INSTANTIATE_TEST_CASE_P(BayerGR2RGBPerfTestCPU, BayerGR2RGBPerfTest,
+        Combine(Values(AbsExact().to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_CPU))));
+
+INSTANTIATE_TEST_CASE_P(RGB2YUV422PerfTestCPU, RGB2YUV422PerfTest,
+        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_CPU))));
 }
index 635f2d0..6414a81 100644 (file)
@@ -173,6 +173,21 @@ INSTANTIATE_TEST_CASE_P(YUV2BGRPerfTestFluid, YUV2BGRPerfTest,
             Values(szVGA, sz720p, sz1080p),
             Values(cv::compile_args(IMGPROC_FLUID))));
 
+INSTANTIATE_TEST_CASE_P(BayerGR2RGBPerfTestFluid, BayerGR2RGBPerfTest,
+        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(RGB2YUV422PerfTestFluid, RGB2YUV422PerfTest,
+        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(RGB2HSVPerfTestFluid, RGB2HSVPerfTest,
+        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
 INSTANTIATE_TEST_CASE_P(BGR2LUVPerfTestFluid, BGR2LUVPerfTest,
     Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
             Values(szVGA, sz720p, sz1080p),
index 4f9466e..bf19532 100644 (file)
 #include <cstdint>
 #include <vector>
 
-#include "opencv2/ts.hpp"
-#include "opencv2/gapi.hpp"
-#include "opencv2/gapi/imgproc.hpp"
-#include "opencv2/gapi/core.hpp"
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
-#include "opencv2/gapi/gpu/ggpukernel.hpp"
-#include "opencv2/gapi/gpu/imgproc.hpp"
-#include "opencv2/gapi/gpu/core.hpp"
-#include "opencv2/gapi/operators.hpp"
+#include <opencv2/ts.hpp>
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/imgproc.hpp>
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
+#include <opencv2/gapi/gpu/ggpukernel.hpp>
+#include <opencv2/gapi/gpu/imgproc.hpp>
+#include <opencv2/gapi/gpu/core.hpp>
+#include <opencv2/gapi/operators.hpp>
 
-#include "opencv2/gapi/fluid/core.hpp"
-#include "opencv2/gapi/fluid/imgproc.hpp"
+#include <opencv2/gapi/fluid/core.hpp>
+#include <opencv2/gapi/fluid/imgproc.hpp>
 
 #endif // __OPENCV_GAPI_PERF_PRECOMP_HPP__
index 5e8859d..2793aee 100644 (file)
@@ -49,8 +49,7 @@ int main(int argc, char *argv[])
     //! [apply_with_param]
     cv::gapi::GKernelPackage kernels = cv::gapi::combine
         (cv::gapi::core::fluid::kernels(),
-         cv::gapi::imgproc::fluid::kernels(),
-         cv::unite_policy::KEEP);
+         cv::gapi::imgproc::fluid::kernels());
     sobelEdge.apply(input, output, cv::compile_args(kernels));
     //! [apply_with_param]
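Note: cv::unite_policy is gone from cv::gapi::combine() in this drop; the merged package now always prefers the right-hand side on an API collision (see the gkernel.cpp rewrite further below). A hypothetical sketch relying on that precedence:

    // Fluid implementations (RHS) override the OCV ones (LHS) wherever both exist;
    // kernels present only in the CPU package are kept as-is.
    auto pkg = cv::gapi::combine(cv::gapi::imgproc::cpu::kernels(),
                                 cv::gapi::imgproc::fluid::kernels());
    sobelEdge.apply(input, output, cv::compile_args(pkg));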
 
index 793f44a..bdd46b9 100644 (file)
@@ -6,7 +6,7 @@
 
 
 #include "precomp.hpp"
-#include "opencv2/gapi/garray.hpp"
+#include <opencv2/gapi/garray.hpp>
 #include "api/gorigin.hpp"
 
 // cv::detail::GArrayU public implementation ///////////////////////////////////
index 3dfd2ef..43227bf 100644 (file)
@@ -8,8 +8,8 @@
 #include "precomp.hpp"
 #include <memory> // unique_ptr
 
-#include "opencv2/gapi/gkernel.hpp"
-#include "opencv2/gapi/own/convert.hpp"
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/own/convert.hpp>
 
 #include "api/gbackend_priv.hpp"
 #include "backends/common/gbackend.hpp"
@@ -45,6 +45,11 @@ void cv::gapi::GBackend::Priv::addBackendPasses(ade::ExecutionEngineSetupContext
     // add custom (backend-specific) graph transformations
 }
 
+cv::gapi::GKernelPackage cv::gapi::GBackend::Priv::auxiliaryKernels() const
+{
+    return {};
+}
+
 // GBackend public implementation //////////////////////////////////////////////
 cv::gapi::GBackend::GBackend()
 {
@@ -98,7 +103,7 @@ void bindInArg(Mag& mag, const RcDesc &rc, const GRunArg &arg, bool is_umat)
                 auto& mag_umat = mag.template slot<cv::UMat>()[rc.id];
                 mag_umat = to_ocv(util::get<cv::gapi::own::Mat>(arg)).getUMat(ACCESS_READ);
 #else
-                util::throw_error(std::logic_error("UMat is not supported in stadnalone build"));
+                util::throw_error(std::logic_error("UMat is not supported in standalone build"));
 #endif // !defined(GAPI_STANDALONE)
             }
             else
index 1c6e297..afc77b2 100644 (file)
@@ -47,6 +47,8 @@ public:
 
     virtual void addBackendPasses(ade::ExecutionEngineSetupContext &);
 
+    virtual cv::gapi::GKernelPackage auxiliaryKernels() const;
+
     virtual ~Priv() = default;
 };
 
index 51de047..4c052ff 100644 (file)
@@ -7,7 +7,7 @@
 
 #include "precomp.hpp"
 #include <cassert>
-#include "opencv2/gapi/gcall.hpp"
+#include <opencv2/gapi/gcall.hpp>
 #include "api/gcall_priv.hpp"
 
 // GCall private implementation ////////////////////////////////////////////////
index ab761ed..64a4495 100644 (file)
@@ -12,8 +12,8 @@
 
 #include "logger.hpp" // GAPI_LOG
 
-#include "opencv2/gapi/gcomputation.hpp"
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi/gcomputation.hpp>
+#include <opencv2/gapi/gkernel.hpp>
 
 #include "api/gcomputation_priv.hpp"
 #include "api/gcall_priv.hpp"
@@ -159,16 +159,14 @@ void cv::GComputation::apply(cv::Mat in1, cv::Mat in2, cv::Scalar &out, GCompile
 }
 
 void cv::GComputation::apply(const std::vector<cv::Mat> &ins,
-                             const std::vector<cv::Mat> &outs,
+                                   std::vector<cv::Mat> &outs,
                              GCompileArgs &&args)
 {
     GRunArgs call_ins;
     GRunArgsP call_outs;
 
-    // Make a temporary copy of vector outs - cv::Mats are copies anyway
-    auto tmp = outs;
-    for (const cv::Mat &m : ins) { call_ins.emplace_back(m);   }
-    for (      cv::Mat &m : tmp) { call_outs.emplace_back(&m); }
+    for (const cv::Mat &m : ins)  { call_ins.emplace_back(m);   }
+    for (      cv::Mat &m : outs) { call_outs.emplace_back(&m); }
 
     apply(std::move(call_ins), std::move(call_outs), std::move(args));
 }
index f8c851a..6993e95 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
 #include <ade/util/algorithm.hpp>
 
 #include "logger.hpp"
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi/gkernel.hpp>
 
 #include "api/gbackend_priv.hpp"
 
 // GKernelPackage public implementation ////////////////////////////////////////
 void cv::gapi::GKernelPackage::remove(const cv::gapi::GBackend& backend)
 {
-    m_backend_kernels.erase(backend);
+    std::vector<std::string> id_deleted_kernels;
+    for (const auto& p : m_id_kernels)
+    {
+        if (p.second.first == backend)
+        {
+            id_deleted_kernels.push_back(p.first);
+        }
+    }
+
+    for (const auto& kernel_id : id_deleted_kernels)
+    {
+        m_id_kernels.erase(kernel_id);
+    }
 }
 
 bool cv::gapi::GKernelPackage::includesAPI(const std::string &id) const
 {
-    // In current form not very efficient (n * log n)
-    auto it = std::find_if(m_backend_kernels.begin(),
-                           m_backend_kernels.end(),
-                           [&id](const M::value_type &p) {
-                               return ade::util::contains(p.second, id);
-                           });
-    return (it != m_backend_kernels.end());
+    return ade::util::contains(m_id_kernels, id);
 }
 
 void cv::gapi::GKernelPackage::removeAPI(const std::string &id)
 {
-    for (auto &bk : m_backend_kernels)
-        bk.second.erase(id);
+    m_id_kernels.erase(id);
 }
 
 std::size_t cv::gapi::GKernelPackage::size() const
 {
-    return std::accumulate(m_backend_kernels.begin(),
-                           m_backend_kernels.end(),
-                           static_cast<std::size_t>(0u),
-                           [](std::size_t acc, const M::value_type& v) {
-                               return acc + v.second.size();
-                           });
+    return m_id_kernels.size();
+}
+
+const std::vector<cv::GTransform> &cv::gapi::GKernelPackage::get_transformations() const
+{
+    return m_transformations;
 }
 
 cv::gapi::GKernelPackage cv::gapi::combine(const GKernelPackage  &lhs,
-                                           const GKernelPackage  &rhs,
-                                           const cv::unite_policy policy)
+                                           const GKernelPackage  &rhs)
 {
 
-    if (policy == cv::unite_policy::REPLACE)
-    {
-        // REPLACE policy: if there is a collision, prefer RHS
-        // to LHS
+        // If there is a collision, prefer RHS to LHS
         // since RHS package has precedence, start with its copy
         GKernelPackage result(rhs);
         // now iterate over LHS package and put kernel if and only
         // if there's no such one
-        for (const auto &backend : lhs.m_backend_kernels)
+        for (const auto& kernel : lhs.m_id_kernels)
         {
-            for (const auto &kimpl : backend.second)
+            if (!result.includesAPI(kernel.first))
             {
-                if (!result.includesAPI(kimpl.first))
-                    result.m_backend_kernels[backend.first].insert(kimpl);
+                result.m_id_kernels.emplace(kernel.first, kernel.second);
             }
         }
-        return result;
-    }
-    else if (policy == cv::unite_policy::KEEP)
-    {
-        // KEEP policy: if there is a collision, just keep two versions
-        // of a kernel
-        GKernelPackage result(lhs);
-        for (const auto &p : rhs.m_backend_kernels)
-        {
-            result.m_backend_kernels[p.first].insert(p.second.begin(),
-                                                     p.second.end());
+        for (const auto &transforms : lhs.m_transformations){
+            result.m_transformations.push_back(transforms);
         }
         return result;
-    }
-    else GAPI_Assert(false);
-    return GKernelPackage();
 }
 
 std::pair<cv::gapi::GBackend, cv::GKernelImpl>
-cv::gapi::GKernelPackage::lookup(const std::string &id,
-                                 const GLookupOrder &order) const
+cv::gapi::GKernelPackage::lookup(const std::string &id) const
 {
-    if (order.empty())
+    auto kernel_it = m_id_kernels.find(id);
+    if (kernel_it != m_id_kernels.end())
     {
-        // If order is empty, return what comes first
-        auto it = std::find_if(m_backend_kernels.begin(),
-                               m_backend_kernels.end(),
-                               [&id](const M::value_type &p) {
-                                   return ade::util::contains(p.second, id);
-                               });
-        if (it != m_backend_kernels.end())
-        {
-            // FIXME: Two lookups!
-            return std::make_pair(it->first, it->second.find(id)->second);
-        }
-    }
-    else
-    {
-        // There is order, so:
-        // 1. Limit search scope only to specified backends
-        //    FIXME: Currently it is not configurable if search can fall-back
-        //    to other backends (not listed in order) if kernel hasn't been found
-        //    in the look-up list
-        // 2. Query backends in the specified order
-        for (const auto &selected_backend : order)
-        {
-            const auto kernels_it = m_backend_kernels.find(selected_backend);
-            if (kernels_it == m_backend_kernels.end())
-            {
-                GAPI_LOG_WARNING(NULL,
-                                 "Backend "
-                                  << &selected_backend.priv() // FIXME: name instead
-                                  << " was listed in lookup list but was not found "
-                                     "in the package");
-                continue;
-            }
-            if (ade::util::contains(kernels_it->second, id))
-            {
-                // FIXME: two lookups!
-                return std::make_pair(selected_backend, kernels_it->second.find(id)->second);
-            }
-        }
+        return kernel_it->second;
     }
-
-    // If reached here, kernel was not found among selected backends.
+    // If reached here, kernel was not found.
     util::throw_error(std::logic_error("Kernel " + id + " was not found"));
 }
 
 std::vector<cv::gapi::GBackend> cv::gapi::GKernelPackage::backends() const
 {
-    std::vector<cv::gapi::GBackend> result;
-    for (const auto &p : m_backend_kernels) result.emplace_back(p.first);
-    return result;
+    using kernel_type = std::pair<std::string, std::pair<cv::gapi::GBackend, cv::GKernelImpl>>;
+    std::unordered_set<cv::gapi::GBackend> unique_set;
+    ade::util::transform(m_id_kernels, std::inserter(unique_set, unique_set.end()),
+                                       [](const kernel_type& k) { return k.second.first; });
+
+    return std::vector<cv::gapi::GBackend>(unique_set.begin(), unique_set.end());
 }
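Note: with kernels now stored in a flat id -> (backend, impl) map, lookup() loses its GLookupOrder parameter and resolves an id with a single hash lookup; remove()/removeAPI()/size() operate on the same map. A minimal usage sketch (the kernel id string is illustrative only):

    cv::gapi::GKernelPackage pkg = cv::gapi::imgproc::cpu::kernels();
    const std::string id = "org.opencv.imgproc.colorconvert.rgb2yuv422";   // hypothetical id
    if (pkg.includesAPI(id))
    {
        cv::gapi::GBackend backend;
        cv::GKernelImpl    impl;
        std::tie(backend, impl) = pkg.lookup(id);                          // throws if id is absent
    }
    const std::size_t num_kernels  = pkg.size();
    const std::size_t num_backends = pkg.backends().size();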
index 1466a26..5bc55ef 100644 (file)
@@ -8,8 +8,8 @@
 #include "precomp.hpp"
 #include <opencv2/gapi/opencv_includes.hpp>
 #include <opencv2/gapi/own/mat.hpp> //gapi::own::Mat
+#include <opencv2/gapi/gmat.hpp>
 
-#include "opencv2/gapi/gmat.hpp"
 #include "api/gorigin.hpp"
 
 // cv::GMat public implementation //////////////////////////////////////////////
index ad4ebf5..7129b2f 100644 (file)
 #include <set>   // set
 #include <map>   // map
 
-#include "opencv2/gapi/util/variant.hpp"   // variant
-#include "opencv2/gapi/gcommon.hpp"
-#include "opencv2/gapi/opencv_includes.hpp"
-#include "compiler/gobjref.hpp"
+#include <opencv2/gapi/util/variant.hpp>   // variant
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/opencv_includes.hpp>
 
+#include "compiler/gobjref.hpp"
 #include "api/gnode.hpp"
 
 namespace cv
index af5ba56..1106af9 100644 (file)
@@ -8,9 +8,9 @@
 #include "precomp.hpp"
 
 #include <ade/util/algorithm.hpp>
-#include "opencv2/gapi/util/throw.hpp"
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gproto.hpp"
+#include <opencv2/gapi/util/throw.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gproto.hpp>
 
 #include "api/gorigin.hpp"
 #include "api/gproto_priv.hpp"
index 2e42ae6..fa7c0cd 100644 (file)
@@ -7,8 +7,8 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/own/convert.hpp"
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/own/convert.hpp>
 #include "api/gorigin.hpp"
 
 // cv::GScalar public implementation ///////////////////////////////////////////
index e46968b..b7bfc6b 100644 (file)
@@ -7,10 +7,10 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/gcall.hpp"
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/gkernel.hpp"
-#include "opencv2/gapi/core.hpp"
+#include <opencv2/gapi/gcall.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/core.hpp>
 
 #include <tuple>
 #include <numeric>
@@ -301,6 +301,11 @@ GMat resize(const GMat& src, const Size& dsize, double fx, double fy, int interp
     return core::GResize::on(src, dsize, fx, fy, interpolation);
 }
 
+GMatP resizeP(const GMatP& src, const Size& dsize, int interpolation)
+{
+    return core::GResizeP::on(src, dsize, interpolation);
+}
+
 GMat remap(const GMat& src, const Mat& map1, const Mat& map2,
            int interpolation, int borderMode,
            const Scalar& borderValue)
index b24af8c..05d2cc4 100644 (file)
@@ -7,10 +7,10 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/gcall.hpp"
-#include "opencv2/gapi/gkernel.hpp"
-#include "opencv2/gapi/imgproc.hpp"
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/gcall.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/imgproc.hpp>
 
 namespace cv { namespace gapi {
 
@@ -157,5 +157,27 @@ GMat RGB2Lab(const GMat& src)
     return imgproc::GRGB2Lab::on(src);
 }
 
+GMat BayerGR2RGB(const GMat& src_gr) {
+    return imgproc::GBayerGR2RGB::on(src_gr);
+}
+
+GMat RGB2HSV(const GMat& src) {
+    return imgproc::GRGB2HSV::on(src);
+}
+
+GMat RGB2YUV422(const GMat& src) {
+    return imgproc::GRGB2YUV422::on(src);
+}
+
+GMatP NV12toRGBp(const GMat &y, const GMat &uv)
+{
+    return imgproc::GNV12toRGBp::on(y, uv);
+}
+
+GMatP NV12toBGRp(const GMat &y, const GMat &uv)
+{
+    return imgproc::GNV12toBGRp::on(y, uv);
+}
+
 } //namespace gapi
 } //namespace cv
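Note: NV12toRGBp/NV12toBGRp return a planar cv::GMatP, which is what the new resizeP() overload added to gapi_core.cpp consumes. A minimal graph sketch chaining the two (sizes and interpolation are illustrative):

    cv::GMat  y, uv;
    cv::GMatP rgbp    = cv::gapi::NV12toRGBp(y, uv);
    cv::GMatP resized = cv::gapi::resizeP(rgbp, cv::Size(320, 240), cv::INTER_LINEAR);
    cv::GComputation graph(cv::GIn(y, uv), cv::GOut(resized));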
index 44fc4fa..6097c01 100644 (file)
@@ -7,10 +7,10 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/imgproc.hpp"
-#include "opencv2/gapi/core.hpp"
-#include "opencv2/gapi/gscalar.hpp"
-#include "opencv2/gapi/operators.hpp"
+#include <opencv2/gapi/imgproc.hpp>
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/operators.hpp>
 
 cv::GMat operator+(const cv::GMat& lhs, const cv::GMat& rhs)
 {
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/render.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/render.cpp
new file mode 100644 (file)
index 0000000..b087c40
--- /dev/null
@@ -0,0 +1,91 @@
+#include <opencv2/imgproc.hpp>
+
+#include "opencv2/gapi/render.hpp"
+#include "opencv2/gapi/own/assert.hpp"
+
+#include "api/render_priv.hpp"
+
+using namespace cv::gapi::wip::draw;
+// FIXME: util::visitor?
+void cv::gapi::wip::draw::render(cv::Mat& bgr, const Prims& prims)
+{
+    for (const auto& p : prims)
+    {
+        switch (p.index())
+        {
+            case Prim::index_of<Rect>():
+            {
+                const auto& t_p = cv::util::get<Rect>(p);
+                cv::rectangle(bgr, t_p.rect, t_p.color , t_p.thick, t_p.lt, t_p.shift);
+                break;
+            }
+
+            case Prim::index_of<Text>():
+            {
+                const auto& t_p = cv::util::get<Text>(p);
+                cv::putText(bgr, t_p.text, t_p.org, t_p.ff, t_p.fs,
+                            t_p.color, t_p.thick, t_p.lt, t_p.bottom_left_origin);
+                break;
+            }
+
+            case Prim::index_of<Circle>():
+            {
+                const auto& c_p = cv::util::get<Circle>(p);
+                cv::circle(bgr, c_p.center, c_p.radius, c_p.color, c_p.thick, c_p.lt, c_p.shift);
+                break;
+            }
+
+            case Prim::index_of<Line>():
+            {
+                const auto& l_p = cv::util::get<Line>(p);
+                cv::line(bgr, l_p.pt1, l_p.pt2, l_p.color, l_p.thick, l_p.lt, l_p.shift);
+                break;
+            }
+
+            default: util::throw_error(std::logic_error("Unsupported draw operation"));
+        }
+    }
+}
+
+void cv::gapi::wip::draw::render(cv::Mat& y_plane, cv::Mat& uv_plane , const Prims& prims)
+{
+    cv::Mat bgr;
+    cv::cvtColorTwoPlane(y_plane, uv_plane, bgr, cv::COLOR_YUV2BGR_NV12);
+    render(bgr, prims);
+    BGR2NV12(bgr, y_plane, uv_plane);
+}
+
+void cv::gapi::wip::draw::splitNV12TwoPlane(const cv::Mat& yuv, cv::Mat& y_plane, cv::Mat& uv_plane) {
+    y_plane.create(yuv.size(),      CV_8UC1);
+    uv_plane.create(yuv.size() / 2, CV_8UC2);
+
+    // Fill Y plane
+    for (int i = 0; i < yuv.rows; ++i)
+    {
+        const uchar* in  = yuv.ptr<uchar>(i);
+        uchar* out       = y_plane.ptr<uchar>(i);
+        for (int j = 0; j < yuv.cols; j++) {
+            out[j] = in[3 * j];
+        }
+    }
+
+    // Fill UV plane
+    for (int i = 0; i < uv_plane.rows; i++)
+    {
+        const uchar* in = yuv.ptr<uchar>(2 * i);
+        uchar* out      = uv_plane.ptr<uchar>(i);
+        for (int j = 0; j < uv_plane.cols; j++) {
+            out[j * 2    ] = in[6 * j + 1];
+            out[j * 2 + 1] = in[6 * j + 2];
+        }
+    }
+}
+
+void cv::gapi::wip::draw::BGR2NV12(const cv::Mat& bgr, cv::Mat& y_plane, cv::Mat& uv_plane)
+{
+    GAPI_Assert(bgr.size().width  % 2 == 0);
+    GAPI_Assert(bgr.size().height % 2 == 0);
+
+    cvtColor(bgr, bgr, cv::COLOR_BGR2YUV);
+    splitNV12TwoPlane(bgr, y_plane, uv_plane);
+}
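Note: the new wip::draw::render() entry points rasterize a list of primitives either directly onto a BGR frame or onto an NV12 pair via the BGR round-trip above. A minimal sketch, assuming the primitive structs take their fields in the order they are passed to cv::rectangle()/cv::putText() in this file:

    namespace draw = cv::gapi::wip::draw;

    cv::Mat frame(480, 640, CV_8UC3, cv::Scalar::all(0));
    draw::Prims prims;
    prims.emplace_back(draw::Rect{cv::Rect(10, 10, 100, 50), cv::Scalar(0, 255, 0), 2, cv::LINE_8, 0});
    prims.emplace_back(draw::Text{"label", cv::Point(10, 80), cv::FONT_HERSHEY_SIMPLEX, 1.0,
                                  cv::Scalar::all(255), 1, cv::LINE_8, false});
    draw::render(frame, prims);    // draws in place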
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/render_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/render_priv.hpp
new file mode 100644 (file)
index 0000000..29805ea
--- /dev/null
@@ -0,0 +1,30 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_RENDER_PRIV_HPP
+#define OPENCV_RENDER_PRIV_HPP
+
+#include <opencv2/gapi/render.hpp>
+
+namespace cv
+{
+namespace gapi
+{
+namespace wip
+{
+namespace draw
+{
+// FIXME only for tests
+GAPI_EXPORTS void BGR2NV12(const cv::Mat& bgr, cv::Mat& y_plane, cv::Mat& uv_plane);
+void splitNV12TwoPlane(const cv::Mat& yuv, cv::Mat& y_plane, cv::Mat& uv_plane);
+
+} // namespace draw
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_RENDER_PRIV_HPP
index 948898f..894c49e 100644 (file)
@@ -7,7 +7,7 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/gcompoundkernel.hpp" // compound::backend()
+#include <opencv2/gapi/gcompoundkernel.hpp> // compound::backend()
 
 #include "api/gbackend_priv.hpp"
 #include "compiler/gislandmodel.hpp" // GIslandExecutable
index 89abcef..05ed51d 100644 (file)
@@ -8,7 +8,7 @@
 #include "precomp.hpp"
 
 #include <ade/util/zip_range.hpp>   // util::indexed
-#include "opencv2/gapi/gcompoundkernel.hpp"
+#include <opencv2/gapi/gcompoundkernel.hpp>
 #include "compiler/gobjref.hpp"
 
 // FIXME move to backends
index 8924e3d..cfdb7ae 100644 (file)
 
 #include <ade/typed_graph.hpp>
 
-#include "opencv2/gapi/gcommon.hpp"
-#include "opencv2/gapi/util/any.hpp"
-#include "opencv2/gapi/gtype_traits.hpp"
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/util/any.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
 
 #include "compiler/gobjref.hpp"
 #include "compiler/gmodel.hpp"
 
 #include "backends/cpu/gcpubackend.hpp"
-#include "backends/cpu/gcpuimgproc.hpp"
-#include "backends/cpu/gcpucore.hpp"
+#include <opencv2/gapi/cpu/imgproc.hpp>
+#include <opencv2/gapi/cpu/core.hpp>
 
 #include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
 
index 218cac1..923a05e 100644 (file)
@@ -13,9 +13,9 @@
 #include <tuple>              // tuple
 #include <ade/util/algorithm.hpp> // type_list_index
 
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gproto.hpp"
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
 
 #include "api/gorigin.hpp"
 #include "backends/common/gbackend.hpp"
index cec9abc..1c1ac58 100644 (file)
@@ -7,9 +7,9 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/core.hpp"
-#include "opencv2/gapi/cpu/core.hpp"
-#include "backends/cpu/gcpucore.hpp"
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/cpu/core.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
 
 GAPI_OCV_KERNEL(GCPUAdd, cv::gapi::core::GAdd)
 {
@@ -461,6 +461,22 @@ GAPI_OCV_KERNEL(GCPUResize, cv::gapi::core::GResize)
     }
 };
 
+GAPI_OCV_KERNEL(GCPUResizeP, cv::gapi::core::GResizeP)
+{
+    static void run(const cv::Mat& in, cv::Size out_sz, int interp, cv::Mat& out)
+    {
+        int inH = in.rows / 3;
+        int inW = in.cols;
+        int outH = out.rows / 3;
+        int outW = out.cols;
+        for (int i = 0; i < 3; i++) {
+            auto in_plane = in(cv::Rect(0, i*inH, inW, inH));
+            auto out_plane = out(cv::Rect(0, i*outH, outW, outH));
+            cv::resize(in_plane, out_plane, out_sz, 0, 0, interp);
+        }
+    }
+};
+
 GAPI_OCV_KERNEL(GCPURemap, cv::gapi::core::GRemap)
 {
     static void run(const cv::Mat& in, const cv::Mat& x, const cv::Mat& y, int a, int b, cv::Scalar s, cv::Mat& out)
@@ -589,6 +605,7 @@ cv::gapi::GKernelPackage cv::gapi::core::cpu::kernels()
          , GCPUSplit3
          , GCPUSplit4
          , GCPUResize
+         , GCPUResizeP
          , GCPUMerge3
          , GCPUMerge4
          , GCPURemap
index ab5d5d8..cab0520 100644 (file)
@@ -7,9 +7,11 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/imgproc.hpp"
-#include "opencv2/gapi/cpu/imgproc.hpp"
-#include "backends/cpu/gcpuimgproc.hpp"
+#include <opencv2/gapi/imgproc.hpp>
+#include <opencv2/gapi/cpu/imgproc.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
+
+#include "backends/fluid/gfluidimgproc_func.hpp"
 
 namespace {
     cv::Mat add_border(const cv::Mat& in, const int ksize, const int borderType, const cv::Scalar& bordVal){
@@ -276,6 +278,74 @@ GAPI_OCV_KERNEL(GCPURGB2GrayCustom, cv::gapi::imgproc::GRGB2GrayCustom)
     }
 };
 
+GAPI_OCV_KERNEL(GCPUBayerGR2RGB, cv::gapi::imgproc::GBayerGR2RGB)
+{
+    static void run(const cv::Mat& in, cv::Mat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_BayerGR2RGB);
+    }
+};
+
+GAPI_OCV_KERNEL(GCPURGB2HSV, cv::gapi::imgproc::GRGB2HSV)
+{
+    static void run(const cv::Mat& in, cv::Mat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_RGB2HSV);
+    }
+};
+
+GAPI_OCV_KERNEL(GCPURGB2YUV422, cv::gapi::imgproc::GRGB2YUV422)
+{
+    static void run(const cv::Mat& in, cv::Mat &out)
+    {
+        out.create(in.size(), CV_8UC2);
+
+        for (int i = 0; i < in.rows; ++i)
+        {
+            const uchar* in_line_p  = in.ptr<uchar>(i);
+            uchar* out_line_p = out.ptr<uchar>(i);
+            cv::gapi::fluid::run_rgb2yuv422_impl(out_line_p, in_line_p, in.cols);
+        }
+    }
+};
+
+static void toPlanar(const cv::Mat& in, cv::Mat& out)
+{
+    GAPI_Assert(out.depth() == in.depth());
+    GAPI_Assert(out.channels() == 1);
+    GAPI_Assert(in.channels() == 3);
+    GAPI_Assert(out.cols == in.cols);
+    GAPI_Assert(out.rows == 3*in.rows);
+
+    std::vector<cv::Mat> outs(3);
+    for (int i = 0; i < 3; i++) {
+        outs[i] = out(cv::Rect(0, i*in.rows, in.cols, in.rows));
+    }
+    cv::split(in, outs);
+}
+
+
+GAPI_OCV_KERNEL(GCPUNV12toRGBp, cv::gapi::imgproc::GNV12toRGBp)
+{
+    static void run(const cv::Mat& inY, const cv::Mat& inUV, cv::Mat& out)
+    {
+        cv::Mat rgb;
+        cv::cvtColorTwoPlane(inY, inUV, rgb, cv::COLOR_YUV2RGB_NV12);
+        toPlanar(rgb, out);
+    }
+};
+
+GAPI_OCV_KERNEL(GCPUNV12toBGRp, cv::gapi::imgproc::GNV12toBGRp)
+{
+    static void run(const cv::Mat& inY, const cv::Mat& inUV, cv::Mat& out)
+    {
+        cv::Mat rgb;
+        cv::cvtColorTwoPlane(inY, inUV, rgb, cv::COLOR_YUV2BGR_NV12);
+        toPlanar(rgb, out);
+    }
+};
+
+
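Note: the planar buffer produced by toPlanar() (and resized plane-by-plane by the GCPUResizeP kernel added earlier) is a single-channel matrix of height 3*H with the R, G and B planes stacked vertically. A sketch of pulling the planes back out (names local to this example):

    // 'planar' is CV_8UC1 with planar.rows == 3 * H and planar.cols == W
    const int H = planar.rows / 3, W = planar.cols;
    cv::Mat r = planar(cv::Rect(0, 0 * H, W, H));
    cv::Mat g = planar(cv::Rect(0, 1 * H, W, H));
    cv::Mat b = planar(cv::Rect(0, 2 * H, W, H));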
 cv::gapi::GKernelPackage cv::gapi::imgproc::cpu::kernels()
 {
     static auto pkg = cv::gapi::kernels
@@ -303,6 +373,11 @@ cv::gapi::GKernelPackage cv::gapi::imgproc::cpu::kernels()
         , GCPUBGR2Gray
         , GCPURGB2Gray
         , GCPURGB2GrayCustom
+        , GCPUBayerGR2RGB
+        , GCPURGB2HSV
+        , GCPURGB2YUV422
+        , GCPUNV12toRGBp
+        , GCPUNV12toBGRp
         >();
     return pkg;
 }
index af13eed..5bc77aa 100644 (file)
@@ -9,7 +9,7 @@
 
 #include <cassert>
 
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
 
 const cv::gapi::own::Mat& cv::GCPUContext::inMat(int input)
 {
index 6d78edb..1321599 100644 (file)
 #include <ade/typed_graph.hpp>
 #include <ade/execution_engine/execution_engine.hpp>
 
-#include "opencv2/gapi/gcommon.hpp"
+#include <opencv2/gapi/gcommon.hpp>
 #include "logger.hpp"
 
-#include "opencv2/gapi/own/convert.hpp"
-#include "opencv2/gapi/gmat.hpp"    //for version of descr_of
+#include <opencv2/gapi/own/convert.hpp>
+#include <opencv2/gapi/gmat.hpp>    //for version of descr_of
 // PRIVATE STUFF!
 #include "compiler/gobjref.hpp"
 #include "compiler/gmodel.hpp"
@@ -91,7 +91,21 @@ namespace
                 cv::util::throw_error(std::logic_error("GFluidOutputRois feature supports only one-island graphs"));
 
             auto rois = out_rois.value_or(cv::GFluidOutputRois());
-            return EPtr{new cv::gimpl::GFluidExecutable(graph, nodes, std::move(rois.rois))};
+
+            auto graph_data = fluidExtractInputDataFromGraph(graph, nodes);
+            const auto parallel_out_rois = cv::gimpl::getCompileArg<cv::GFluidParallelOutputRois>(args);
+            const auto gpfor             = cv::gimpl::getCompileArg<cv::GFluidParallelFor>(args);
+
+            auto serial_for = [](std::size_t count, std::function<void(std::size_t)> f){
+                for (std::size_t i  = 0; i < count; ++i){
+                    f(i);
+                }
+            };
+            auto pfor  = gpfor.has_value() ? gpfor.value().parallel_for : serial_for;
+            return parallel_out_rois.has_value() ?
+                       EPtr{new cv::gimpl::GParallelFluidExecutable (graph, graph_data, std::move(parallel_out_rois.value().parallel_rois), pfor)}
+                     : EPtr{new cv::gimpl::GFluidExecutable         (graph, graph_data, std::move(rois.rois))}
+            ;
         }
 
         virtual void addBackendPasses(ade::ExecutionEngineSetupContext &ectx) override;
@@ -700,27 +714,31 @@ void cv::gimpl::GFluidExecutable::initBufferRois(std::vector<int>& readStarts,
     } // while (!nodesToVisit.empty())
 }
 
-cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
-                                              const std::vector<ade::NodeHandle> &nodes,
-                                              const std::vector<cv::gapi::own::Rect> &outputRois)
-    : m_g(g), m_gm(m_g)
+cv::gimpl::FluidGraphInputData cv::gimpl::fluidExtractInputDataFromGraph(const ade::Graph &g, const std::vector<ade::NodeHandle> &nodes)
 {
-    GConstFluidModel fg(m_g);
+    decltype(FluidGraphInputData::m_agents_data)       agents_data;
+    decltype(FluidGraphInputData::m_scratch_users)     scratch_users;
+    decltype(FluidGraphInputData::m_id_map)            id_map;
+    decltype(FluidGraphInputData::m_all_gmat_ids)      all_gmat_ids;
+    std::size_t                                        mat_count = 0;
+
+    GConstFluidModel fg(g);
+    GModel::ConstGraph m_gm(g);
 
     // Initialize vector of data buffers, build list of operations
     // FIXME: There _must_ be a better way to [query] count number of DATA nodes
-    std::size_t mat_count = 0;
-    std::size_t last_agent = 0;
 
     auto grab_mat_nh = [&](ade::NodeHandle nh) {
         auto rc = m_gm.metadata(nh).get<Data>().rc;
-        if (m_id_map.count(rc) == 0)
+        if (id_map.count(rc) == 0)
         {
-            m_all_gmat_ids[mat_count] = nh;
-            m_id_map[rc] = mat_count++;
+            all_gmat_ids[mat_count] = nh;
+            id_map[rc] = mat_count++;
         }
     };
 
+    std::size_t last_agent = 0;
+
     for (const auto &nh : nodes)
     {
         switch (m_gm.metadata(nh).get<NodeType>().t)
@@ -733,15 +751,10 @@ cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
         case NodeType::OP:
         {
             const auto& fu = fg.metadata(nh).get<FluidUnit>();
-            switch (fu.k.m_kind)
-            {
-            case GFluidKernel::Kind::Filter:    m_agents.emplace_back(new FluidFilterAgent(m_g, nh));    break;
-            case GFluidKernel::Kind::Resize:    m_agents.emplace_back(new FluidResizeAgent(m_g, nh));    break;
-            case GFluidKernel::Kind::NV12toRGB: m_agents.emplace_back(new FluidNV12toRGBAgent(m_g, nh)); break;
-            default: GAPI_Assert(false);
-            }
+
+            agents_data.push_back({fu.k.m_kind, nh, {}, {}});
             // NB.: in_buffer_ids size is equal to Arguments size, not Edges size!!!
-            m_agents.back()->in_buffer_ids.resize(m_gm.metadata(nh).get<Op>().args.size(), -1);
+            agents_data.back().in_buffer_ids.resize(m_gm.metadata(nh).get<Op>().args.size(), -1);
             for (auto eh : nh->inEdges())
             {
                 // FIXME Only GMats are currently supported (which can be represented
@@ -751,23 +764,23 @@ cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
                     const auto in_port = m_gm.metadata(eh).get<Input>().port;
                     const int  in_buf  = m_gm.metadata(eh->srcNode()).get<Data>().rc;
 
-                    m_agents.back()->in_buffer_ids[in_port] = in_buf;
+                    agents_data.back().in_buffer_ids[in_port] = in_buf;
                     grab_mat_nh(eh->srcNode());
                 }
             }
             // FIXME: Assumption that all operation outputs MUST be connected
-            m_agents.back()->out_buffer_ids.resize(nh->outEdges().size(), -1);
+            agents_data.back().out_buffer_ids.resize(nh->outEdges().size(), -1);
             for (auto eh : nh->outEdges())
             {
                 const auto& data = m_gm.metadata(eh->dstNode()).get<Data>();
                 const auto out_port = m_gm.metadata(eh).get<Output>().port;
                 const int  out_buf  = data.rc;
 
-                m_agents.back()->out_buffer_ids[out_port] = out_buf;
+                agents_data.back().out_buffer_ids[out_port] = out_buf;
                 if (data.shape == GShape::GMAT) grab_mat_nh(eh->dstNode());
             }
             if (fu.k.m_scratch)
-                m_scratch_users.push_back(last_agent);
+                scratch_users.push_back(last_agent);
             last_agent++;
             break;
         }
@@ -776,12 +789,50 @@ cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
     }
 
     // Check that IDs form a continuous set (important for further indexing)
-    GAPI_Assert(m_id_map.size() >  0);
-    GAPI_Assert(m_id_map.size() == static_cast<size_t>(mat_count));
+    GAPI_Assert(id_map.size() >  0);
+    GAPI_Assert(id_map.size() == static_cast<size_t>(mat_count));
+
+    return FluidGraphInputData {std::move(agents_data), std::move(scratch_users), std::move(id_map), std::move(all_gmat_ids), mat_count};
+}
+
+cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph                       &g,
+                                              const cv::gimpl::FluidGraphInputData   &traverse_res,
+                                              const std::vector<cv::gapi::own::Rect> &outputRois)
+    : m_g(g), m_gm(m_g)
+{
+    GConstFluidModel fg(m_g);
+
+    auto tie_traverse_res = [&traverse_res](){
+        auto& r = traverse_res;
+        return std::tie(r.m_scratch_users, r.m_id_map, r.m_all_gmat_ids, r.m_mat_count);
+    };
+
+    auto tie_this   =  [this](){
+        return std::tie(m_scratch_users, m_id_map, m_all_gmat_ids, m_num_int_buffers);
+    };
+
+    tie_this() = tie_traverse_res();
+
+    auto create_fluid_agent = [&g](agent_data_t const& agent_data) -> std::unique_ptr<FluidAgent> {
+        std::unique_ptr<FluidAgent> agent_ptr;
+        switch (agent_data.kind)
+        {
+            case GFluidKernel::Kind::Filter:    agent_ptr.reset(new FluidFilterAgent(g, agent_data.nh));      break;
+            case GFluidKernel::Kind::Resize:    agent_ptr.reset(new FluidResizeAgent(g, agent_data.nh));      break;
+            case GFluidKernel::Kind::NV12toRGB: agent_ptr.reset(new FluidNV12toRGBAgent(g, agent_data.nh));   break;
+            default: GAPI_Assert(false);
+        }
+        std::tie(agent_ptr->in_buffer_ids, agent_ptr->out_buffer_ids) = std::tie(agent_data.in_buffer_ids, agent_data.out_buffer_ids);
+        return agent_ptr;
+    };
+
+    for (auto const& agent_data : traverse_res.m_agents_data){
+        m_agents.push_back(create_fluid_agent(agent_data));
+    }
 
     // Actually initialize Fluid buffers
-    GAPI_LOG_INFO(NULL, "Initializing " << mat_count << " fluid buffer(s)" << std::endl);
-    m_num_int_buffers = mat_count;
+    GAPI_LOG_INFO(NULL, "Initializing " << m_num_int_buffers << " fluid buffer(s)" << std::endl);
+
     const std::size_t num_scratch = m_scratch_users.size();
     m_buffers.resize(m_num_int_buffers + num_scratch);
 
@@ -847,6 +898,12 @@ cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
 
     makeReshape(outputRois);
 
+    GAPI_LOG_INFO(NULL, "Internal buffers: " << std::fixed << std::setprecision(2) << static_cast<float>(total_buffers_size())/1024 << " KB\n");
+}
+
+std::size_t cv::gimpl::GFluidExecutable::total_buffers_size() const
+{
+    GConstFluidModel fg(m_g);
     std::size_t total_size = 0;
     for (const auto &i : ade::util::indexed(m_buffers))
     {
@@ -854,7 +911,7 @@ cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
         const auto idx = ade::util::index(i);
         const auto b   = ade::util::value(i);
         if (idx >= m_num_int_buffers ||
-            fg.metadata(m_all_gmat_ids[idx]).get<FluidData>().internal == true)
+            fg.metadata(m_all_gmat_ids.at(idx)).get<FluidData>().internal == true)
         {
             GAPI_Assert(b.priv().size() > 0);
         }
@@ -863,7 +920,7 @@ cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph &g,
         // (There can be non-zero sized const border buffer allocated in such buffers)
         total_size += b.priv().size();
     }
-    GAPI_LOG_INFO(NULL, "Internal buffers: " << std::fixed << std::setprecision(2) << static_cast<float>(total_size)/1024 << " KB\n");
+    return total_size;
 }
 
 namespace
@@ -1208,6 +1265,11 @@ void cv::gimpl::GFluidExecutable::packArg(cv::GArg &in_arg, const cv::GArg &op_a
 void cv::gimpl::GFluidExecutable::run(std::vector<InObj>  &&input_objs,
                                       std::vector<OutObj> &&output_objs)
 {
+    run(input_objs, output_objs);
+}
+void cv::gimpl::GFluidExecutable::run(std::vector<InObj>  &input_objs,
+                                      std::vector<OutObj> &output_objs)
+{
     // Bind input buffers from parameters
     for (auto& it : input_objs)  bindInArg(it.first, it.second);
     for (auto& it : output_objs) bindOutArg(it.first, it.second);
@@ -1280,6 +1342,34 @@ void cv::gimpl::GFluidExecutable::run(std::vector<InObj>  &&input_objs,
     }
 }
 
+cv::gimpl::GParallelFluidExecutable::GParallelFluidExecutable(const ade::Graph                      &g,
+                                                              const FluidGraphInputData             &graph_data,
+                                                              const std::vector<GFluidOutputRois>   &parallelOutputRois,
+                                                              const decltype(parallel_for)          &pfor)
+: parallel_for(pfor)
+{
+    for (auto&& rois : parallelOutputRois){
+        tiles.emplace_back(new GFluidExecutable(g, graph_data, rois.rois));
+    }
+}
+
+
+void cv::gimpl::GParallelFluidExecutable::reshape(ade::Graph&, const GCompileArgs& )
+{
+    //TODO: implement ?
+    GAPI_Assert(false && "Not Implemented;");
+}
+
+void cv::gimpl::GParallelFluidExecutable::run(std::vector<InObj>  &&input_objs,
+                                              std::vector<OutObj> &&output_objs)
+{
+    parallel_for(tiles.size(), [&, this](std::size_t index){
+        GAPI_Assert((bool)tiles[index]);
+        tiles[index]->run(input_objs, output_objs);
+    });
+}
+
+
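Note: the fluid backend now builds a GParallelFluidExecutable only when GFluidParallelOutputRois is present among the compile arguments; each tile runs through the user-supplied GFluidParallelFor, or through the serial_for fallback defined in the factory above. A minimal wiring sketch, assuming these structs are exposed by the fluid kernel headers in this drop:

    cv::GFluidParallelOutputRois tiles;
    tiles.parallel_rois = { cv::GFluidOutputRois{{cv::gapi::own::Rect{0,   0, 640, 240}}},
                            cv::GFluidOutputRois{{cv::gapi::own::Rect{0, 240, 640, 240}}} };

    cv::GFluidParallelFor pfor;
    pfor.parallel_for = [](std::size_t count, std::function<void(std::size_t)> f) {
        cv::parallel_for_(cv::Range(0, static_cast<int>(count)),
                          [&](const cv::Range &r) {
                              for (int i = r.start; i < r.end; ++i) f(static_cast<std::size_t>(i));
                          });
    };

    graph.apply(in_mat, out_mat,
                cv::compile_args(cv::gapi::core::fluid::kernels(), tiles, pfor));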
 // FIXME: these passes operate on graph global level!!!
 // Need to fix this for heterogeneous (island-based) processing
 void GFluidBackendImpl::addBackendPasses(ade::ExecutionEngineSetupContext &ectx)
index d540999..7923f0c 100644 (file)
 // FIXME? Actually gfluidbackend.hpp is not included anywhere
 // and can be placed in gfluidbackend.cpp
 
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gproto.hpp"
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
-#include "opencv2/gapi/fluid/gfluidbuffer.hpp"
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
+#include <opencv2/gapi/fluid/gfluidbuffer.hpp>
 
 // PRIVATE STUFF!
 #include "backends/common/gbackend.hpp"
@@ -51,6 +51,13 @@ struct FluidData
     gapi::fluid::BorderOpt border;
 };
 
+struct agent_data_t {
+     GFluidKernel::Kind  kind;
+     ade::NodeHandle     nh;
+     std::vector<int>    in_buffer_ids;
+     std::vector<int>    out_buffer_ids;
+ };
+
 struct FluidAgent
 {
 public:
@@ -96,8 +103,23 @@ private:
     virtual std::pair<int,int> linesReadAndnextWindow(std::size_t inPort) const = 0;
 };
 
+// Helper data structure for accumulating graph traversal/analysis data
+struct FluidGraphInputData {
+
+    std::vector<agent_data_t>               m_agents_data;
+    std::vector<std::size_t>                m_scratch_users;
+    std::unordered_map<int, std::size_t>    m_id_map;           // GMat id -> buffer idx map
+    std::map<std::size_t, ade::NodeHandle>  m_all_gmat_ids;
+
+    std::size_t                             m_mat_count;
+};
+// Local helper function: traverses the graph once and passes the results to multiple instances of GFluidExecutable
+FluidGraphInputData fluidExtractInputDataFromGraph(const ade::Graph &m_g, const std::vector<ade::NodeHandle> &nodes);
+
 class GFluidExecutable final: public GIslandExecutable
 {
+    GFluidExecutable(const GFluidExecutable&) = delete;  // due to std::unique_ptr in member list
+
     const ade::Graph &m_g;
     GModel::ConstGraph m_gm;
 
@@ -121,17 +143,42 @@ class GFluidExecutable final: public GIslandExecutable
 
     void initBufferRois(std::vector<int>& readStarts, std::vector<cv::gapi::own::Rect>& rois, const std::vector<gapi::own::Rect> &out_rois);
     void makeReshape(const std::vector<cv::gapi::own::Rect>& out_rois);
+    std::size_t total_buffers_size() const;
 
 public:
-    GFluidExecutable(const ade::Graph &g,
-                     const std::vector<ade::NodeHandle> &nodes,
-                     const std::vector<cv::gapi::own::Rect> &outputRois);
-
     virtual inline bool canReshape() const override { return true; }
     virtual void reshape(ade::Graph& g, const GCompileArgs& args) override;
 
     virtual void run(std::vector<InObj>  &&input_objs,
                      std::vector<OutObj> &&output_objs) override;
+
+    void run(std::vector<InObj>  &input_objs,
+             std::vector<OutObj> &output_objs);
+
+
+     GFluidExecutable(const ade::Graph                          &g,
+                      const FluidGraphInputData                 &graph_data,
+                      const std::vector<cv::gapi::own::Rect>    &outputRois);
+};
+
+
+class GParallelFluidExecutable final: public GIslandExecutable {
+    GParallelFluidExecutable(const GParallelFluidExecutable&) = delete;  // due to std::unique_ptr in member list
+
+    std::vector<std::unique_ptr<GFluidExecutable>> tiles;
+    decltype(GFluidParallelFor::parallel_for) parallel_for;
+public:
+    GParallelFluidExecutable(const ade::Graph                       &g,
+                             const FluidGraphInputData              &graph_data,
+                             const std::vector<GFluidOutputRois>    &parallelOutputRois,
+                             const decltype(parallel_for)           &pfor);
+
+
+    virtual inline bool canReshape() const override { return false; }
+    virtual void reshape(ade::Graph& g, const GCompileArgs& args) override;
+
+    virtual void run(std::vector<InObj>  &&input_objs,
+                     std::vector<OutObj> &&output_objs) override;
 };
 }} // cv::gimpl
 
index 0bfdd66..f0dc6ed 100644 (file)
@@ -9,12 +9,12 @@
 
 #include <iomanip>   // hex, dec (debug)
 
-#include "opencv2/gapi/own/convert.hpp"
-#include "opencv2/gapi/own/types.hpp"
+#include <opencv2/gapi/own/convert.hpp>
+#include <opencv2/gapi/own/types.hpp>
 
-#include "opencv2/gapi/fluid/gfluidbuffer.hpp"
+#include <opencv2/gapi/fluid/gfluidbuffer.hpp>
 #include "backends/fluid/gfluidbuffer_priv.hpp"
-#include "opencv2/gapi/opencv_includes.hpp"
+#include <opencv2/gapi/opencv_includes.hpp>
 
 #include "backends/fluid/gfluidutils.hpp" // saturate
 
index b58ec07..49d6d2e 100644 (file)
@@ -8,16 +8,16 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/own/assert.hpp"
-#include "opencv2/core/traits.hpp"
-#include "opencv2/core/hal/hal.hpp"
-#include "opencv2/core/hal/intrin.hpp"
+#include <opencv2/gapi/own/assert.hpp>
+#include <opencv2/core/traits.hpp>
+#include <opencv2/core/hal/hal.hpp>
+#include <opencv2/core/hal/intrin.hpp>
 
-#include "opencv2/gapi/core.hpp"
+#include <opencv2/gapi/core.hpp>
 
-#include "opencv2/gapi/fluid/gfluidbuffer.hpp"
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
-#include "opencv2/gapi/fluid/core.hpp"
+#include <opencv2/gapi/fluid/gfluidbuffer.hpp>
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
+#include <opencv2/gapi/fluid/core.hpp>
 
 #include "gfluidbuffer_priv.hpp"
 #include "gfluidbackend.hpp"
index 49f1824..dfbce1e 100644 (file)
@@ -8,18 +8,18 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/own/assert.hpp"
-#include "opencv2/core/traits.hpp"
-#include "opencv2/imgproc/types_c.h"
+#include <opencv2/gapi/own/assert.hpp>
+#include <opencv2/core/traits.hpp>
+#include <opencv2/imgproc/types_c.h>
 
-#include "opencv2/gapi/core.hpp"
-#include "opencv2/gapi/imgproc.hpp"
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/imgproc.hpp>
 
-#include "opencv2/gapi/own/types.hpp"
+#include <opencv2/gapi/own/types.hpp>
 
-#include "opencv2/gapi/fluid/gfluidbuffer.hpp"
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
-#include "opencv2/gapi/fluid/imgproc.hpp"
+#include <opencv2/gapi/fluid/gfluidbuffer.hpp>
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
+#include <opencv2/gapi/fluid/imgproc.hpp>
 
 #include "gfluidbuffer_priv.hpp"
 #include "gfluidbackend.hpp"
@@ -27,8 +27,8 @@
 
 #include "gfluidimgproc_func.hpp"
 
-#include "opencv2/imgproc/hal/hal.hpp"
-#include "opencv2/core/hal/intrin.hpp"
+#include <opencv2/imgproc/hal/hal.hpp>
+#include <opencv2/core/hal/intrin.hpp>
 
 #include <cmath>
 #include <cstdlib>
@@ -1683,6 +1683,121 @@ GAPI_FLUID_KERNEL(GFluidMedianBlur, cv::gapi::imgproc::GMedianBlur, false)
     }
 };
 
+GAPI_FLUID_KERNEL(GFluidRGB2YUV422, cv::gapi::imgproc::GRGB2YUV422, false)
+{
+    static const int Window = 1;
+    static const auto Kind = cv::GFluidKernel::Kind::Filter;
+
+    static void run(const cv::gapi::fluid::View&   in,
+            cv::gapi::fluid::Buffer& out)
+    {
+        const auto *src = in.InLine<uchar>(0);
+        auto *dst = out.OutLine<uchar>();
+
+        run_rgb2yuv422_impl(dst, src, in.length());
+    }
+};
+
+GAPI_FLUID_KERNEL(GFluidRGB2HSV, cv::gapi::imgproc::GRGB2HSV, true)
+{
+    static const int Window = 1;
+    static const auto Kind = cv::GFluidKernel::Kind::Filter;
+
+    static void run(const cv::gapi::fluid::View&   in,
+                    cv::gapi::fluid::Buffer& out,
+                    cv::gapi::fluid::Buffer& scratch)
+    {
+        const auto *src = in.InLine<uchar>(0);
+        auto *dst = out.OutLine<uchar>();
+
+        auto* sdiv_table = scratch.OutLine<int>(0);
+        auto* hdiv_table = sdiv_table + 256;
+
+        run_rgb2hsv_impl(dst, src, sdiv_table, hdiv_table, in.length());
+    }
+
+    static void initScratch(const cv::GMatDesc& /* in */,
+                            cv::gapi::fluid::Buffer& scratch)
+    {
+        const int hsv_shift = 12;
+
+        cv::GMatDesc desc;
+        desc.chan  = 1;
+        desc.depth = CV_32S;
+        desc.size  = cv::gapi::own::Size(512, 1);
+
+        cv::gapi::fluid::Buffer buffer(desc);
+        scratch = std::move(buffer);
+
+        auto* sdiv_table = scratch.OutLine<int>(0);
+        auto* hdiv_table = sdiv_table + 256;
+
+        sdiv_table[0] = hdiv_table[0] = 0;
+        for(int i = 1; i < 256; i++ )
+        {
+            sdiv_table[i] = cv::saturate_cast<int>((255 << hsv_shift)/(1.*i));
+            hdiv_table[i] = cv::saturate_cast<int>((180 << hsv_shift)/(6.*i));
+        }
+
+    }
+
+    static void resetScratch(cv::gapi::fluid::Buffer& /* scratch */)
+    {
+    }
+};
+
+GAPI_FLUID_KERNEL(GFluidBayerGR2RGB, cv::gapi::imgproc::GBayerGR2RGB, false)
+{
+    static const int Window = 3;
+    static const int LPI    = 2;
+
+    static void run(const cv::gapi::fluid::View& in,
+                    cv::gapi::fluid::Buffer& out)
+    {
+        const int height = in.meta().size.height;
+        const int border_size = 1;
+        const int width = in.length();
+
+        constexpr int num_lines = LPI + 2 * border_size;
+        const uchar* src[num_lines];
+        uchar* dst[LPI];
+
+        for (int i = 0; i < LPI; ++i)
+        {
+            dst[i] = out.OutLine<uchar>(i);
+        }
+
+        for (int i = 0; i < num_lines; ++i)
+        {
+            src[i] = in.InLine<uchar>(i - 1);
+        }
+
+        if (in.y() == -1)
+        {
+            run_bayergr2rgb_bg_impl(dst[1], src + border_size, width);
+            std::memcpy(dst[0], dst[1], width * 3);
+        }
+        else if (in.y() == height - LPI - 2 * border_size + 1)
+        {
+            run_bayergr2rgb_gr_impl(dst[0], src, width);
+            std::memcpy(dst[1], dst[0], width * 3);
+        }
+        else
+        {
+            run_bayergr2rgb_gr_impl(dst[0], src, width);
+            run_bayergr2rgb_bg_impl(dst[1], src + border_size, width);
+        }
+    }
+
+    static cv::gapi::fluid::Border getBorder(const cv::GMatDesc&)
+    {
+        int  borderType  = cv::BORDER_CONSTANT;
+        auto borderValue = cv::Scalar();
+
+        return { borderType, borderValue };
+    }
+};
+
 } // namespace fluid
 } // namespace gapi
 } // namespace cv
@@ -1709,6 +1824,9 @@ cv::gapi::GKernelPackage cv::gapi::imgproc::fluid::kernels()
       , GFluidGaussBlur
       , GFluidSobel
       , GFluidSobelXY
+      , GFluidRGB2YUV422
+      , GFluidRGB2HSV
+      , GFluidBayerGR2RGB
     #if 0
       , GFluidCanny        -- not fluid (?)
       , GFluidEqualizeHist -- not fluid
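
For reference, the fixed-point HSV conversion wired up in GFluidRGB2HSV above can be reproduced in plain scalar C++. The sketch below is illustrative only: it rebuilds the sdiv/hdiv lookup tables the same way initScratch() fills the scratch buffer, and converts one arbitrary RGB pixel with the same shift-based formulas used by run_rgb2hsv_impl later in this patch (plain +0.5 rounding stands in for cv::saturate_cast<int>):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int hsv_shift = 12;
        static int sdiv_table[256], hdiv_table[256];
        sdiv_table[0] = hdiv_table[0] = 0;
        for (int i = 1; i < 256; ++i)
        {
            // same tables as GFluidRGB2HSV::initScratch builds in its scratch buffer
            sdiv_table[i] = static_cast<int>((255 << hsv_shift) / (1. * i) + 0.5);
            hdiv_table[i] = static_cast<int>((180 << hsv_shift) / (6. * i) + 0.5);
        }

        // one arbitrary RGB pixel, for illustration only
        int r = 200, g = 32, b = 64;

        int v    = std::max({r, g, b});             // V channel
        int vmin = std::min({r, g, b});
        int diff = v - vmin;

        int s = (diff * sdiv_table[v] + (1 << (hsv_shift - 1))) >> hsv_shift;

        int _vr = (v == r) ? -1 : 0;
        int _vg = (v == g) ? -1 : 0;
        int h = (_vr & (g - b)) +
                (~_vr & ((_vg & (b - r + 2 * diff)) + ((~_vg) & (r - g + 4 * diff))));
        h = (h * hdiv_table[diff] + (1 << (hsv_shift - 1))) >> hsv_shift;
        if (h < 0) h += 180;                        // wrap negative hue into [0, 180)

        std::printf("H=%d S=%d V=%d\n", h, s, v);
        return 0;
    }

The 1 << (hsv_shift - 1) term is the usual rounding bias for a fixed-point division by 2^hsv_shift.
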
index 835fb82..3ea4676 100644 (file)
@@ -12,8 +12,8 @@
 
 #include "gfluidutils.hpp"
 
-#include "opencv2/core/cvdef.h"
-#include "opencv2/core/hal/intrin.hpp"
+#include <opencv2/core/cvdef.h>
+#include <opencv2/core/hal/intrin.hpp>
 
 #include <cmath>
 #include <cstdlib>
@@ -43,7 +43,35 @@ void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
 
 //--------------------------------------
 //
-// Fluid kernels: RGB-to-YUV, YUV-to-RGB
+// Fluid kernels: RGB-to-HSV
+//
+//--------------------------------------
+
+void run_rgb2hsv_impl(uchar out[], const uchar in[], const int sdiv_table[],
+                      const int hdiv_table[], int width)
+{
+    CV_CPU_DISPATCH(run_rgb2hsv_impl, (out, in, sdiv_table, hdiv_table, width), CV_CPU_DISPATCH_MODES_ALL);
+}
+
+//--------------------------------------
+//
+// Fluid kernels: BayerGR-to-RGB
+//
+//--------------------------------------
+
+void run_bayergr2rgb_bg_impl(uchar out[], const uchar **in, int width)
+{
+    CV_CPU_DISPATCH(run_bayergr2rgb_bg_impl, (out, in, width), CV_CPU_DISPATCH_MODES_ALL);
+}
+
+void run_bayergr2rgb_gr_impl(uchar out[], const uchar **in, int width)
+{
+    CV_CPU_DISPATCH(run_bayergr2rgb_gr_impl, (out, in, width), CV_CPU_DISPATCH_MODES_ALL);
+}
+
+//--------------------------------------
+//
+// Fluid kernels: RGB-to-YUV, RGB-to-YUV422, YUV-to-RGB
 //
 //--------------------------------------
 
@@ -57,6 +85,11 @@ void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef
     CV_CPU_DISPATCH(run_yuv2rgb_impl, (out, in, width, coef), CV_CPU_DISPATCH_MODES_ALL);
 }
 
+void run_rgb2yuv422_impl(uchar out[], const uchar in[], int width)
+{
+    CV_CPU_DISPATCH(run_rgb2yuv422_impl, (out, in, width), CV_CPU_DISPATCH_MODES_ALL);
+}
+
 //-------------------------
 //
 // Fluid kernels: sepFilter
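
These thin wrappers only forward to CV_CPU_DISPATCH, OpenCV's CPU-feature dispatch machinery, which selects the best SIMD-specialized build of each *_impl function at run time. Conceptually (and only conceptually; this is not what the macro actually expands to), the behaviour is close to a hand-rolled function-pointer dispatch:

    #include <cstdio>

    // Simplified stand-in implementations; the real code has scalar and SIMD builds
    // of the same function compiled per CPU baseline.
    static void run_impl_scalar(unsigned char out[], const unsigned char in[], int width)
    {
        for (int i = 0; i < width; ++i) out[i] = in[i];
    }
    static void run_impl_simd(unsigned char out[], const unsigned char in[], int width)
    {
        for (int i = 0; i < width; ++i) out[i] = in[i];   // pretend this loop is vectorized
    }

    static bool cpu_has_simd() { return true; }            // stand-in for real feature detection

    int main()
    {
        // Pick an implementation once, based on detected CPU features,
        // then always call through the chosen pointer.
        auto run = cpu_has_simd() ? run_impl_simd : run_impl_scalar;

        unsigned char in[4] = {1, 2, 3, 4}, out[4];
        run(out, in, 4);
        std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
        return 0;
    }
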
index 191ac08..b89ccd8 100644 (file)
@@ -8,7 +8,7 @@
 
 #if !defined(GAPI_STANDALONE)
 
-#include "opencv2/core.hpp"
+#include <opencv2/core.hpp>
 
 namespace cv {
 namespace gapi {
@@ -25,7 +25,26 @@ void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
 
 //--------------------------------------
 //
-// Fluid kernels: RGB-to-YUV, YUV-to-RGB
+// Fluid kernels: RGB-to-HSV
+//
+//--------------------------------------
+
+void run_rgb2hsv_impl(uchar out[], const uchar in[], const int sdiv_table[],
+                      const int hdiv_table[], int width);
+
+//--------------------------------------
+//
+// Fluid kernels: BayerGR-to-RGB
+//
+//--------------------------------------
+
+void run_bayergr2rgb_bg_impl(uchar out[], const uchar **in, int width);
+
+void run_bayergr2rgb_gr_impl(uchar out[], const uchar **in, int width);
+
+//--------------------------------------
+//
+// Fluid kernels: RGB-to-YUV, RGB-to-YUV422, YUV-to-RGB
 //
 //--------------------------------------
 
@@ -33,6 +52,8 @@ void run_rgb2yuv_impl(uchar out[], const uchar in[], int width, const float coef
 
 void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef[4]);
 
+void run_rgb2yuv422_impl(uchar out[], const uchar in[], int width);
+
 //-------------------------
 //
 // Fluid kernels: sepFilter
index 397d3b0..b5c5147 100644 (file)
@@ -47,7 +47,26 @@ void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
 
 //--------------------------------------
 //
-// Fluid kernels: RGB-to-YUV, YUV-to-RGB
+// Fluid kernels: RGB-to-HSV
+//
+//--------------------------------------
+
+void run_rgb2hsv_impl(uchar out[], const uchar in[], const int sdiv_table[],
+                      const int hdiv_table[], int width);
+
+//--------------------------------------
+//
+// Fluid kernels: BayerGR-to-RGB
+//
+//--------------------------------------
+
+void run_bayergr2rgb_bg_impl(uchar out[], const uchar **in, int width);
+
+void run_bayergr2rgb_gr_impl(uchar out[], const uchar **in, int width);
+
+//--------------------------------------
+//
+// Fluid kernels: RGB-to-YUV, RGB-to-YUV422, YUV-to-RGB
 //
 //--------------------------------------
 
@@ -55,6 +74,8 @@ void run_rgb2yuv_impl(uchar out[], const uchar in[], int width, const float coef
 
 void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef[4]);
 
+void run_rgb2yuv422_impl(uchar out[], const uchar in[], int width);
+
 //-------------------------
 //
 // Fluid kernels: sepFilter
@@ -249,6 +270,454 @@ void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
 
 //--------------------------------------
 //
+// Fluid kernels: RGB-to-HSV
+//
+//--------------------------------------
+//
+void run_rgb2hsv_impl(uchar out[], const uchar in[], const int sdiv_table[],
+                      const int hdiv_table[], int width)
+{
+    const int hsv_shift = 12;
+    const int hr = 180;
+
+    int j = 0;
+
+    #if CV_SIMD128
+        const int vectorStep = 16;
+
+        uint8_t ff = 0xff;
+        v_uint8x16 mask1(ff, 0, 0, 0, ff, 0, 0, 0, ff, 0, 0, 0, ff, 0, 0, 0);
+        v_uint8x16 mask2(0, ff, 0, 0, 0, ff, 0, 0, 0, ff, 0, 0, 0, ff, 0, 0);
+        v_uint8x16 mask3(0, 0, ff, 0, 0, 0, ff, 0, 0, 0, ff, 0, 0, 0, ff, 0);
+        v_uint8x16 mask4(0, 0, 0, ff, 0, 0, 0, ff, 0, 0, 0, ff, 0, 0, 0, ff);
+
+        for (int w = 0; w <= 3 * (width - vectorStep); w += 3 * vectorStep)
+        {
+            v_uint8x16 r, g, b;
+            v_load_deinterleave(in + w, r, g, b);
+
+            v_uint8x16 v_min_rgb = v_min(v_min(r, g), b);
+            v_uint8x16 v_max_rgb = v_max(v_max(r, g), b);
+
+            v_uint8x16 v_diff = v_max_rgb - v_min_rgb;
+
+            v_uint8x16 v_r_eq_max = (r == v_max_rgb);
+            v_uint8x16 v_g_eq_max = (g == v_max_rgb);
+
+            v_uint8x16 v;
+            // get V-ch
+            v = v_max_rgb;
+
+            // split v into four uint32x4 vectors, since int32 indices are required for the LUT below
+            v_uint32x4 v_idx[4];
+            v_idx[0] = v_reinterpret_as_u32(v & mask1);
+            v_idx[1] = v_reinterpret_as_u32(v & mask2) >> 8;
+            v_idx[2] = v_reinterpret_as_u32(v & mask3) >> 16;
+            v_idx[3] = v_reinterpret_as_u32(v & mask4) >> 24;
+
+            v_uint32x4 sv_elems_32[4];
+            sv_elems_32[0] = v_reinterpret_as_u32(v_lut(sdiv_table, v_reinterpret_as_s32(v_idx[0])));
+            sv_elems_32[1] = v_reinterpret_as_u32(v_lut(sdiv_table, v_reinterpret_as_s32(v_idx[1])));
+            sv_elems_32[2] = v_reinterpret_as_u32(v_lut(sdiv_table, v_reinterpret_as_s32(v_idx[2])));
+            sv_elems_32[3] = v_reinterpret_as_u32(v_lut(sdiv_table, v_reinterpret_as_s32(v_idx[3])));
+
+            // divide and calculate s using the sdiv table values loaded above
+            v_uint32x4 ss[4];
+
+            v_uint32x4 v_add = v_setall_u32(1) << (hsv_shift - 1);
+
+            v_uint32x4 v_diff_exp[4];
+            v_diff_exp[0] = v_reinterpret_as_u32(v_reinterpret_as_u8(v_diff) & mask1);
+            v_diff_exp[1] = v_reinterpret_as_u32(v_reinterpret_as_u8(v_diff) & mask2) >> 8;
+            v_diff_exp[2] = v_reinterpret_as_u32(v_reinterpret_as_u8(v_diff) & mask3) >> 16;
+            v_diff_exp[3] = v_reinterpret_as_u32(v_reinterpret_as_u8(v_diff) & mask4) >> 24;
+
+            // s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
+            ss[0] = (v_diff_exp[0] * sv_elems_32[0] + v_add) >> hsv_shift;
+            ss[1] = (v_diff_exp[1] * sv_elems_32[1] + v_add) >> hsv_shift;
+            ss[2] = (v_diff_exp[2] * sv_elems_32[2] + v_add) >> hsv_shift;
+            ss[3] = (v_diff_exp[3] * sv_elems_32[3] + v_add) >> hsv_shift;
+
+            // reconstruct order of S-ch
+            v_uint32x4 zip[8];
+            v_zip(ss[0], ss[2], zip[0], zip[1]);
+            v_zip(ss[1], ss[3], zip[2], zip[3]);
+
+            v_zip(zip[0], zip[2], zip[4], zip[5]);
+            v_zip(zip[1], zip[3], zip[6], zip[7]);
+
+            v_uint8x16 s = v_pack(v_pack(zip[4], zip[5]), v_pack(zip[6], zip[7]));
+
+            // the same division for H-ch
+            // FIXME: REALLY UGLY and slow
+            v_uint32x4 gg[4];
+            v_uint16x8 tmp_exp[2];
+            v_expand(g, tmp_exp[0], tmp_exp[1]);
+            v_expand(tmp_exp[0], gg[0], gg[1]);
+            v_expand(tmp_exp[1], gg[2], gg[3]);
+
+            v_uint32x4 rr[4];
+            v_expand(r, tmp_exp[0], tmp_exp[1]);
+            v_expand(tmp_exp[0], rr[0], rr[1]);
+            v_expand(tmp_exp[1], rr[2], rr[3]);
+
+            v_uint32x4 bb[4];
+            v_expand(b, tmp_exp[0], tmp_exp[1]);
+            v_expand(tmp_exp[0], bb[0], bb[1]);
+            v_expand(tmp_exp[1], bb[2], bb[3]);
+
+            v_int32x4 e[4];
+            v_int16x8 sig_exp[2];
+            v_expand(v_reinterpret_as_s8(v_r_eq_max), sig_exp[0], sig_exp[1]);
+            v_expand(sig_exp[0], e[0], e[1]);
+            v_expand(sig_exp[1], e[2], e[3]);
+
+            v_int32x4 p[4];
+            v_expand(v_reinterpret_as_s8(v_g_eq_max), sig_exp[0], sig_exp[1]);
+            v_expand(sig_exp[0], p[0], p[1]);
+            v_expand(sig_exp[1], p[2], p[3]);
+
+            // reconstruct order of v_diff
+            v_zip(v_diff_exp[0], v_diff_exp[2], zip[0], zip[1]);
+            v_zip(v_diff_exp[1], v_diff_exp[3], zip[2], zip[3]);
+
+            v_zip(zip[0], zip[2], zip[4], zip[5]);
+            v_zip(zip[1], zip[3], zip[6], zip[7]);
+
+            v_uint8x16 vd = v_pack(v_pack(zip[4], zip[5]), v_pack(zip[6], zip[7]));
+
+            v_uint32x4 vdd[4];
+            v_uint16x8 vvdd[2];
+            v_expand(vd, vvdd[0], vvdd[1]);
+            v_expand(vvdd[0], vdd[0], vdd[1]);
+            v_expand(vvdd[1], vdd[2], vdd[3]);
+
+            // start computing H-ch
+            //h = (_vr & (g - b)) + (~_vr & ((_vg & (b - r + 2 * diff)) + ((~_vg) & (r - g + 4 * diff))));
+            v_int32x4 hh[4];
+            hh[0] = v_reinterpret_as_s32(v_select(e[0], v_reinterpret_as_s32(gg[0] - bb[0]),
+                                         v_select(p[0], v_reinterpret_as_s32(bb[0] - rr[0] + v_setall_u32(2) * vdd[0]),
+                                                        v_reinterpret_as_s32(rr[0] - gg[0] + v_setall_u32(4) * vdd[0]))));
+            hh[1] = v_reinterpret_as_s32(v_select(e[1], v_reinterpret_as_s32(gg[1] - bb[1]),
+                                         v_select(p[1], v_reinterpret_as_s32(bb[1] - rr[1] + v_setall_u32(2) * vdd[1]),
+                                                        v_reinterpret_as_s32(rr[1] - gg[1] + v_setall_u32(4) * vdd[1]))));
+            hh[2] = v_reinterpret_as_s32(v_select(e[2], v_reinterpret_as_s32(gg[2] - bb[2]),
+                                         v_select(p[2], v_reinterpret_as_s32(bb[2] - rr[2] + v_setall_u32(2) * vdd[2]),
+                                                        v_reinterpret_as_s32(rr[2] - gg[2] + v_setall_u32(4) * vdd[2]))));
+            hh[3] = v_reinterpret_as_s32(v_select(e[3], v_reinterpret_as_s32(gg[3] - bb[3]),
+                                         v_select(p[3], v_reinterpret_as_s32(bb[3] - rr[3] + v_setall_u32(2) * vdd[3]),
+                                                        v_reinterpret_as_s32(rr[3] - gg[3] + v_setall_u32(4) * vdd[3]))));
+
+            //h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
+            v_uint32x4 h_elems_32[4];
+            h_elems_32[0] = v_reinterpret_as_u32(v_lut(hdiv_table, v_reinterpret_as_s32(vdd[0])));
+            h_elems_32[1] = v_reinterpret_as_u32(v_lut(hdiv_table, v_reinterpret_as_s32(vdd[1])));
+            h_elems_32[2] = v_reinterpret_as_u32(v_lut(hdiv_table, v_reinterpret_as_s32(vdd[2])));
+            h_elems_32[3] = v_reinterpret_as_u32(v_lut(hdiv_table, v_reinterpret_as_s32(vdd[3])));
+
+            hh[0] = (hh[0] * v_reinterpret_as_s32(h_elems_32[0]) + v_reinterpret_as_s32(v_add)) >> hsv_shift;
+            hh[1] = (hh[1] * v_reinterpret_as_s32(h_elems_32[1]) + v_reinterpret_as_s32(v_add)) >> hsv_shift;
+            hh[2] = (hh[2] * v_reinterpret_as_s32(h_elems_32[2]) + v_reinterpret_as_s32(v_add)) >> hsv_shift;
+            hh[3] = (hh[3] * v_reinterpret_as_s32(h_elems_32[3]) + v_reinterpret_as_s32(v_add)) >> hsv_shift;
+
+            // check for negative H
+            v_int32x4 v_h_less_0[4];
+            v_h_less_0[0] = (hh[0] < v_setall_s32(0));
+            v_h_less_0[1] = (hh[1] < v_setall_s32(0));
+            v_h_less_0[2] = (hh[2] < v_setall_s32(0));
+            v_h_less_0[3] = (hh[3] < v_setall_s32(0));
+
+            v_int32x4 v_h_180[4];
+            v_h_180[0] = hh[0] + v_setall_s32(180);
+            v_h_180[1] = hh[1] + v_setall_s32(180);
+            v_h_180[2] = hh[2] + v_setall_s32(180);
+            v_h_180[3] = hh[3] + v_setall_s32(180);
+
+            hh[0] = v_select(v_h_less_0[0], v_h_180[0], hh[0]);
+            hh[1] = v_select(v_h_less_0[1], v_h_180[1], hh[1]);
+            hh[2] = v_select(v_h_less_0[2], v_h_180[2], hh[2]);
+            hh[3] = v_select(v_h_less_0[3], v_h_180[3], hh[3]);
+
+            // pack H-ch
+            v_uint16x8 hh_16_1 = v_pack(v_reinterpret_as_u32(hh[0]), v_reinterpret_as_u32(hh[1]));
+            v_uint16x8 hh_16_2 = v_pack(v_reinterpret_as_u32(hh[2]), v_reinterpret_as_u32(hh[3]));
+
+            v_uint8x16 h = v_pack(hh_16_1, hh_16_2);
+
+            v_store_interleave(out + w, h, s, v);
+
+            // output offset
+            j += vectorStep;
+        }
+    v_cleanup();
+    #endif
+
+    for (; j < width; ++j)
+    {
+        int r = in[j * 3    ],
+            g = in[j * 3 + 1],
+            b = in[j * 3 + 2];
+
+        int h, s, v = b;
+        int vmin = std::min({r, g, b});
+        v = std::max({r, g, b});
+        int _vr, _vg;
+
+        uchar diff = cv::saturate_cast<uchar>(v - vmin);
+        _vr = v == r ? -1 : 0;
+        _vg = v == g ? -1 : 0;
+
+        s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
+
+        h = (_vr & (g - b)) +
+            (~_vr & ((_vg & (b - r + 2 * diff)) + ((~_vg) & (r - g + 4 * diff))));
+
+        h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
+        h += h < 0 ? hr : 0;
+
+        out[j * 3    ] = cv::saturate_cast<uchar>(h);
+        out[j * 3 + 1] = (uchar)(s);
+        out[j * 3 + 2] = (uchar)(v);
+    }
+}
+
+//--------------------------------------
+//
+// Fluid kernels: BayerGR-to-RGB
+//
+//--------------------------------------
+
+void run_bayergr2rgb_bg_impl(uchar out[], const uchar **in, int width)
+{
+
+    int j = 0;
+
+    #if CV_SIMD128
+        const int vectorStep = 16;
+
+        v_uint16x8 l_1, r_1, l_2, r_2;
+        v_uint16x8 l_3, r_3, l_4, r_4;
+
+        for (int w = 0; w <= width - 2 * vectorStep - 2; w += 2 * vectorStep) // -2 for offset vectors
+        {
+            v_uint8x16 g1, r1, g1_offset, r1_offset; // 1 line
+            v_uint8x16 b2, g2, b2_offset, g2_offset; // 2 line
+            v_uint8x16 g3, r3, g3_offset, r3_offset; // 3 line
+
+            v_load_deinterleave(in[0] + w + 1, r1, g1);
+            v_load_deinterleave(in[0] + w + 2 + 1, r1_offset, g1_offset);
+
+            v_load_deinterleave(in[1] + w, b2, g2);
+            v_load_deinterleave(in[1] + w + 2, b2_offset, g2_offset);
+
+            v_load_deinterleave(in[2] + w + 1, r3, g3);
+            v_load_deinterleave(in[2] + w + 2 + 1, r3_offset, g3_offset);
+
+
+            // calculate b-channel
+            v_expand(b2, l_1, r_1);
+            v_expand(b2_offset, l_2, r_2);
+            v_uint8x16 b2_sum = v_rshr_pack<1>(l_1 + l_2, r_1 + r_2);
+
+            v_uint8x16 b_low, b_high;
+            v_zip(b2_sum, b2_offset, b_low, b_high);
+
+
+            // calculate r-channel
+            v_expand(r1, l_1, r_1);
+            v_expand(r1_offset, l_2, r_2);
+            v_expand(r3, l_3, r_3);
+            v_expand(r3_offset, l_4, r_4);
+
+            v_uint8x16 r13offset_sum, r13_sum;
+            r13offset_sum = v_rshr_pack<2>(l_1 + l_2 + l_3 + l_4,
+                                           r_1 + r_2 + r_3 + r_4);
+            r13_sum = v_rshr_pack<1>(l_1 + l_3, r_1 + r_3);
+
+            v_uint8x16 r_low, r_high;
+            v_zip(r13_sum, r13offset_sum, r_low, r_high);
+
+
+            // calculate g-channel
+            v_expand(g1, l_1, r_1);
+            v_expand(g3, l_2, r_2);
+            v_expand(g2, l_3, r_3);
+            v_expand(g2_offset, l_4, r_4);
+
+            v_uint8x16 g_out_sum = v_rshr_pack<2>(l_1 + l_2 + l_3 + l_4,
+                                                  r_1 + r_2 + r_3 + r_4);
+
+            v_uint8x16 g_low, g_high;
+            v_zip(g2, g_out_sum, g_low, g_high);
+
+
+            v_store_interleave(out + w * 3 + 3, b_low, g_low, r_low);
+            v_store_interleave(out + w * 3 + vectorStep * 3 + 3, b_high, g_high, r_high);
+
+            // output offset for scalar code
+            j += vectorStep * 2;
+        }
+    #endif
+
+    bool curr_red = true;
+    int t0, t1, t2;
+
+    int i = 1;
+
+    for (; j < width - 1; ++j, curr_red = !curr_red)
+    {
+        if (!curr_red)
+        {
+            t0 = (in[i][j - 1] + in[i][j + 1] + 1) >> 1;
+            t1 =  in[i][j];
+            t2 = (in[i - 1][j] + in[i + 1][j] + 1) >> 1;
+
+
+            out[j * 3 + 0] = (uchar)t0;
+            out[j * 3 + 1] = (uchar)t1;
+            out[j * 3 + 2] = (uchar)t2;
+        }
+        else
+        {
+            t2 = (in[i - 1][j - 1] + in[i - 1][j + 1] +
+                  in[i + 1][j - 1] + in[i + 1][j + 1] + 2) >> 2;
+            t1 = (in[i][j - 1] + in[i][j + 1] +
+                  in[i - 1][j] + in[i + 1][j] + 2) >> 2;
+            t0 = in[i][j];
+
+            out[j * 3 + 0] = (uchar)t0;
+            out[j * 3 + 1] = (uchar)t1;
+            out[j * 3 + 2] = (uchar)t2;
+        }
+    }
+
+    out[0] = out[3];
+    out[1] = out[4];
+    out[2] = out[5];
+
+    out[3 * (width - 1)    ] = out[3 * (width - 2)    ];
+    out[3 * (width - 1) + 1] = out[3 * (width - 2) + 1];
+    out[3 * (width - 1) + 2] = out[3 * (width - 2) + 2];
+}
+
+void run_bayergr2rgb_gr_impl(uchar out[], const uchar **in, int width)
+{
+
+    int j = 0;
+
+    #if CV_SIMD128
+        const int vectorStep = 16;
+
+        v_uint16x8 l_1, r_1, l_2, r_2;
+        v_uint16x8 l_3, r_3, l_4, r_4;
+
+        for (int w = 0; w <= width - 2 * vectorStep - 2; w += 2 * vectorStep) // -2 for offset vectors
+        {
+            v_uint8x16 b1, g1, b1_offset, g1_offset; // 1 line
+            v_uint8x16 g2, r2, g2_offset, r2_offset; // 2 line
+            v_uint8x16 b3, g3, b3_offset, g3_offset; // 3 line
+
+            v_load_deinterleave(in[0] + w, b1, g1);
+            v_load_deinterleave(in[0] + w + 2, b1_offset, g1_offset);
+
+            v_load_deinterleave(in[1] + w, g2, r2);
+            v_load_deinterleave(in[1] + w + 2, g2_offset, r2_offset);
+
+            v_load_deinterleave(in[2] + w, b3, g3);
+            v_load_deinterleave(in[2] + w + 2, b3_offset, g3_offset);
+
+            // calculate r-channel
+            v_expand(r2, l_1, r_1);
+            v_expand(r2_offset, l_2, r_2);
+            v_uint8x16 r2_sum = v_rshr_pack<1>(l_1 + l_2, r_1 + r_2);
+
+            v_uint8x16 r_low, r_high;
+            v_zip(r2, r2_sum, r_low, r_high);
+
+
+            // calculate b-channel
+            v_expand(b1, l_1, r_1);
+            v_expand(b1_offset, l_2, r_2);
+            v_expand(b3, l_3, r_3);
+            v_expand(b3_offset, l_4, r_4);
+
+            v_uint8x16 b13offset_sum, b13_sum;
+            b13offset_sum = v_rshr_pack<2>(l_1 + l_2 + l_3 + l_4,
+                                           r_1 + r_2 + r_3 + r_4);
+            b13_sum = v_rshr_pack<1>(l_2 + l_4, r_2 + r_4);
+
+            v_uint8x16 b_low, b_high;
+            v_zip(b13offset_sum, b13_sum, b_low, b_high);
+
+
+            // calculate g-channel
+            v_expand(g1, l_1, r_1);
+            v_expand(g3, l_2, r_2);
+            v_expand(g2, l_3, r_3);
+            v_expand(g2_offset, l_4, r_4);
+
+            v_uint8x16 g_out_sum = v_rshr_pack<2>(l_1 + l_2 + l_3 + l_4,
+                                                  r_1 + r_2 + r_3 + r_4);
+
+            v_uint8x16 g_low, g_high;
+            v_zip(g_out_sum, g2_offset, g_low, g_high);
+
+
+            v_store_interleave(out + w * 3 + 3, b_low, g_low, r_low);
+            v_store_interleave(out + w * 3 + vectorStep * 3 + 3, b_high, g_high, r_high);
+
+            // output offset for scalar code
+            j += vectorStep * 2;
+        }
+    #endif
+
+    bool curr_blue = false;
+    int t0, t1, t2;
+
+    int i = 1;
+
+    for (; j < width - 1; ++j, curr_blue = !curr_blue)
+    {
+        if (!curr_blue)
+        {
+            // green pixel at grgr line
+            t2 = (in[i][j - 1] + in[i][j + 1] + 1) >> 1;
+            t1 =  in[i][j];
+            t0 = (in[i - 1][j] + in[i + 1][j] + 1) >> 1;
+
+            out[j * 3 + 0] = (uchar)t0;
+            out[j * 3 + 1] = (uchar)t1;
+            out[j * 3 + 2] = (uchar)t2;
+        }
+        else
+        {
+            // red pixel at grgr line
+            t2 = in[i][j];
+
+            t1 = (in[i][j - 1] + in[i][j + 1] +
+                  in[i - 1][j] + in[i + 1][j] + 2) >> 2;
+
+            t0 = (in[i - 1][j - 1] + in[i - 1][j + 1] +
+                  in[i + 1][j - 1] + in[i + 1][j + 1] + 2) >> 2;
+
+            out[j * 3 + 0] = (uchar)t0;
+            out[j * 3 + 1] = (uchar)t1;
+            out[j * 3 + 2] = (uchar)t2;
+
+        }
+    }
+
+    out[0] = out[3];
+    out[1] = out[4];
+    out[2] = out[5];
+
+    out[3 * (width - 1)    ] = out[3 * (width - 2)    ];
+    out[3 * (width - 1) + 1] = out[3 * (width - 2) + 1];
+    out[3 * (width - 1) + 2] = out[3 * (width - 2) + 2];
+}
+
+//--------------------------------------
+//
 // Fluid kernels: RGB-to-YUV, YUV-to-RGB
 //
 //--------------------------------------
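
As a sanity check on the demosaicing rules above, the snippet below reconstructs RGB at a single green site following the same rounded-average formulas as the scalar tail of run_bayergr2rgb_bg_impl. The pixel values are made up purely for illustration; missing colours come from the two horizontal and the two vertical neighbours with (a + b + 1) >> 1 rounding:

    #include <cstdio>

    int main()
    {
        // A tiny Bayer "GR" patch (rows alternate GRGR / BGBG); values are arbitrary:
        //   G   R   G   R
        //   B   G   B   G
        //   G   R   G   R
        unsigned char raw[3][4] = {
            { 10, 200,  12, 210},   // G R G R
            { 60,  20,  64,  22},   // B G B G
            { 14, 220,  16, 230}    // G R G R
        };

        // RGB at the green pixel raw[1][1] of the BGBG row, using the same rules
        // as the scalar tail above (rounded averages of the nearest neighbours):
        int i = 1, j = 1;
        int b = (raw[i][j - 1] + raw[i][j + 1] + 1) >> 1;  // horizontal blue neighbours
        int g =  raw[i][j];                                // green is sampled directly
        int r = (raw[i - 1][j] + raw[i + 1][j] + 1) >> 1;  // vertical red neighbours

        std::printf("R=%d G=%d B=%d\n", r, g, b);          // R=210 G=20 B=62
        return 0;
    }
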
@@ -402,6 +871,112 @@ void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef
     }
 }
 
+// Y' = 0.299*R' + 0.587*G' + 0.114*B'
+// U' = (B' - Y')*0.492
+// V' = (R' - Y')*0.877
+static const float coef[5] = {0.299f, 0.587f, 0.114f, 0.492f, 0.877f};
+
+static const ushort c0 = static_cast<ushort>(coef[0]*(1 << 16) + 0.5f);
+static const ushort c1 = static_cast<ushort>(coef[1]*(1 << 16) + 0.5f);
+static const ushort c2 = static_cast<ushort>(coef[2]*(1 << 16) + 0.5f);
+static const short c3 = static_cast<short>(coef[3]*(1 << 12) + 0.5f);
+static const short c4 = static_cast<short>(coef[4]*(1 << 12) + 0.5f);
+
+void run_rgb2yuv422_impl(uchar out[], const uchar in[], int width)
+{
+    int w = 0, j = 0;
+
+    #if CV_SIMD128
+        const int vectorStep = 16;
+
+        for (; w <= 3 * (width - vectorStep); w += 3 * vectorStep)
+        {
+            v_uint8x16 r, g, b;
+            v_load_deinterleave(in + w, r, g, b);
+
+            // TODO: compute u and v only for every other pixel (2x fewer computations)
+            v_uint8x16 y, u, v;
+
+            v_uint16x8 rr1, gg1, bb1, rr2, gg2, bb2;
+            v_expand(r, rr1, rr2);
+            v_expand(g, gg1, gg2);
+            v_expand(b, bb1, bb2);
+
+            rr1 = rr1 << 7;
+            rr2 = rr2 << 7;
+            gg1 = gg1 << 7;
+            gg2 = gg2 << 7;
+            bb1 = bb1 << 7;
+            bb2 = bb2 << 7;
+
+            v_uint16x8 yy1, yy2;
+
+            yy1 = v_mul_hi(v_setall_u16(c0), rr1) +
+                  v_mul_hi(v_setall_u16(c1), gg1) +
+                  v_mul_hi(v_setall_u16(c2), bb1);
+
+            yy2 = v_mul_hi(v_setall_u16(c0), rr2) +
+                  v_mul_hi(v_setall_u16(c1), gg2) +
+                  v_mul_hi(v_setall_u16(c2), bb2);
+
+            v_int16x8 u1, u2, v1, v2;
+
+            u1 = v_mul_hi(v_setall_s16(c3), v_reinterpret_as_s16(bb1) - v_reinterpret_as_s16(yy1));
+            u2 = v_mul_hi(v_setall_s16(c3), v_reinterpret_as_s16(bb2) - v_reinterpret_as_s16(yy2));
+            v1 = v_mul_hi(v_setall_s16(c4), v_reinterpret_as_s16(rr1) - v_reinterpret_as_s16(yy1));
+            v2 = v_mul_hi(v_setall_s16(c4), v_reinterpret_as_s16(rr2) - v_reinterpret_as_s16(yy2));
+
+            y = v_pack((yy1 + v_setall_u16(1 << 6)) >> 7,
+                       (yy2 + v_setall_u16(1 << 6)) >> 7);
+            u = v_pack_u((u1 + v_setall_s16(257 << 2)) >> 3,
+                         (u2 + v_setall_s16(257 << 2)) >> 3);
+            v = v_pack_u((v1 + v_setall_s16(257 << 2)) >> 3,
+                         (v2 + v_setall_s16(257 << 2)) >> 3);
+
+            uint8_t ff = 0xff;
+            v_uint8x16 mask(ff, 0, ff, 0, ff, 0, ff, 0, ff, 0, ff, 0, ff, 0, ff, 0);
+            v_uint8x16 uu = u & mask;
+            v_uint8x16 vv = v & mask;
+            // extract even u and v
+            v_uint8x16 u_low = v_pack(v_reinterpret_as_u16(uu), v_reinterpret_as_u16(uu));
+            v_uint8x16 v_low = v_pack(v_reinterpret_as_u16(vv), v_reinterpret_as_u16(vv));
+
+            v_uint8x16 out1, out2;
+            v_zip(u_low, v_low, out1, out2);
+
+            v_store_interleave(out + j, out1, y);
+
+            // offset for output buffer
+            j += vectorStep * 2;
+        }
+    v_cleanup();
+    #endif
+
+    for (; w < width * 3; w += 6)
+    {
+        short r = in[w] << 7;
+        short g = in[w + 1] << 7;
+        short b = in[w + 2] << 7;
+        short y1 = (c0 * r + c1 * g + c2 * b) >> 16;
+        short u =  c3*(b - y1) >> 16;
+        short v =  c4*(r - y1) >> 16;
+
+        out[j]     = cv::saturate_cast<uchar>((u + (128 << 3) + (1 << 2)) >> 3); // u
+        out[j + 1] = cv::saturate_cast<uchar>((y1 + (1 << 6)) >> 7); // y1
+        out[j + 2] = cv::saturate_cast<uchar>((v + (128 << 3) + (1 << 2)) >> 3); // v
+
+        r = in[w + 3] << 7;
+        g = in[w + 4] << 7;
+        b = in[w + 5] << 7;
+        short y2 = (c0 * r + c1 * g + c2 * b) >> 16;
+
+        out[j + 3] = cv::saturate_cast<uchar>((y2 + (1 << 6)) >> 7); // y2
+
+        // offset for output buffer
+        j += 4;
+    }
+}
+
 //-------------------------
 //
 // Fluid kernels: sepFilter
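
run_rgb2yuv422_impl packs every pair of RGB pixels into one U-Y1-V-Y2 group using the 16-bit fixed-point coefficients defined above. A minimal standalone sketch of the same scalar math (arbitrary input pixels, and a plain clamp instead of cv::saturate_cast) is:

    #include <algorithm>
    #include <cstdio>

    // Same fixed-point constants as above:
    // Y' = 0.299*R' + 0.587*G' + 0.114*B', U' = (B'-Y')*0.492, V' = (R'-Y')*0.877
    static const unsigned short c0 = static_cast<unsigned short>(0.299f * (1 << 16) + 0.5f);
    static const unsigned short c1 = static_cast<unsigned short>(0.587f * (1 << 16) + 0.5f);
    static const unsigned short c2 = static_cast<unsigned short>(0.114f * (1 << 16) + 0.5f);
    static const short          c3 = static_cast<short>(0.492f * (1 << 12) + 0.5f);
    static const short          c4 = static_cast<short>(0.877f * (1 << 12) + 0.5f);

    static unsigned char clamp_u8(int v) { return (unsigned char)std::min(std::max(v, 0), 255); }

    int main()
    {
        // two adjacent RGB pixels (pure red, pure green) -- arbitrary illustration values
        const unsigned char rgb[6] = {255, 0, 0,   0, 255, 0};
        unsigned char uyvy[4];      // one 4:2:2 group: U, Y1, V, Y2

        short r = rgb[0] << 7, g = rgb[1] << 7, b = rgb[2] << 7;
        short y1 = (c0 * r + c1 * g + c2 * b) >> 16;
        short u  = c3 * (b - y1) >> 16;
        short v  = c4 * (r - y1) >> 16;

        uyvy[0] = clamp_u8((u  + (128 << 3) + (1 << 2)) >> 3);
        uyvy[1] = clamp_u8((y1 + (1 << 6)) >> 7);
        uyvy[2] = clamp_u8((v  + (128 << 3) + (1 << 2)) >> 3);

        r = rgb[3] << 7; g = rgb[4] << 7; b = rgb[5] << 7;
        short y2 = (c0 * r + c1 * g + c2 * b) >> 16;
        uyvy[3] = clamp_u8((y2 + (1 << 6)) >> 7);

        std::printf("U=%d Y1=%d V=%d Y2=%d\n", uyvy[0], uyvy[1], uyvy[2], uyvy[3]);
        return 0;                   // prints: U=90 Y1=76 V=255 Y2=150
    }
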
index 6bfa74e..ab2c420 100644 (file)
@@ -18,9 +18,9 @@
 
 #include <ade/typed_graph.hpp>
 
-#include "opencv2/gapi/gcommon.hpp"
-#include "opencv2/gapi/util/any.hpp"
-#include "opencv2/gapi/gtype_traits.hpp"
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/util/any.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
 
 #include "compiler/gobjref.hpp"
 #include "compiler/gmodel.hpp"
@@ -150,6 +150,26 @@ void cv::gimpl::GOCLExecutable::run(std::vector<InObj>  &&input_objs,
     // has received from user (or from another Island, or mix...)
     // FIXME: Check input/output objects against GIsland protocol
 
+    // NB: We must clean-up m_res before this function returns because internally (bindInArg,
+    //     bindOutArg) we work with cv::UMats, not cv::Mats that were originally placed into the
+    //     input/output objects. If this is not done and cv::UMat "leaves" the local function scope,
+    //     certain problems may occur.
+    //
+    //     For example, if the original output (cv::Mat) is re-initialized by the user but we still
+    //     hold cv::UMat -> we get cv::UMat that has a parent that was already destroyed. Also,
+    //     since we don't own the data (the user does), there's no point holding it after we're done
+    const auto clean_up = [&input_objs, &output_objs] (cv::gimpl::Mag* p)
+    {
+        // Only clean-up UMat entries from current scope, we know that inputs and outputs are stored
+        // as UMats from the context below, so the following procedure is safe
+        auto& umats = p->slot<cv::UMat>();
+        // NB: avoid clearing the whole magazine, there's also pre-allocated internal data
+        for (auto& it : input_objs)  umats.erase(it.first.id);
+        for (auto& it : output_objs) umats.erase(it.first.id);
+    };
+    // RAII wrapper to clean-up m_res
+    std::unique_ptr<cv::gimpl::Mag, decltype(clean_up)> cleaner(&m_res, clean_up);
+
     for (auto& it : input_objs)   magazine::bindInArg (m_res, it.first, it.second, true);
     for (auto& it : output_objs)  magazine::bindOutArg(m_res, it.first, it.second, true);
 
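
The clean-up above uses std::unique_ptr with a lambda deleter as a lightweight scope guard: whatever path run() exits through, the captured lambda runs and erases the UMat entries. A minimal, self-contained illustration of the idiom (with a hypothetical int "resource" instead of the G-API magazine) is:

    #include <cstdio>
    #include <memory>

    int main()
    {
        int resource = 42;

        // The deleter runs on every exit path (normal return or exception),
        // even though the unique_ptr does not "own" the resource in the usual sense.
        auto clean_up = [](int* p) { std::printf("cleaning up %d\n", *p); };
        std::unique_ptr<int, decltype(clean_up)> cleaner(&resource, clean_up);

        std::printf("working with %d\n", resource);
        return 0;   // clean_up fires here
    }
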
index b57c662..52cf6d2 100644 (file)
@@ -13,9 +13,9 @@
 #include <tuple>              // tuple
 #include <ade/util/algorithm.hpp> // type_list_index
 
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gproto.hpp"
-#include "opencv2/gapi/ocl/goclkernel.hpp"
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/ocl/goclkernel.hpp>
 
 #include "api/gorigin.hpp"
 #include "backends/common/gbackend.hpp"
index f55f7bb..9741100 100644 (file)
@@ -7,8 +7,8 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/core.hpp"
-#include "opencv2/gapi/ocl/core.hpp"
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/ocl/core.hpp>
 #include "backends/ocl/goclcore.hpp"
 
 GAPI_OCL_KERNEL(GOCLAdd, cv::gapi::core::GAdd)
index fb6b78a..1ed9c06 100644 (file)
@@ -11,7 +11,7 @@
 #include <map>
 #include <string>
 
-#include "opencv2/gapi/ocl/goclkernel.hpp"
+#include <opencv2/gapi/ocl/goclkernel.hpp>
 
 namespace cv { namespace gimpl {
 
index 6e99d00..5795f44 100644 (file)
@@ -7,8 +7,8 @@
 
 #include "precomp.hpp"
 
-#include "opencv2/gapi/imgproc.hpp"
-#include "opencv2/gapi/ocl/imgproc.hpp"
+#include <opencv2/gapi/imgproc.hpp>
+#include <opencv2/gapi/ocl/imgproc.hpp>
 #include "backends/ocl/goclimgproc.hpp"
 
 
index 7bb18f0..864f5fe 100644 (file)
@@ -11,7 +11,7 @@
 #include <map>
 #include <string>
 
-#include "opencv2/gapi/ocl/goclkernel.hpp"
+#include <opencv2/gapi/ocl/goclkernel.hpp>
 
 namespace cv { namespace gimpl {
 
index d01aae8..11ca51b 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <cassert>
 
-#include "opencv2/gapi/ocl/goclkernel.hpp"
+#include <opencv2/gapi/ocl/goclkernel.hpp>
 
 const cv::UMat& cv::GOCLContext::inMat(int input)
 {
index 00de699..b277257 100644 (file)
@@ -9,8 +9,8 @@
 
 #include <ade/graph.hpp>
 
-#include "opencv2/gapi/gproto.hpp" // can_describe
-#include "opencv2/gapi/gcompiled.hpp"
+#include <opencv2/gapi/gproto.hpp> // can_describe
+#include <opencv2/gapi/gcompiled.hpp>
 
 #include "compiler/gcompiled_priv.hpp"
 #include "backends/common/gbackend.hpp"
index 83d4da1..9925c19 100644 (file)
 
 // <FIXME:>
 #if !defined(GAPI_STANDALONE)
-#include "opencv2/gapi/cpu/core.hpp"    // Also directly refer to Core
-#include "opencv2/gapi/cpu/imgproc.hpp" // ...and Imgproc kernel implementations
+#include <opencv2/gapi/cpu/core.hpp>    // Also directly refer to Core
+#include <opencv2/gapi/cpu/imgproc.hpp> // ...and Imgproc kernel implementations
 #endif // !defined(GAPI_STANDALONE)
 // </FIXME:>
 
-#include "opencv2/gapi/gcompoundkernel.hpp" // compound::backend()
+#include <opencv2/gapi/gcompoundkernel.hpp> // compound::backend()
 
 #include "logger.hpp"
 
@@ -48,16 +48,28 @@ namespace
 {
     cv::gapi::GKernelPackage getKernelPackage(cv::GCompileArgs &args)
     {
+        auto withAuxKernels = [](const cv::gapi::GKernelPackage& pkg) {
+            cv::gapi::GKernelPackage aux_pkg;
+            for (const auto &b : pkg.backends()) {
+                aux_pkg = combine(aux_pkg, b.priv().auxiliaryKernels());
+            }
+            return combine(pkg, aux_pkg);
+        };
+
+        auto has_use_only = cv::gimpl::getCompileArg<cv::gapi::use_only>(args);
+        if (has_use_only)
+            return withAuxKernels(has_use_only.value().pkg);
+
         static auto ocv_pkg =
 #if !defined(GAPI_STANDALONE)
             combine(cv::gapi::core::cpu::kernels(),
-                    cv::gapi::imgproc::cpu::kernels(),
-                    cv::unite_policy::KEEP);
+                    cv::gapi::imgproc::cpu::kernels());
 #else
             cv::gapi::GKernelPackage();
 #endif // !defined(GAPI_STANDALONE)
         auto user_pkg = cv::gimpl::getCompileArg<cv::gapi::GKernelPackage>(args);
-        return combine(ocv_pkg, user_pkg.value_or(cv::gapi::GKernelPackage{}), cv::unite_policy::REPLACE);
+        auto user_pkg_with_aux = withAuxKernels(user_pkg.value_or(cv::gapi::GKernelPackage{}));
+        return combine(ocv_pkg, user_pkg_with_aux);
     }
 
     cv::util::optional<std::string> getGraphDumpDirectory(cv::GCompileArgs& args)
@@ -87,7 +99,6 @@ cv::gimpl::GCompiler::GCompiler(const cv::GComputation &c,
 {
     using namespace std::placeholders;
     m_all_kernels       = getKernelPackage(m_args);
-    auto lookup_order   = getCompileArg<gapi::GLookupOrder>(m_args).value_or(gapi::GLookupOrder());
     auto dump_path      = getGraphDumpDirectory(m_args);
 
     m_e.addPassStage("init");
@@ -107,8 +118,7 @@ cv::gimpl::GCompiler::GCompiler(const cv::GComputation &c,
 
     m_e.addPassStage("kernels");
     m_e.addPass("kernels", "resolve_kernels", std::bind(passes::resolveKernels, _1,
-                                                     std::ref(m_all_kernels), // NB: and not copied here
-                                                     lookup_order));
+                                              std::ref(m_all_kernels))); // NB: and not copied here
     m_e.addPass("kernels", "check_islands_content", passes::checkIslandsContent);
 
     m_e.addPassStage("meta");
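
With unite_policy and GLookupOrder gone, kernel selection is driven purely by the package contents: a plain GKernelPackage compile argument is merged on top of the default OpenCV kernels, while cv::gapi::use_only restricts resolution to the supplied package plus each backend's auxiliary kernels (as added by withAuxKernels() above). A usage sketch, assuming the standard G-API headers and the CPU kernel package referenced elsewhere in this patch:

    #include <opencv2/core.hpp>
    #include <opencv2/gapi.hpp>
    #include <opencv2/gapi/core.hpp>
    #include <opencv2/gapi/cpu/core.hpp>

    int main()
    {
        cv::GMat a, b;
        cv::GComputation comp(cv::GIn(a, b), cv::GOut(cv::gapi::add(a, b)));

        cv::Mat m1 = cv::Mat::eye(8, 8, CV_8UC1),
                m2 = cv::Mat::eye(8, 8, CV_8UC1),
                out;

        // Restrict the graph to the CPU core kernels only; without use_only{} this
        // package would instead be combined with the default OpenCV kernels.
        auto pkg = cv::gapi::core::cpu::kernels();
        comp.apply(cv::gin(m1, m2), cv::gout(out),
                   cv::compile_args(cv::gapi::use_only{pkg}));
        return 0;
    }
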
index b369c14..3848434 100644 (file)
@@ -9,9 +9,9 @@
 #define OPENCV_GAPI_GCOMPILER_HPP
 
 
-#include "opencv2/gapi/gcommon.hpp"
-#include "opencv2/gapi/gkernel.hpp"
-#include "opencv2/gapi/gcomputation.hpp"
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/gcomputation.hpp>
 
 #include <ade/execution_engine/execution_engine.hpp>
 
index 03b42ff..d25db58 100644 (file)
@@ -15,8 +15,8 @@
 #include <ade/typed_graph.hpp>
 #include <ade/passes/topological_sort.hpp>
 
-#include "opencv2/gapi/util/optional.hpp"
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi/util/optional.hpp>
+#include <opencv2/gapi/gkernel.hpp>
 
 #include "compiler/gobjref.hpp"
 
index 8a3cfde..53464cb 100644 (file)
@@ -14,7 +14,7 @@
 #include <ade/util/zip_range.hpp>   // util::indexed
 #include <ade/util/checked_cast.hpp>
 
-#include "opencv2/gapi/gproto.hpp"
+#include <opencv2/gapi/gproto.hpp>
 #include "api/gnode_priv.hpp"
 #include "compiler/gobjref.hpp"
 #include "compiler/gmodel.hpp"
@@ -114,7 +114,7 @@ void GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::s
     op.outs[out_port] = RcDesc{gm.rc, gm.shape, {}};
 }
 
-std::vector<ade::NodeHandle> GModel::orderedInputs(Graph &g, ade::NodeHandle nh)
+std::vector<ade::NodeHandle> GModel::orderedInputs(ConstGraph &g, ade::NodeHandle nh)
 {
     std::vector<ade::NodeHandle> sorted_in_nhs(nh->inEdges().size());
     for (const auto& in_eh : nh->inEdges())
@@ -126,7 +126,7 @@ std::vector<ade::NodeHandle> GModel::orderedInputs(Graph &g, ade::NodeHandle nh)
     return sorted_in_nhs;
 }
 
-std::vector<ade::NodeHandle> GModel::orderedOutputs(Graph &g, ade::NodeHandle nh)
+std::vector<ade::NodeHandle> GModel::orderedOutputs(ConstGraph &g, ade::NodeHandle nh)
 {
     std::vector<ade::NodeHandle> sorted_out_nhs(nh->outEdges().size());
     for (const auto& out_eh : nh->outEdges())
index 2e98fa1..98ab208 100644 (file)
@@ -22,8 +22,8 @@
 // This part of the system is API-unaware by its design.
 //
 
-#include "opencv2/gapi/garg.hpp"
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gkernel.hpp>
 
 #include "compiler/gobjref.hpp"
 #include "compiler/gislandmodel.hpp"
@@ -203,8 +203,8 @@ namespace GModel
     GAPI_EXPORTS void redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to);
     GAPI_EXPORTS void redirectWriter (Graph &g, ade::NodeHandle from, ade::NodeHandle to);
 
-    GAPI_EXPORTS std::vector<ade::NodeHandle> orderedInputs (Graph &g, ade::NodeHandle nh);
-    GAPI_EXPORTS std::vector<ade::NodeHandle> orderedOutputs(Graph &g, ade::NodeHandle nh);
+    GAPI_EXPORTS std::vector<ade::NodeHandle> orderedInputs (ConstGraph &g, ade::NodeHandle nh);
+    GAPI_EXPORTS std::vector<ade::NodeHandle> orderedOutputs(ConstGraph &g, ade::NodeHandle nh);
 
     // Returns input meta array for given op node
     // Array is sparse, as metadata for non-gapi input objects is empty
index 2d06000..abe1c79 100644 (file)
@@ -11,8 +11,8 @@
 #include <map>
 #include <unordered_map>
 
-#include "opencv2/gapi/gproto.hpp"
-#include "opencv2/gapi/gcall.hpp"
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/gcall.hpp>
 
 #include "api/gorigin.hpp"
 #include "api/gnode.hpp"
index 89020d1..1eefe72 100644 (file)
@@ -14,7 +14,7 @@
 
 #include <ade/passes/check_cycles.hpp>
 
-#include "opencv2/gapi/gproto.hpp"
+#include <opencv2/gapi/gproto.hpp>
 #include "compiler/gmodel.hpp"
 #include "compiler/gislandmodel.hpp"
 #include "compiler/passes/passes.hpp"
index 8b20d60..73743a4 100644 (file)
@@ -17,7 +17,7 @@
 #include <ade/util/algorithm.hpp>   // contains
 #include <ade/util/chain_range.hpp> // chain
 
-#include "opencv2/gapi/util/optional.hpp"  // util::optional
+#include <opencv2/gapi/util/optional.hpp>  // util::optional
 #include "logger.hpp"    // GAPI_LOG
 
 #include "compiler/gmodel.hpp"
index 60bf36a..12267a3 100644 (file)
@@ -13,7 +13,7 @@
 
 #include <ade/util/filter_range.hpp>
 
-#include "opencv2/gapi/own/assert.hpp" // GAPI_Assert
+#include <opencv2/gapi/own/assert.hpp> // GAPI_Assert
 #include "compiler/passes/helpers.hpp"
 
 namespace {
index 0feb7b1..f5f0098 100644 (file)
@@ -11,7 +11,7 @@
 #include <ade/graph.hpp>
 #include <ade/passes/check_cycles.hpp>
 
-#include "opencv2/gapi/gcompoundkernel.hpp" // compound::backend()
+#include <opencv2/gapi/gcompoundkernel.hpp> // compound::backend()
 
 #include "compiler/gmodel.hpp"
 #include "compiler/passes/passes.hpp"
@@ -35,7 +35,7 @@ namespace
     // 1. Get GCompoundKernel implementation
     // 2. Create GCompoundContext
     // 3. Run GCompoundKernel with GCompoundContext
-    // 4. Build subgraph from imputs/outputs GCompoundKernel
+    // 4. Build subgraph from inputs/outputs GCompoundKernel
     // 5. Replace compound node to subgraph
 
     void expand(ade::Graph& g, ade::NodeHandle nh, const ImplInfo& impl_info)
@@ -101,8 +101,7 @@ namespace
 // This pass, given the kernel package, selects a kernel implementation
 // for every operation in the graph
 void cv::gimpl::passes::resolveKernels(ade::passes::PassContext   &ctx,
-                                       const gapi::GKernelPackage &kernels,
-                                       const gapi::GLookupOrder   &order)
+                                       const gapi::GKernelPackage &kernels)
 {
     std::unordered_set<cv::gapi::GBackend> active_backends;
 
@@ -114,8 +113,7 @@ void cv::gimpl::passes::resolveKernels(ade::passes::PassContext   &ctx,
             auto &op = gr.metadata(nh).get<Op>();
             cv::gapi::GBackend selected_backend;
             cv::GKernelImpl    selected_impl;
-            std::tie(selected_backend, selected_impl)
-                = kernels.lookup(op.k.name, order);
+            std::tie(selected_backend, selected_impl) = kernels.lookup(op.k.name);
 
             selected_backend.priv().unpackKernel(ctx.graph, nh, selected_impl);
             op.backend = selected_backend;
index 528d84c..1577a86 100644 (file)
@@ -75,7 +75,7 @@ void cv::gimpl::passes::inferMeta(ade::passes::PassContext &ctx, bool meta_is_in
             // Now ask kernel for it's output meta.
             // Resulting out_args may have a larger size than op.outs, since some
             // outputs could stay unused (unconnected)
-            const auto& out_metas = op.k.outMeta(input_meta_args, op.args);
+            const auto out_metas = op.k.outMeta(input_meta_args, op.args);
 
             // Walk through operation's outputs, update meta of output objects
             // appropriately
index 14f6acd..4daddab 100644 (file)
@@ -44,9 +44,8 @@ void storeResultingMeta(ade::passes::PassContext &ctx);
 void expandKernels(ade::passes::PassContext &ctx,
                    const gapi::GKernelPackage& kernels);
 
-void resolveKernels(ade::passes::PassContext       &ctx,
-                    const gapi::GKernelPackage &kernels,
-                    const gapi::GLookupOrder   &order);
+void resolveKernels(ade::passes::PassContext   &ctx,
+                    const gapi::GKernelPackage &kernels);
 
 void fuseIslands(ade::passes::PassContext &ctx);
 void syncIslandTags(ade::passes::PassContext &ctx);
index eba0951..b92dbdc 100644 (file)
@@ -4,10 +4,12 @@
 //
 // Copyright (C) 2019 Intel Corporation
 
-#include "opencv2/gapi/gcomputation_async.hpp"
-#include "opencv2/gapi/gcomputation.hpp"
-#include "opencv2/gapi/gcompiled_async.hpp"
-#include "opencv2/gapi/gcompiled.hpp"
+
+#include <opencv2/gapi/gcomputation_async.hpp>
+#include <opencv2/gapi/gcomputation.hpp>
+#include <opencv2/gapi/gcompiled_async.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
+#include <opencv2/gapi/gasync_context.hpp>
 
 #include <condition_variable>
 
 namespace {
     //This is a tool to move-initialize captures of a lambda in C++11
     template<typename T>
-    struct move_through_copy{
+    struct copy_through_move{
        T value;
-       move_through_copy(T&& g) : value(std::move(g)) {}
-       move_through_copy(move_through_copy&&) = default;
-       move_through_copy(move_through_copy const& lhs) : move_through_copy(std::move(const_cast<move_through_copy&>(lhs))) {}
+       copy_through_move(T&& g) : value(std::move(g)) {}
+       copy_through_move(copy_through_move&&) = default;
+       copy_through_move(copy_through_move const& lhs) : copy_through_move(std::move(const_cast<copy_through_move&>(lhs))) {}
     };
 }
 
@@ -80,6 +82,7 @@ public:
                 }};
             }
         }
+
         std::unique_lock<std::mutex> lck{mtx};
         bool first_task = q.empty();
         q.push(std::move(t));
@@ -108,8 +111,12 @@ async_service the_ctx;
 }
 
 namespace {
-template<typename f_t>
-std::exception_ptr call_and_catch(f_t&& f){
+template<typename f_t, typename context_t>
+std::exception_ptr call_and_catch(f_t&& f, context_t&& ctx){
+    if (std::forward<context_t>(ctx).isCanceled()){
+        return std::make_exception_ptr(GAsyncCanceled{});
+    }
+
     std::exception_ptr eptr;
     try {
         std::forward<f_t>(f)();
@@ -120,15 +127,21 @@ std::exception_ptr call_and_catch(f_t&& f){
     return eptr;
 }
 
-template<typename f_t, typename callback_t>
-void call_with_callback(f_t&& f, callback_t&& cb){
-    auto eptr = call_and_catch(std::forward<f_t>(f));
+struct DummyContext {
+    bool isCanceled() const {
+        return false;
+    }
+};
+
+template<typename f_t, typename callback_t, typename context_t>
+void call_with_callback(f_t&& f, callback_t&& cb, context_t&& ctx){
+    auto eptr =  call_and_catch(std::forward<f_t>(f), std::forward<context_t>(ctx));
     std::forward<callback_t>(cb)(eptr);
 }
 
-template<typename f_t>
-void call_with_futute(f_t&& f, std::promise<void>& p){
-    auto eptr = call_and_catch(std::forward<f_t>(f));
+template<typename f_t, typename context_t>
+void call_with_future(f_t&& f, std::promise<void>& p, context_t&& ctx){
+    auto eptr =  call_and_catch(std::forward<f_t>(f), std::forward<context_t>(ctx));
     if (eptr){
         p.set_exception(eptr);
     }
@@ -138,56 +151,126 @@ void call_with_futute(f_t&& f, std::promise<void>& p){
 }
 }//namespace
 
+bool GAsyncContext::cancel(){
+    bool expected = false;
+    bool updated  = cancelation_requested.compare_exchange_strong(expected, true);
+    return updated;
+}
+
+bool GAsyncContext::isCanceled() const {
+    return cancelation_requested.load();
+}
+
+const char* GAsyncCanceled::what() const noexcept {
+    return "GAPI asynchronous operation was canceled";
+}
+
 //For now these async functions are simply wrapping serial version of apply/operator() into a functor.
 //These functors are then serialized into single queue, which is processed by a devoted background thread.
 void async_apply(GComputation& gcomp, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args){
-    //TODO: use move_through_copy for all args except gcomp
+    //TODO: use copy_through_move for all args except gcomp
+    //TODO: avoid code duplication between versions of "async" functions
     auto l = [=]() mutable {
         auto apply_l = [&](){
             gcomp.apply(std::move(ins), std::move(outs), std::move(args));
         };
 
-        call_with_callback(apply_l,std::move(callback));
+        call_with_callback(apply_l,std::move(callback), DummyContext{});
     };
     impl::the_ctx.add_task(l);
 }
 
 std::future<void> async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args){
-    move_through_copy<std::promise<void>> prms{{}};
+    copy_through_move<std::promise<void>> prms{{}};
     auto f = prms.value.get_future();
     auto l = [=]() mutable {
         auto apply_l = [&](){
             gcomp.apply(std::move(ins), std::move(outs), std::move(args));
         };
 
-        call_with_futute(apply_l, prms.value);
+        call_with_future(apply_l, prms.value, DummyContext{});
     };
 
     impl::the_ctx.add_task(l);
     return f;
 }
 
+void async_apply(GComputation& gcomp, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx){
+    //TODO: use copy_through_move for all args except gcomp
+    auto l = [=, &ctx]() mutable {
+        auto apply_l = [&](){
+            gcomp.apply(std::move(ins), std::move(outs), std::move(args));
+        };
+
+        call_with_callback(apply_l,std::move(callback), ctx);
+    };
+    impl::the_ctx.add_task(l);
+}
+
+std::future<void> async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx){
+    copy_through_move<std::promise<void>> prms{{}};
+    auto f = prms.value.get_future();
+    auto l = [=, &ctx]() mutable {
+        auto apply_l = [&](){
+            gcomp.apply(std::move(ins), std::move(outs), std::move(args));
+        };
+
+        call_with_future(apply_l, prms.value, ctx);
+    };
+
+    impl::the_ctx.add_task(l);
+    return f;
+
+}
+
 void async(GCompiled& gcmpld, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs){
     auto l = [=]() mutable {
         auto apply_l = [&](){
             gcmpld(std::move(ins), std::move(outs));
         };
 
-        call_with_callback(apply_l,std::move(callback));
+        call_with_callback(apply_l,std::move(callback), DummyContext{});
+    };
+
+    impl::the_ctx.add_task(l);
+}
+
+void async(GCompiled& gcmpld, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx){
+    auto l = [=, &ctx]() mutable {
+        auto apply_l = [&](){
+            gcmpld(std::move(ins), std::move(outs));
+        };
+
+        call_with_callback(apply_l,std::move(callback), ctx);
     };
 
     impl::the_ctx.add_task(l);
 }
 
 std::future<void> async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs){
-    move_through_copy<std::promise<void>> prms{{}};
+    copy_through_move<std::promise<void>> prms{{}};
     auto f = prms.value.get_future();
     auto l = [=]() mutable {
         auto apply_l = [&](){
             gcmpld(std::move(ins), std::move(outs));
         };
 
-        call_with_futute(apply_l, prms.value);
+        call_with_future(apply_l, prms.value, DummyContext{});
+    };
+
+    impl::the_ctx.add_task(l);
+    return f;
+
+}
+std::future<void> async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx){
+    copy_through_move<std::promise<void>> prms{{}};
+    auto f = prms.value.get_future();
+    auto l = [=, &ctx]() mutable {
+        auto apply_l = [&](){
+            gcmpld(std::move(ins), std::move(outs));
+        };
+
+        call_with_future(apply_l, prms.value, ctx);
     };
 
     impl::the_ctx.add_task(l);
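
The new overloads make cancellation cooperative: GAsyncContext::cancel() only affects work that has not been picked up by the background thread yet, and such work completes its callback or future with GAsyncCanceled. A usage sketch, assuming these entry points are exposed in the cv::gapi::wip namespace via the headers included at the top of this file:

    #include <opencv2/core.hpp>
    #include <opencv2/gapi.hpp>
    #include <opencv2/gapi/core.hpp>
    #include <opencv2/gapi/gcomputation_async.hpp>
    #include <opencv2/gapi/gasync_context.hpp>

    int main()
    {
        cv::GMat a, b;
        cv::GComputation comp(cv::GIn(a, b), cv::GOut(cv::gapi::add(a, b)));

        cv::Mat m1 = cv::Mat::eye(64, 64, CV_8UC1),
                m2 = cv::Mat::eye(64, 64, CV_8UC1),
                out;

        cv::gapi::wip::GAsyncContext ctx;
        auto fut = cv::gapi::wip::async_apply(comp, cv::gin(m1, m2), cv::gout(out),
                                              cv::GCompileArgs{}, ctx);
        ctx.cancel();   // advisory: only not-yet-started work is canceled

        try {
            fut.get();  // either the result is ready, or GAsyncCanceled is thrown
        } catch (const cv::gapi::wip::GAsyncCanceled&) {
            // the request was dropped before the worker thread picked it up
        }
        return 0;
    }
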
index 2594cde..aacc4d1 100644 (file)
@@ -11,7 +11,7 @@
 
 #include <ade/util/zip_range.hpp>
 
-#include "opencv2/gapi/opencv_includes.hpp"
+#include <opencv2/gapi/opencv_includes.hpp>
 #include "executor/gexecutor.hpp"
 #include "compiler/passes/passes.hpp"
 
@@ -152,17 +152,31 @@ void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args)
         {
             using cv::util::get;
             const auto desc = get<cv::GMatDesc>(d.meta);
+
+            auto check_own_mat = [&desc, &args, &index]()
+            {
+                auto& out_mat = *get<cv::gapi::own::Mat*>(args.outObjs.at(index));
+                GAPI_Assert(out_mat.data != nullptr &&
+                        desc.canDescribe(out_mat));
+            };
+
 #if !defined(GAPI_STANDALONE)
             // Building as part of OpenCV - follow OpenCV behavior
-            // if output buffer is not enough to hold the result, reallocate it
-            auto& out_mat   = *get<cv::Mat*>(args.outObjs.at(index));
-            createMat(desc, out_mat);
+            // In the case of cv::Mat, if the output buffer is too small to hold the result, reallocate it
+            if (cv::util::holds_alternative<cv::Mat*>(args.outObjs.at(index)))
+            {
+                auto& out_mat = *get<cv::Mat*>(args.outObjs.at(index));
+                createMat(desc, out_mat);
+            }
+            // In the case of own::Mat it is never reallocated, only checked to exactly fit the required meta
+            else
+            {
+                check_own_mat();
+            }
 #else
             // Building standalone - output buffer should always exist,
             // and _exact_ match our inferred metadata
-            auto& out_mat   = *get<cv::gapi::own::Mat*>(args.outObjs.at(index));
-            GAPI_Assert(out_mat.data != nullptr &&
-                        desc.canDescribe(out_mat))
+            check_own_mat();
 #endif // !defined(GAPI_STANDALONE)
         }
     }
index eebe9d8..6106cd9 100644 (file)
@@ -9,13 +9,13 @@
 #define __OPENCV_GAPI_PRECOMP_HPP__
 
 #if !defined(GAPI_STANDALONE)
-#  include "opencv2/core.hpp"
-#  include "opencv2/imgproc.hpp"
-#  include "opencv2/gapi/core.hpp"
-#  include "opencv2/gapi/imgproc.hpp"
+#  include <opencv2/core.hpp>
+#  include <opencv2/imgproc.hpp>
+#  include <opencv2/gapi/core.hpp>
+#  include <opencv2/gapi/imgproc.hpp>
 #endif //  !defined(GAPI_STANDALONE)
 
-#include "opencv2/gapi.hpp"
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/gkernel.hpp>
 
 #endif // __OPENCV_GAPI_PRECOMP_HPP__
index d1d8793..a023759 100644 (file)
@@ -8,7 +8,7 @@
 // FIXME: move out from Common
 
 #include "../test_precomp.hpp"
-#include "opencv2/gapi/cpu/core.hpp"
+#include <opencv2/gapi/cpu/core.hpp>
 
 #include <ade/util/algorithm.hpp>
 
@@ -235,7 +235,7 @@ TEST(GCompoundKernel, ReplaceDefaultKernel)
     cv::GMat in1, in2;
     auto out = cv::gapi::add(in1, in2);
     const auto custom_pkg = cv::gapi::kernels<GCompoundAddImpl>();
-    const auto full_pkg   = cv::gapi::combine(cv::gapi::core::cpu::kernels(), custom_pkg, cv::unite_policy::REPLACE);
+    const auto full_pkg   = cv::gapi::combine(cv::gapi::core::cpu::kernels(), custom_pkg);
     cv::GComputation comp(cv::GIn(in1, in2), cv::GOut(out));
     cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC1),
             in_mat2 = cv::Mat::eye(3, 3, CV_8UC1),
@@ -257,7 +257,7 @@ TEST(GCompoundKernel, DoubleAddC)
     auto out       = cv::gapi::addC(super, s);
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundDoubleAddCImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in1, in2, s), cv::GOut(out));
 
     cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC1),
@@ -282,7 +282,7 @@ TEST(GCompoundKernel, AddC)
     auto out       = cv::gapi::addC(super, s);
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundAddCImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in1, in2, s), cv::GOut(out));
 
     cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC1),
@@ -308,7 +308,7 @@ TEST(GCompoundKernel, MergeWithSplit)
     auto out = cv::gapi::merge3(a2, b2, c2);
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundMergeWithSplitImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in), cv::GOut(out));
 
     cv::Mat in_mat = cv::Mat::eye(3, 3, CV_8UC3), out_mat, ref_mat;
@@ -325,7 +325,7 @@ TEST(GCompoundKernel, AddWithAddC)
     auto out = GCompoundAddWithAddC::on(in1, in2, s);
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundAddWithAddCImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in1, in2, s), cv::GOut(out));
 
     cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC1),
@@ -347,7 +347,7 @@ TEST(GCompoundKernel, SplitWithAdd)
     std::tie(out1, out2) = GCompoundSplitWithAdd::on(in);
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundSplitWithAddImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in), cv::GOut(out1, out2));
 
     cv::Mat in_mat = cv::Mat::eye(3, 3, CV_8UC3),
@@ -375,7 +375,7 @@ TEST(GCompoundKernel, ParallelAddC)
     std::tie(out1, out2) = GCompoundParallelAddC::on(in1, in2);
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundParallelAddCImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in1, in2), cv::GOut(out1, out2));
 
     cv::Mat in_mat = cv::Mat::eye(3, 3, CV_8UC1),
@@ -402,7 +402,7 @@ TEST(GCompoundKernel, GCompundKernelAndDefaultUseOneData)
     auto out = cv::gapi::add(GCompoundAddWithAddC::on(in1, in2, s), cv::gapi::addC(in2, s));
 
     const auto custom_pkg = cv::gapi::kernels<GCompoundAddWithAddCImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in1, in2, s), cv::GOut(out));
 
     cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC1),
@@ -428,7 +428,7 @@ TEST(GCompoundKernel, CompoundExpandedToCompound)
                                               GCompoundAddWithAddCImpl,
                                               GCompoundDoubleAddCImpl>();
 
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in1, in2, s), cv::GOut(out));
 
     cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC1),
@@ -449,7 +449,7 @@ TEST(GCompoundKernel, MaxInArray)
     GDoubleArray in;
     auto out = GCompoundMaxInArray::on(in);
     const auto custom_pkg = cv::gapi::kernels<GCompoundMaxInArrayImpl, GMaxInArrayImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in), cv::GOut(out));
     std::vector<double> v = { 1, 5, -2, 3, 10, 2};
     cv::Scalar out_scl;
@@ -465,7 +465,7 @@ TEST(GCompoundKernel, NegateArray)
     GDoubleArray in;
     GDoubleArray out = GCompoundNegateArray::on(in);
     const auto custom_pkg = cv::gapi::kernels<GCompoundNegateArrayImpl, GNegateArrayImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in), cv::GOut(out));
     std::vector<double> in_v = {1, 5, -2, -10, 3};
     std::vector<double> out_v;
@@ -483,7 +483,7 @@ TEST(GCompoundKernel, RightGArrayHandle)
     GDoubleArray a;
     cv::GMat out = GCompoundGMatGArrayGMat::on(in[0], a, in[1]);
     const auto custom_pkg = cv::gapi::kernels<GCompoundGMatGArrayGMatImpl, SetDiagKernelImpl>();
-    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels(), cv::unite_policy::KEEP);
+    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());
     cv::GComputation comp(cv::GIn(in[0], a, in[1]), cv::GOut(out));
     std::vector<double> in_v(3, 1.0);
     cv::Mat in_mat1 = cv::Mat::eye(cv::Size(3, 3), CV_8UC1),
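The hunks above drop the third cv::unite_policy argument from cv::gapi::combine(); with the two-argument overload, overlapping kernel ids are resolved by package order rather than an explicit policy. Below is a minimal standalone sketch of the new call pattern, assuming an OpenCV 4.x G-API build where this overload is available; the GMyAdd kernel and its id string are hypothetical and exist only so there is a custom package to combine with the CPU core package.

#include <opencv2/core.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/cpu/core.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>

// Hypothetical custom kernel (illustration only).
G_TYPED_KERNEL(GMyAdd, <cv::GMat(cv::GMat, cv::GMat)>, "sample.custom.my_add")
{
    static cv::GMatDesc outMeta(cv::GMatDesc a, cv::GMatDesc) { return a; }
};

GAPI_OCV_KERNEL(GMyAddImpl, GMyAdd)
{
    static void run(const cv::Mat &a, const cv::Mat &b, cv::Mat &out)
    {
        cv::add(a, b, out);
    }
};

int main()
{
    cv::GMat in1, in2;
    cv::GMat out = GMyAdd::on(in1, in2);
    cv::GComputation comp(cv::GIn(in1, in2), cv::GOut(out));

    // Two-argument combine: no cv::unite_policy; overlapping kernel ids are
    // taken from the later package in the argument order.
    const auto custom_pkg = cv::gapi::kernels<GMyAddImpl>();
    const auto full_pkg   = cv::gapi::combine(custom_pkg, cv::gapi::core::cpu::kernels());

    cv::Mat a = cv::Mat::eye(3, 3, CV_8UC1), b = cv::Mat::eye(3, 3, CV_8UC1), c;
    comp.apply(cv::gin(a, b), cv::gout(c), cv::compile_args(full_pkg));
    return 0;
}

The resulting package is passed through cv::compile_args(), exactly as the updated tests do with getCompileArgs() further below.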
index 6b5babc..5644c19 100644
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_TESTS_HPP
@@ -30,125 +30,100 @@ enum bitwiseOp
     NOT = 3
 };
 
-namespace
+// Note: namespace must match the namespace of the type of the printed object
+inline std::ostream& operator<<(std::ostream& os, mathOp op)
 {
-const char *MathOperations[] = {"ADD", "SUB", "MUL", "DIV"};
-const char *BitwiseOperations[] = {"And", "Or", "Xor"};
-const char *CompareOperations[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
-//corresponds to OpenCV
-const char *NormOperations[] = {"", "NORM_INF", "NORM_L1", "","NORM_L2"};
-}
-
-
-struct PrintMathOpCoreParams
-{
-    template <class TestParams>
-    std::string operator()(const ::testing::TestParamInfo<TestParams>& info) const
+#define CASE(v) case mathOp::v: os << #v; break
+    switch (op)
     {
-        std::stringstream ss;
-        cv::Size sz = std::get<4>(info.param);
-        ss<<MathOperations[std::get<0>(info.param)]
-                    <<"_"<<std::get<1>(info.param)
-                    <<"_"<<std::get<2>(info.param)
-                    <<"_"<<(int)std::get<3>(info.param)
-                    <<"_"<<sz.width
-                    <<"x"<<sz.height
-                    <<"_"<<(std::get<5>(info.param)+1)
-                    <<"_"<<std::get<6>(info.param)
-                    <<"_"<<std::get<7>(info.param);
-        return ss.str();
-   }
-};
-
-struct PrintCmpCoreParams
-{
-    template <class TestParams>
-    std::string operator()(const ::testing::TestParamInfo<TestParams>& info) const
-    {
-        std::stringstream ss;
-        cv::Size sz = std::get<3>(info.param);
-        ss<<CompareOperations[std::get<0>(info.param)]
-                    <<"_"<<std::get<1>(info.param)
-                    <<"_"<<std::get<2>(info.param)
-                    <<"_"<<sz.width
-                    <<"x"<<sz.height
-                    <<"_"<<std::get<4>(info.param);
-        return ss.str();
-   }
-};
+        CASE(ADD);
+        CASE(SUB);
+        CASE(MUL);
+        CASE(DIV);
+        default: GAPI_Assert(false && "unknown mathOp value");
+    }
+#undef CASE
+    return os;
+}
 
-struct PrintBWCoreParams
+// Note: namespace must match the namespace of the type of the printed object
+inline std::ostream& operator<<(std::ostream& os, bitwiseOp op)
 {
-    template <class TestParams>
-    std::string operator()(const ::testing::TestParamInfo<TestParams>& info) const
+#define CASE(v) case bitwiseOp::v: os << #v; break
+    switch (op)
     {
-        std::stringstream ss;
-        cv::Size sz = std::get<2>(info.param);
-        ss<<BitwiseOperations[std::get<0>(info.param)]
-                    <<"_"<<std::get<1>(info.param)
-                    <<"_"<<sz.width
-                    <<"x"<<sz.height
-                    <<"_"<<std::get<3>(info.param);
-        return ss.str();
-   }
-};
+        CASE(AND);
+        CASE(OR);
+        CASE(XOR);
+        CASE(NOT);
+        default: GAPI_Assert(false && "unknown bitwiseOp value");
+    }
+#undef CASE
+    return os;
+}
 
-struct PrintNormCoreParams
+GAPI_TEST_FIXTURE(MathOpTest, initMatsRandU, FIXTURE_API(mathOp,bool,double,bool), 4,
+    opType, testWithScalar, scale, doReverseOp)
+GAPI_TEST_FIXTURE(MulDoubleTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(DivTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(DivCTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(MeanTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(MaskTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(Polar2CartTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(Cart2PolarTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(CmpTest, initMatsRandU, FIXTURE_API(CmpTypes,bool), 2, opType, testWithScalar)
+GAPI_TEST_FIXTURE(BitwiseTest, initMatsRandU, FIXTURE_API(bitwiseOp), 1, opType)
+GAPI_TEST_FIXTURE(NotTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(SelectTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(MinTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(MaxTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(AbsDiffTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(AbsDiffCTest, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(SumTest, initMatrixRandU, FIXTURE_API(CompareScalars), 1, cmpF)
+GAPI_TEST_FIXTURE(AddWeightedTest, initMatsRandU, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(NormTest, initMatrixRandU, FIXTURE_API(CompareScalars,NormTypes), 2,
+    cmpF, opType)
+GAPI_TEST_FIXTURE(IntegralTest, initNothing, <>, 0)
+GAPI_TEST_FIXTURE(ThresholdTest, initMatrixRandU, FIXTURE_API(int), 1, tt)
+GAPI_TEST_FIXTURE(ThresholdOTTest, initMatrixRandU, FIXTURE_API(int), 1, tt)
+GAPI_TEST_FIXTURE(InRangeTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(Split3Test, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(Split4Test, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(ResizeTest, initNothing, FIXTURE_API(CompareMats,int,cv::Size), 3,
+    cmpF, interp, sz_out)
+GAPI_TEST_FIXTURE(ResizePTest, initNothing, FIXTURE_API(CompareMats,int,cv::Size), 3,
+    cmpF, interp, sz_out)
+GAPI_TEST_FIXTURE(ResizeTestFxFy, initNothing, FIXTURE_API(CompareMats,int,double,double), 4,
+    cmpF, interp, fx, fy)
+GAPI_TEST_FIXTURE(Merge3Test, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(Merge4Test, initMatsRandU, <>, 0)
+GAPI_TEST_FIXTURE(RemapTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(FlipTest, initMatrixRandU, FIXTURE_API(int), 1, flipCode)
+GAPI_TEST_FIXTURE(CropTest, initMatrixRandU, FIXTURE_API(cv::Rect), 1, rect_to)
+GAPI_TEST_FIXTURE(ConcatHorTest, initNothing, <>, 0)
+GAPI_TEST_FIXTURE(ConcatVertTest, initNothing, <>, 0)
+GAPI_TEST_FIXTURE(ConcatVertVecTest, initNothing, <>, 0)
+GAPI_TEST_FIXTURE(ConcatHorVecTest, initNothing, <>, 0)
+GAPI_TEST_FIXTURE(LUTTest, initNothing, <>, 0)
+GAPI_TEST_FIXTURE(ConvertToTest, initNothing, FIXTURE_API(CompareMats, double, double), 3,
+    cmpF, alpha, beta)
+GAPI_TEST_FIXTURE(PhaseTest, initMatsRandU, FIXTURE_API(bool), 1, angle_in_degrees)
+GAPI_TEST_FIXTURE(SqrtTest, initMatrixRandU, <>, 0)
+GAPI_TEST_FIXTURE(NormalizeTest, initNothing, FIXTURE_API(CompareMats,double,double,int,MatType2), 5,
+    cmpF, a, b, norm_type, ddepth)
+struct BackendOutputAllocationTest : TestWithParamBase<>
 {
-    template <class TestParams>
-    std::string operator()(const ::testing::TestParamInfo<TestParams>& info) const
+    BackendOutputAllocationTest()
     {
-        std::stringstream ss;
-        cv::Size sz = std::get<2>(info.param);
-        ss<<NormOperations[std::get<0>(info.param)]
-                    <<"_"<<std::get<1>(info.param)
-                    <<"_"<<sz.width
-                    <<"x"<<sz.height;
-        return ss.str();
-   }
+        in_mat1 = cv::Mat(sz, type);
+        in_mat2 = cv::Mat(sz, type);
+        cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(15));
+        cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(15));
+    }
 };
-
-struct MathOpTest        : public TestParams<std::tuple<mathOp,bool,int,double,cv::Size,int,bool,bool,cv::GCompileArgs>>{};
-struct MulDoubleTest     : public TestParams<std::tuple<int,cv::Size,int,bool,cv::GCompileArgs>>{};
-struct DivTest           : public TestParams<std::tuple<int,cv::Size,int,bool, cv::GCompileArgs>>{};
-struct DivCTest          : public TestParams<std::tuple<int,cv::Size,int,bool, cv::GCompileArgs>>{};
-struct MeanTest          : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct MaskTest          : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct Polar2CartTest    : public TestParams<std::tuple<cv::Size,bool, cv::GCompileArgs>> {};
-struct Cart2PolarTest    : public TestParams<std::tuple<cv::Size,bool, cv::GCompileArgs>> {};
-struct CmpTest           : public TestParams<std::tuple<CmpTypes,bool,int,cv::Size,bool, cv::GCompileArgs>>{};
-struct BitwiseTest       : public TestParams<std::tuple<bitwiseOp,int,cv::Size,bool, cv::GCompileArgs>>{};
-struct NotTest           : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct SelectTest        : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct MinTest           : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>>{};
-struct MaxTest           : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>>{};
-struct AbsDiffTest       : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>>{};
-struct AbsDiffCTest      : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct SumTest           : public TestParams<std::tuple<int, cv::Size,bool, compare_scalar_f, cv::GCompileArgs>> {};
-struct AddWeightedTest   : public TestParams<std::tuple<int,cv::Size,int,bool, compare_f,cv::GCompileArgs>>{};
-struct NormTest          : public TestParams<std::tuple<NormTypes,int,cv::Size, compare_scalar_f, cv::GCompileArgs>>{};
-struct IntegralTest      : public TestWithParam<std::tuple<int,cv::Size, cv::GCompileArgs>> {};
-struct ThresholdTest     : public TestParams<std::tuple<int,cv::Size,int,bool, cv::GCompileArgs>> {};
-struct ThresholdOTTest   : public TestParams<std::tuple<int,cv::Size,int,bool, cv::GCompileArgs>> {};
-struct InRangeTest       : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct Split3Test        : public TestParams<std::tuple<cv::Size, cv::GCompileArgs>> {};
-struct Split4Test        : public TestParams<std::tuple<cv::Size, cv::GCompileArgs>> {};
-struct ResizeTest        : public TestWithParam<std::tuple<compare_f, int, int, cv::Size, cv::Size, cv::GCompileArgs>> {};
-struct ResizeTestFxFy    : public TestWithParam<std::tuple<compare_f, int, int, cv::Size, double, double, cv::GCompileArgs>> {};
-struct Merge3Test        : public TestParams<std::tuple<cv::Size, cv::GCompileArgs>> {};
-struct Merge4Test        : public TestParams<std::tuple<cv::Size, cv::GCompileArgs>> {};
-struct RemapTest         : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct FlipTest          : public TestParams<std::tuple<int, int, cv::Size,bool, cv::GCompileArgs>> {};
-struct CropTest          : public TestParams<std::tuple<int,cv::Rect,cv::Size,bool, cv::GCompileArgs>> {};
-struct ConcatHorTest     : public TestWithParam<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct ConcatVertTest    : public TestWithParam<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct ConcatVertVecTest : public TestWithParam<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct ConcatHorVecTest  : public TestWithParam<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct LUTTest           : public TestParams<std::tuple<int, int, cv::Size,bool, cv::GCompileArgs>> {};
-struct ConvertToTest     : public TestParams<std::tuple<int, int, cv::Size, cv::GCompileArgs>> {};
-struct PhaseTest         : public TestParams<std::tuple<int, cv::Size, bool, cv::GCompileArgs>> {};
-struct SqrtTest          : public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct NormalizeTest : public TestParams<std::tuple<compare_f,MatType,cv::Size,double,double,int,MatType,bool,cv::GCompileArgs>> {};
+// FIXME: move all tests from this fixture to the base class once all issues are resolved
+struct BackendOutputAllocationLargeSizeWithCorrectSubmatrixTest : BackendOutputAllocationTest {};
+GAPI_TEST_FIXTURE(ReInitOutTest, initNothing, <cv::Size>, 1, out_sz)
 } // opencv_test
 
 #endif //OPENCV_GAPI_CORE_TESTS_HPP
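The handwritten Print*CoreParams functors removed above are superseded by stream insertion operators defined next to the enums themselves, as flagged by the "namespace must match" note in the new code. A standalone sketch of the underlying idiom follows: when operator<< lives in the enum's namespace, GoogleTest and plain ostream code find it via argument-dependent lookup, so parameterized test names and failure messages print "GREEN" rather than a raw integer. The myns/color names are hypothetical, used only for illustration.

#include <iostream>
#include <sstream>

namespace myns {
enum color { RED = 0, GREEN, BLUE };

// Found via ADL because it sits in the same namespace as `color`.
inline std::ostream& operator<<(std::ostream& os, color c)
{
#define CASE(v) case v: os << #v; break
    switch (c)
    {
        CASE(RED);
        CASE(GREEN);
        CASE(BLUE);
        default: os << "?"; break;
    }
#undef CASE
    return os;
}
} // namespace myns

int main()
{
    std::ostringstream ss;
    ss << myns::GREEN;               // prints "GREEN"
    std::cout << ss.str() << std::endl;
    return 0;
}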
index bf0ac98..5b22ef9 100644
@@ -2,13 +2,13 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_TESTS_INL_HPP
 #define OPENCV_GAPI_CORE_TESTS_INL_HPP
 
-#include "opencv2/gapi/core.hpp"
+#include <opencv2/gapi/core.hpp>
 #include "gapi_core_tests.hpp"
 
 namespace opencv_test
@@ -16,15 +16,6 @@ namespace opencv_test
 
 TEST_P(MathOpTest, MatricesAccuracyTest )
 {
-    mathOp opType = ADD;
-    int type = 0, dtype = 0;
-    cv::Size sz;
-    double scale = 1; // mul, div
-    bool testWithScalar = false, initOutMatr = false, doReverseOp = false;
-    cv::GCompileArgs compile_args;
-    std::tie(opType, testWithScalar, type, scale, sz, dtype, initOutMatr, doReverseOp, compile_args) = GetParam();
-    initMatsRandU(type, sz, dtype, initOutMatr);
-
     // G-API code & corresponding OpenCV code ////////////////////////////////
     cv::GMat in1, in2, out;
     if( testWithScalar )
@@ -82,7 +73,7 @@ TEST_P(MathOpTest, MatricesAccuracyTest )
         }
         }
         cv::GComputation c(GIn(in1, sc1), GOut(out));
-        c.apply(gin(in_mat1, sc), gout(out_mat_gapi), std::move(compile_args));
+        c.apply(gin(in_mat1, sc), gout(out_mat_gapi), getCompileArgs());
     }
     else
     {
@@ -118,7 +109,7 @@ TEST_P(MathOpTest, MatricesAccuracyTest )
             FAIL() << "no such math operation type for matrix and matrix!";
         }}
         cv::GComputation c(GIn(in1, in2), GOut(out));
-        c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+        c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
     }
 
     // Comparison //////////////////////////////////////////////////////////////
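In the hunk above, and in those that follow, each TEST_P body stops unpacking GetParam() with std::tie; the parameters (opType, type, sz, dtype, cmpF, ...) are now named members supplied by the GAPI_TEST_FIXTURE-generated fixture, and compile arguments come from getCompileArgs(). A generic sketch of that fixture pattern is shown next; the MyOpTest name and the int/bool parameters are hypothetical, and it assumes a GoogleTest version where GetParam() is usable in the fixture constructor (the parameter is set before the test object is created).

#include <gtest/gtest.h>
#include <tuple>

// Hypothetical fixture: the parameter tuple is unpacked once, in the
// constructor, and exposed as named members.
struct MyOpTest : public ::testing::TestWithParam<std::tuple<int, bool>>
{
    int  type = 0;
    bool testWithScalar = false;

    MyOpTest() { std::tie(type, testWithScalar) = GetParam(); }
};

TEST_P(MyOpTest, UsesNamedMembers)
{
    // The body reads named members instead of calling std::tie(GetParam()).
    EXPECT_GE(type, 0);
    (void)testWithScalar;
}

INSTANTIATE_TEST_CASE_P(Sketch, MyOpTest,
                        ::testing::Combine(::testing::Values(0, 16),
                                           ::testing::Bool()));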
@@ -148,22 +139,14 @@ TEST_P(MathOpTest, MatricesAccuracyTest )
 
 TEST_P(MulDoubleTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    int dtype = std::get<2>(param);
-    cv::Size sz_in = std::get<1>(param);
-    bool initOut = std::get<3>(param);
-
     auto& rng = cv::theRNG();
     double d = rng.uniform(0.0, 10.0);
-    auto compile_args = std::get<4>(param);
-    initMatrixRandU(type, sz_in, dtype, initOut);
 
     // G-API code ////////////////////////////////////////////////////////////
     cv::GMat in1, out;
     out = cv::gapi::mulC(in1, d, dtype);
     cv::GComputation c(in1, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
 
     // OpenCV code ///////////////////////////////////////////////////////////
     cv::multiply(in_mat1, d, out_mat_ocv, 1, dtype);
@@ -187,26 +170,19 @@ TEST_P(MulDoubleTest, AccuracyTest)
 #else
     EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
 #endif
-    EXPECT_EQ(out_mat_gapi.size(), sz_in);
+    EXPECT_EQ(out_mat_gapi.size(), sz);
 }
 
 TEST_P(DivTest, DISABLED_DivByZeroTest)  // https://github.com/opencv/opencv/pull/12826
 {
-    int type = 0, dtype = 0;
-    cv::Size sz_in;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(type, sz_in, dtype, initOut, compile_args) = GetParam();
-
-    initMatrixRandU(type, sz_in, dtype, initOut);
-    in_mat2 = cv::Mat(sz_in, type);
+    in_mat2 = cv::Mat(sz, type);
     in_mat2.setTo(cv::Scalar::all(0));
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::div(in1, in2, 1.0, dtype);
     cv::GComputation c(GIn(in1, in2), GOut(out));
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -216,19 +192,12 @@ TEST_P(DivTest, DISABLED_DivByZeroTest)  // https://github.com/opencv/opencv/pul
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(DivCTest, DISABLED_DivByZeroTest)  // https://github.com/opencv/opencv/pull/12826
 {
-    int type = 0, dtype = 0;
-    cv::Size sz_in;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(type, sz_in, dtype, initOut, compile_args) = GetParam();
-
-    initMatrixRandU(type, sz_in, dtype, initOut);
     sc = cv::Scalar::all(0);
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -237,7 +206,7 @@ TEST_P(DivCTest, DISABLED_DivByZeroTest)  // https://github.com/opencv/opencv/pu
     auto out = cv::gapi::divC(in1, sc1, dtype);
     cv::GComputation c(GIn(in1, sc1), GOut(out));
 
-    c.apply(gin(in_mat1, sc), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, sc), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -247,19 +216,13 @@ TEST_P(DivCTest, DISABLED_DivByZeroTest)  // https://github.com/opencv/opencv/pu
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        cv::Mat zeros = cv::Mat::zeros(sz_in, type);
+        cv::Mat zeros = cv::Mat::zeros(sz, type);
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != zeros));
     }
 }
 
 TEST_P(MeanTest, AccuracyTest)
 {
-    int type = 0;
-    bool initOut = false;
-    cv::Size sz_in;
-    cv::GCompileArgs compile_args;
-    std::tie(type, sz_in, initOut, compile_args) = GetParam();
-    initMatrixRandU(type, sz_in, initOut);
     cv::Scalar out_norm;
     cv::Scalar out_norm_ocv;
 
@@ -268,7 +231,7 @@ TEST_P(MeanTest, AccuracyTest)
     auto out = cv::gapi::mean(in);
 
     cv::GComputation c(cv::GIn(in), cv::GOut(out));
-    c.apply(cv::gin(in_mat1), cv::gout(out_norm), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_norm), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         out_norm_ocv = cv::mean(in_mat1);
@@ -281,14 +244,7 @@ TEST_P(MeanTest, AccuracyTest)
 
 TEST_P(MaskTest, AccuracyTest)
 {
-    int type = 0;
-    bool initOut = false;
-    cv::Size sz_in;
-    cv::GCompileArgs compile_args;
-    std::tie(type, sz_in, initOut, compile_args) = GetParam();
-    initMatrixRandU(type, sz_in, type, initOut);
-
-    in_mat2 = cv::Mat(sz_in, CV_8UC1);
+    in_mat2 = cv::Mat(sz, CV_8UC1);
     cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(255));
     in_mat2 = in_mat2 > 128;
 
@@ -297,7 +253,7 @@ TEST_P(MaskTest, AccuracyTest)
     auto out = cv::gapi::mask(in, m);
 
     cv::GComputation c(cv::GIn(in, m), cv::GOut(out));
-    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), std::move(compile_args));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         out_mat_ocv = cv::Mat::zeros(in_mat1.size(), in_mat1.type());
@@ -311,17 +267,12 @@ TEST_P(MaskTest, AccuracyTest)
 
 TEST_P(Polar2CartTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<0>(param);
-    auto compile_args = std::get<2>(param);
-    initMatsRandU(CV_32FC1, sz_in, CV_32FC1, std::get<1>(param));
-
     cv::Mat out_mat2;
     cv::Mat out_mat_ocv2;
-    if(std::get<1>(param) == true)
+    if (dtype != -1)
     {
-        out_mat2 = cv::Mat(sz_in, CV_32FC1);
-        out_mat_ocv2 = cv::Mat(sz_in, CV_32FC1);
+        out_mat2 = cv::Mat(sz, dtype);
+        out_mat_ocv2 = cv::Mat(sz, dtype);
     }
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -329,7 +280,7 @@ TEST_P(Polar2CartTest, AccuracyTest)
     std::tie(out1, out2) = cv::gapi::polarToCart(in1, in2);
 
     cv::GComputation c(GIn(in1, in2), GOut(out1, out2));
-    c.apply(gin(in_mat1,in_mat2), gout(out_mat_gapi, out_mat2), std::move(compile_args));
+    c.apply(gin(in_mat1,in_mat2), gout(out_mat_gapi, out_mat2), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::polarToCart(in_mat1, in_mat2, out_mat_ocv, out_mat_ocv2);
@@ -361,19 +312,14 @@ TEST_P(Polar2CartTest, AccuracyTest)
 
         EXPECT_EQ(0, cv::countNonZero(difx > 1e-6*absx));
         EXPECT_EQ(0, cv::countNonZero(dify > 1e-6*absy));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(Cart2PolarTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<0>(param);
-    auto compile_args = std::get<2>(param);
-    initMatsRandU(CV_32FC1, sz_in, CV_32FC1, std::get<1>(param));
-
-    cv::Mat out_mat2(sz_in, CV_32FC1);
-    cv::Mat out_mat_ocv2(sz_in, CV_32FC1);
+    cv::Mat out_mat2(sz, dtype);
+    cv::Mat out_mat_ocv2(sz, dtype);
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2, out1, out2;
@@ -414,20 +360,12 @@ TEST_P(Cart2PolarTest, AccuracyTest)
         //        (expected relative accuracy like 1e-6)
         EXPECT_EQ(0, cv::countNonZero(difm > 1e-6*absm));
         EXPECT_EQ(0, cv::countNonZero(difa > 1e-3*absa));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(CmpTest, AccuracyTest)
 {
-    CmpTypes opType = CMP_EQ;
-    int type = 0;
-    cv::Size sz;
-    bool testWithScalar = false, initOutMatr = false;
-    cv::GCompileArgs compile_args;
-    std::tie(opType, testWithScalar, type, sz, initOutMatr, compile_args) = GetParam();
-    initMatsRandU(type, sz, CV_8U, initOutMatr);
-
     // G-API code & corresponding OpenCV code ////////////////////////////////
     cv::GMat in1, out;
     if( testWithScalar )
@@ -447,7 +385,7 @@ TEST_P(CmpTest, AccuracyTest)
         cv::compare(in_mat1, sc, out_mat_ocv, opType);
 
         cv::GComputation c(GIn(in1, in2), GOut(out));
-        c.apply(gin(in_mat1, sc), gout(out_mat_gapi), std::move(compile_args));
+        c.apply(gin(in_mat1, sc), gout(out_mat_gapi), getCompileArgs());
     }
     else
     {
@@ -466,7 +404,7 @@ TEST_P(CmpTest, AccuracyTest)
         cv::compare(in_mat1, in_mat2, out_mat_ocv, opType);
 
         cv::GComputation c(GIn(in1, in2), GOut(out));
-        c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+        c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -478,14 +416,6 @@ TEST_P(CmpTest, AccuracyTest)
 
 TEST_P(BitwiseTest, AccuracyTest)
 {
-    bitwiseOp opType = AND;
-    int type = 0;
-    cv::Size sz;
-    bool initOutMatr = false;
-    cv::GCompileArgs compile_args;
-    std::tie(opType, type, sz, initOutMatr, compile_args) = GetParam();
-    initMatsRandU(type, sz, type, initOutMatr);
-
     // G-API code & corresponding OpenCV code ////////////////////////////////
     cv::GMat in1, in2, out;
     switch(opType)
@@ -514,7 +444,7 @@ TEST_P(BitwiseTest, AccuracyTest)
         }
     }
     cv::GComputation c(GIn(in1, in2), GOut(out));
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     // Comparison //////////////////////////////////////////////////////////////
     {
@@ -525,17 +455,12 @@ TEST_P(BitwiseTest, AccuracyTest)
 
 TEST_P(NotTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatrixRandU(std::get<0>(param), sz_in, std::get<0>(param), std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::bitwise_not(in);
     cv::GComputation c(in, out);
 
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -544,18 +469,13 @@ TEST_P(NotTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(SelectTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatsRandU(type, sz_in, type, std::get<2>(param));
-    cv::Mat in_mask(sz_in, CV_8UC1);
+    cv::Mat in_mask(sz, CV_8UC1);
     cv::randu(in_mask, cv::Scalar::all(0), cv::Scalar::all(255));
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -563,7 +483,7 @@ TEST_P(SelectTest, AccuracyTest)
     auto out = cv::gapi::select(in1, in2, in3);
     cv::GComputation c(GIn(in1, in2, in3), GOut(out));
 
-    c.apply(gin(in_mat1, in_mat2, in_mask), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2, in_mask), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -573,23 +493,18 @@ TEST_P(SelectTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(MinTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatsRandU(std::get<0>(param), sz_in, std::get<0>(param), std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::min(in1, in2);
     cv::GComputation c(GIn(in1, in2), GOut(out));
 
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -598,23 +513,18 @@ TEST_P(MinTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(MaxTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatsRandU(std::get<0>(param), sz_in, std::get<0>(param), std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::max(in1, in2);
     cv::GComputation c(GIn(in1, in2), GOut(out));
 
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -623,23 +533,18 @@ TEST_P(MaxTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(AbsDiffTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatsRandU(std::get<0>(param), sz_in, std::get<0>(param), std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::absDiff(in1, in2);
     cv::GComputation c(GIn(in1, in2), GOut(out));
 
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -648,24 +553,19 @@ TEST_P(AbsDiffTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(AbsDiffCTest, AccuracyTest)
 {
-    auto param = GetParam();
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatsRandU(std::get<0>(param), sz_in, std::get<0>(param), std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1;
     cv::GScalar sc1;
     auto out = cv::gapi::absDiffC(in1, sc1);
     cv::GComputation c(cv::GIn(in1, sc1), cv::GOut(out));
 
-    c.apply(gin(in_mat1, sc), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, sc), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -674,20 +574,12 @@ TEST_P(AbsDiffCTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(SumTest, AccuracyTest)
 {
-    auto param = GetParam();
-    compare_scalar_f cmpF = get<3>(GetParam());
-    MatType type = std::get<0>(param);
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<4>(param);
-    initMatrixRandU(type, sz_in, type, std::get<2>(param));
-
-
     cv::Scalar out_sum;
     cv::Scalar out_sum_ocv;
 
@@ -696,7 +588,7 @@ TEST_P(SumTest, AccuracyTest)
     auto out = cv::gapi::sum(in);
 
     cv::GComputation c(cv::GIn(in), cv::GOut(out));
-    c.apply(cv::gin(in_mat1), cv::gout(out_sum), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_sum), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         out_sum_ocv = cv::sum(in_mat1);
@@ -709,25 +601,17 @@ TEST_P(SumTest, AccuracyTest)
 
 TEST_P(AddWeightedTest, AccuracyTest)
 {
-    int type = 0, dtype = 0;
-    cv::Size sz_in;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    compare_f cmpF;
-    std::tie(type, sz_in, dtype, initOut, cmpF, compile_args) = GetParam();
-
     auto& rng = cv::theRNG();
     double alpha = rng.uniform(0.0, 1.0);
     double beta = rng.uniform(0.0, 1.0);
     double gamma = rng.uniform(0.0, 1.0);
-    initMatsRandU(type, sz_in, dtype, initOut);
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::addWeighted(in1, alpha, in2, beta, gamma, dtype);
     cv::GComputation c(GIn(in1, in2), GOut(out));
 
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -735,20 +619,11 @@ TEST_P(AddWeightedTest, AccuracyTest)
     }
     // Comparison //////////////////////////////////////////////////////////////
     EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-    EXPECT_EQ(out_mat_gapi.size(), sz_in);
-
+    EXPECT_EQ(out_mat_gapi.size(), sz);
 }
 
 TEST_P(NormTest, AccuracyTest)
 {
-    compare_scalar_f cmpF;
-    NormTypes opType = NORM_INF;
-    int type = 0;
-    cv::Size sz;
-    cv::GCompileArgs compile_args;
-    std::tie(opType, type, sz, cmpF, compile_args) = GetParam();
-    initMatrixRandU(type, sz, type, false);
-
     cv::Scalar out_norm;
     cv::Scalar out_norm_ocv;
 
@@ -764,7 +639,7 @@ TEST_P(NormTest, AccuracyTest)
     }
     out_norm_ocv = cv::norm(in_mat1, opType);
     cv::GComputation c(GIn(in1), GOut(out));
-    c.apply(gin(in_mat1), gout(out_norm), std::move(compile_args));
+    c.apply(gin(in_mat1), gout(out_norm), getCompileArgs());
 
     // Comparison //////////////////////////////////////////////////////////////
     {
@@ -774,16 +649,12 @@ TEST_P(NormTest, AccuracyTest)
 
 TEST_P(IntegralTest, AccuracyTest)
 {
-    int type = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
-
     int type_out = (type == CV_8U) ? CV_32SC1 : CV_64FC1;
-    cv::Mat in_mat1(sz_in, type);
+    in_mat1 = cv::Mat(sz, type);
 
     cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
 
-    cv::Size sz_out = cv::Size(sz_in.width + 1, sz_in.height + 1);
+    cv::Size sz_out = cv::Size(sz.width + 1, sz.height + 1);
     cv::Mat out_mat1(sz_out, type_out);
     cv::Mat out_mat_ocv1(sz_out, type_out);
 
@@ -795,7 +666,7 @@ TEST_P(IntegralTest, AccuracyTest)
     std::tie(out1, out2)  = cv::gapi::integral(in1, type_out, CV_64FC1);
     cv::GComputation c(cv::GIn(in1), cv::GOut(out1, out2));
 
-    c.apply(cv::gin(in_mat1), cv::gout(out_mat1, out_mat2), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_mat1, out_mat2), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -810,15 +681,8 @@ TEST_P(IntegralTest, AccuracyTest)
 
 TEST_P(ThresholdTest, AccuracyTestBinary)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_in = std::get<1>(param);
-    int tt = std::get<2>(param);
-
-    auto compile_args = std::get<4>(param);
     cv::Scalar thr = initScalarRandU(50);
     cv::Scalar maxval = initScalarRandU(50) + cv::Scalar(50, 50, 50, 50);
-    initMatrixRandU(type, sz_in, type, std::get<3>(param));
     cv::Scalar out_scalar;
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -827,7 +691,7 @@ TEST_P(ThresholdTest, AccuracyTestBinary)
     out = cv::gapi::threshold(in1, th1, mv1, tt);
     cv::GComputation c(GIn(in1, th1, mv1), GOut(out));
 
-    c.apply(gin(in_mat1, thr, maxval), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, thr, maxval), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -835,21 +699,14 @@ TEST_P(ThresholdTest, AccuracyTestBinary)
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
-        ASSERT_EQ(out_mat_gapi.size(), sz_in);
+        ASSERT_EQ(out_mat_gapi.size(), sz);
         EXPECT_EQ(0, cv::norm(out_mat_ocv, out_mat_gapi, NORM_L1));
     }
 }
 
 TEST_P(ThresholdOTTest, AccuracyTestOtsu)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_in = std::get<1>(param);
-    int tt = std::get<2>(param);
-
-    auto compile_args = std::get<4>(param);
     cv::Scalar maxval = initScalarRandU(50) + cv::Scalar(50, 50, 50, 50);
-    initMatrixRandU(type, sz_in, type, std::get<3>(param));
     cv::Scalar out_gapi_scalar;
     double ocv_res;
 
@@ -859,7 +716,7 @@ TEST_P(ThresholdOTTest, AccuracyTestOtsu)
     std::tie<cv::GMat, cv::GScalar>(out, scout) = cv::gapi::threshold(in1, mv1, tt);
     cv::GComputation c(cv::GIn(in1, mv1), cv::GOut(out, scout));
 
-    c.apply(gin(in_mat1, maxval), gout(out_mat_gapi, out_gapi_scalar), std::move(compile_args));
+    c.apply(gin(in_mat1, maxval), gout(out_mat_gapi, out_gapi_scalar), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -868,21 +725,15 @@ TEST_P(ThresholdOTTest, AccuracyTestOtsu)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
         EXPECT_EQ(ocv_res, out_gapi_scalar.val[0]);
     }
 }
 
 TEST_P(InRangeTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_in = std::get<1>(param);
-
-    auto compile_args = std::get<3>(param);
     cv::Scalar thrLow = initScalarRandU(100);
     cv::Scalar thrUp = initScalarRandU(100) + cv::Scalar(100, 100, 100, 100);
-    initMatrixRandU(type, sz_in, type, std::get<2>(param));
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1;
@@ -890,7 +741,7 @@ TEST_P(InRangeTest, AccuracyTest)
     auto out = cv::gapi::inRange(in1, th1, mv1);
     cv::GComputation c(GIn(in1, th1, mv1), GOut(out));
 
-    c.apply(gin(in_mat1, thrLow, thrUp), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, thrLow, thrUp), gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -899,26 +750,22 @@ TEST_P(InRangeTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(Split3Test, AccuracyTest)
 {
-    cv::Size sz_in = std::get<0>(GetParam());
-    auto compile_args = std::get<1>(GetParam());
-    initMatrixRandU(CV_8UC3, sz_in, CV_8UC1);
-
-    cv::Mat out_mat2 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat3 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat_ocv2 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat_ocv3 = cv::Mat(sz_in, CV_8UC1);
+    cv::Mat out_mat2 = cv::Mat(sz, dtype);
+    cv::Mat out_mat3 = cv::Mat(sz, dtype);
+    cv::Mat out_mat_ocv2 = cv::Mat(sz, dtype);
+    cv::Mat out_mat_ocv3 = cv::Mat(sz, dtype);
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, out1, out2, out3;
     std::tie(out1, out2, out3)  = cv::gapi::split3(in1);
     cv::GComputation c(cv::GIn(in1), cv::GOut(out1, out2, out3));
 
-    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi, out_mat2, out_mat3), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi, out_mat2, out_mat3), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         std::vector<cv::Mat> out_mats_ocv = {out_mat_ocv, out_mat_ocv2, out_mat_ocv3};
@@ -934,22 +781,19 @@ TEST_P(Split3Test, AccuracyTest)
 
 TEST_P(Split4Test, AccuracyTest)
 {
-    cv::Size sz_in = std::get<0>(GetParam());
-    auto compile_args = std::get<1>(GetParam());
-    initMatrixRandU(CV_8UC4, sz_in, CV_8UC1);
-    cv::Mat out_mat2 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat3 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat4 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat_ocv2 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat_ocv3 = cv::Mat(sz_in, CV_8UC1);
-    cv::Mat out_mat_ocv4 = cv::Mat(sz_in, CV_8UC1);
+    cv::Mat out_mat2 = cv::Mat(sz, dtype);
+    cv::Mat out_mat3 = cv::Mat(sz, dtype);
+    cv::Mat out_mat4 = cv::Mat(sz, dtype);
+    cv::Mat out_mat_ocv2 = cv::Mat(sz, dtype);
+    cv::Mat out_mat_ocv3 = cv::Mat(sz, dtype);
+    cv::Mat out_mat_ocv4 = cv::Mat(sz, dtype);
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, out1, out2, out3, out4;
     std::tie(out1, out2, out3, out4)  = cv::gapi::split4(in1);
     cv::GComputation c(cv::GIn(in1), cv::GOut(out1, out2, out3, out4));
 
-    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi, out_mat2, out_mat3, out_mat4), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi, out_mat2, out_mat3, out_mat4), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         std::vector<cv::Mat> out_mats_ocv = {out_mat_ocv, out_mat_ocv2, out_mat_ocv3, out_mat_ocv4};
@@ -964,7 +808,8 @@ TEST_P(Split4Test, AccuracyTest)
     }
 }
 
-static void ResizeAccuracyTest(compare_f cmpF, int type, int interp, cv::Size sz_in, cv::Size sz_out, double fx, double fy, cv::GCompileArgs&& compile_args)
+static void ResizeAccuracyTest(const CompareMats& cmpF, int type, int interp, cv::Size sz_in,
+    cv::Size sz_out, double fx, double fy, cv::GCompileArgs&& compile_args)
 {
     cv::Mat in_mat1 (sz_in, type );
     cv::Scalar mean = cv::Scalar::all(127);
@@ -996,31 +841,48 @@ static void ResizeAccuracyTest(compare_f cmpF, int type, int interp, cv::Size sz
 
 TEST_P(ResizeTest, AccuracyTest)
 {
-    compare_f cmpF;
-    int type = 0, interp = 0;
-    cv::Size sz_in, sz_out;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, interp, sz_in, sz_out, compile_args) = GetParam();
-    ResizeAccuracyTest(cmpF, type, interp, sz_in, sz_out, 0.0, 0.0, std::move(compile_args));
+    ResizeAccuracyTest(cmpF, type, interp, sz, sz_out, 0.0, 0.0, getCompileArgs());
 }
 
 TEST_P(ResizeTestFxFy, AccuracyTest)
 {
-    compare_f cmpF;
-    int type = 0, interp = 0;
-    cv::Size sz_in;
-    double fx = 0.0, fy = 0.0;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, interp, sz_in, fx, fy, compile_args) = GetParam();
-    ResizeAccuracyTest(cmpF, type, interp, sz_in, cv::Size{0, 0}, fx, fy, std::move(compile_args));
+    ResizeAccuracyTest(cmpF, type, interp, sz, cv::Size{0, 0}, fx, fy, getCompileArgs());
+}
+
+TEST_P(ResizePTest, AccuracyTest)
+{
+    constexpr int planeNum = 3;
+    cv::Size sz_in_p {sz.width,  sz.height*planeNum};
+    cv::Size sz_out_p{sz_out.width, sz_out.height*planeNum};
+
+    cv::Mat in_mat(sz_in_p, CV_8UC1);
+    cv::randn(in_mat, cv::Scalar::all(127.0f), cv::Scalar::all(40.f));
+
+    cv::Mat out_mat    (sz_out_p, CV_8UC1);
+    cv::Mat out_mat_ocv_p(sz_out_p, CV_8UC1);
+
+    cv::GMatP in;
+    auto out = cv::gapi::resizeP(in, sz_out, interp);
+    cv::GComputation c(cv::GIn(in), cv::GOut(out));
+
+    c.compile(cv::descr_of(in_mat).asPlanar(planeNum), getCompileArgs())
+             (cv::gin(in_mat), cv::gout(out_mat));
+
+    for (int i = 0; i < planeNum; i++) {
+        const cv::Mat in_mat_roi = in_mat(cv::Rect(0, i*sz.height,  sz.width,  sz.height));
+        cv::Mat out_mat_roi = out_mat_ocv_p(cv::Rect(0, i*sz_out.height, sz_out.width, sz_out.height));
+        cv::resize(in_mat_roi, out_mat_roi, sz_out, 0, 0, interp);
+    }
+
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat, out_mat_ocv_p));
+    }
 }
 
 TEST_P(Merge3Test, AccuracyTest)
 {
-    cv::Size sz_in = std::get<0>(GetParam());
-    initMatsRandU(CV_8UC1, sz_in, CV_8UC3);
-    auto compile_args = std::get<1>(GetParam());
-    cv::Mat in_mat3(sz_in,  CV_8UC1);
+    cv::Mat in_mat3(sz, type);
     cv::Scalar mean = cv::Scalar::all(127);
     cv::Scalar stddev = cv::Scalar::all(40.f);
 
@@ -1031,7 +893,7 @@ TEST_P(Merge3Test, AccuracyTest)
     auto out = cv::gapi::merge3(in1, in2, in3);
 
     cv::GComputation c(cv::GIn(in1, in2, in3), cv::GOut(out));
-    c.apply(cv::gin(in_mat1, in_mat2, in_mat3), cv::gout(out_mat_gapi), std::move(compile_args));
+    c.apply(cv::gin(in_mat1, in_mat2, in_mat3), cv::gout(out_mat_gapi), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         std::vector<cv::Mat> in_mats_ocv = {in_mat1, in_mat2, in_mat3};
@@ -1045,11 +907,8 @@ TEST_P(Merge3Test, AccuracyTest)
 
 TEST_P(Merge4Test, AccuracyTest)
 {
-    cv::Size sz_in = std::get<0>(GetParam());
-    initMatsRandU(CV_8UC1, sz_in, CV_8UC4);
-    auto compile_args = std::get<1>(GetParam());
-    cv::Mat in_mat3(sz_in,  CV_8UC1);
-    cv::Mat in_mat4(sz_in,  CV_8UC1);
+    cv::Mat in_mat3(sz, type);
+    cv::Mat in_mat4(sz, type);
     cv::Scalar mean = cv::Scalar::all(127);
     cv::Scalar stddev = cv::Scalar::all(40.f);
 
@@ -1061,7 +920,7 @@ TEST_P(Merge4Test, AccuracyTest)
     auto out = cv::gapi::merge4(in1, in2, in3, in4);
 
     cv::GComputation c(cv::GIn(in1, in2, in3, in4), cv::GOut(out));
-    c.apply(cv::gin(in_mat1, in_mat2, in_mat3, in_mat4), cv::gout(out_mat_gapi), std::move(compile_args));
+    c.apply(cv::gin(in_mat1, in_mat2, in_mat3, in_mat4), cv::gout(out_mat_gapi), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         std::vector<cv::Mat> in_mats_ocv = {in_mat1, in_mat2, in_mat3, in_mat4};
@@ -1075,12 +934,7 @@ TEST_P(Merge4Test, AccuracyTest)
 
 TEST_P(RemapTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_in = std::get<1>(param);
-    auto compile_args = std::get<3>(param);
-    initMatrixRandU(type, sz_in, type, std::get<2>(param));
-    cv::Mat in_map1(sz_in,  CV_16SC2);
+    cv::Mat in_map1(sz, CV_16SC2);
     cv::Mat in_map2 = cv::Mat();
     cv::randu(in_map1, cv::Scalar::all(0), cv::Scalar::all(255));
     cv::Scalar bv = cv::Scalar();
@@ -1090,7 +944,7 @@ TEST_P(RemapTest, AccuracyTest)
     auto out = cv::gapi::remap(in1, in_map1, in_map2, cv::INTER_NEAREST,  cv::BORDER_REPLICATE, bv);
     cv::GComputation c(in1, out);
 
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -1099,25 +953,18 @@ TEST_P(RemapTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(FlipTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    int flipCode =  std::get<1>(param);
-    cv::Size sz_in = std::get<2>(param);
-    initMatrixRandU(type, sz_in, type, false);
-    auto compile_args = std::get<4>(GetParam());
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::flip(in, flipCode);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::flip(in_mat1, out_mat_ocv, flipCode);
@@ -1125,24 +972,17 @@ TEST_P(FlipTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(CropTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Rect rect_to = std::get<1>(param);
-    cv::Size sz_in = std::get<2>(param);
-    auto compile_args = std::get<4>(param);
-
-    initMatrixRandU(type, sz_in, type, false);
     cv::Size sz_out = cv::Size(rect_to.width, rect_to.height);
-    if( std::get<3>(param) == true )
+    if (dtype != -1)
     {
-        out_mat_gapi = cv::Mat(sz_out, type);
-        out_mat_ocv = cv::Mat(sz_out, type);
+        out_mat_gapi = cv::Mat(sz_out, dtype);
+        out_mat_ocv = cv::Mat(sz_out, dtype);
     }
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -1150,7 +990,7 @@ TEST_P(CropTest, AccuracyTest)
     auto out = cv::gapi::crop(in, rect_to);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::Mat(in_mat1, rect_to).copyTo(out_mat_ocv);
@@ -1164,17 +1004,14 @@ TEST_P(CropTest, AccuracyTest)
 
 TEST_P(ConcatHorTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_out = std::get<1>(param);
-    auto compile_args = std::get<2>(param);
+    cv::Size sz_out = sz;
 
     int wpart = sz_out.width / 4;
     cv::Size sz_in1 = cv::Size(wpart, sz_out.height);
     cv::Size sz_in2 = cv::Size(sz_out.width - wpart, sz_out.height);
 
-    cv::Mat in_mat1 (sz_in1, type );
-    cv::Mat in_mat2 (sz_in2, type);
+    in_mat1 = cv::Mat(sz_in1, type );
+    in_mat2 = cv::Mat(sz_in2, type);
     cv::Scalar mean = cv::Scalar::all(127);
     cv::Scalar stddev = cv::Scalar::all(40.f);
 
@@ -1182,17 +1019,17 @@ TEST_P(ConcatHorTest, AccuracyTest)
     cv::randn(in_mat2, mean, stddev);
 
     cv::Mat out_mat(sz_out, type);
-    cv::Mat out_mat_ocv(sz_out, type);
+    out_mat_ocv = cv::Mat(sz_out, type);
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::concatHor(in1, in2);
 
     cv::GComputation c(GIn(in1, in2), GOut(out));
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
-        cv::hconcat(in_mat1, in_mat2, out_mat_ocv );
+        cv::hconcat(in_mat1, in_mat2, out_mat_ocv);
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
@@ -1202,17 +1039,14 @@ TEST_P(ConcatHorTest, AccuracyTest)
 
 TEST_P(ConcatVertTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_out = std::get<1>(param);
-    auto compile_args = std::get<2>(param);
+    cv::Size sz_out = sz;
 
     int hpart = sz_out.height * 2/3;
     cv::Size sz_in1 = cv::Size(sz_out.width, hpart);
     cv::Size sz_in2 = cv::Size(sz_out.width, sz_out.height - hpart);
 
-    cv::Mat in_mat1 (sz_in1, type);
-    cv::Mat in_mat2 (sz_in2, type);
+    in_mat1 = cv::Mat(sz_in1, type);
+    in_mat2 = cv::Mat(sz_in2, type);
     cv::Scalar mean = cv::Scalar::all(127);
     cv::Scalar stddev = cv::Scalar::all(40.f);
 
@@ -1220,14 +1054,14 @@ TEST_P(ConcatVertTest, AccuracyTest)
     cv::randn(in_mat2, mean, stddev);
 
     cv::Mat out_mat(sz_out, type);
-    cv::Mat out_mat_ocv(sz_out, type);
+    out_mat_ocv = cv::Mat(sz_out, type);
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in1, in2;
     auto out = cv::gapi::concatVert(in1, in2);
 
     cv::GComputation c(GIn(in1, in2), GOut(out));
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::vconcat(in_mat1, in_mat2, out_mat_ocv );
@@ -1240,10 +1074,7 @@ TEST_P(ConcatVertTest, AccuracyTest)
 
 TEST_P(ConcatVertVecTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_out = std::get<1>(param);
-    auto compile_args = std::get<2>(param);
+    cv::Size sz_out = sz;
 
     int hpart1 = sz_out.height * 2/5;
     int hpart2 = sz_out.height / 5;
@@ -1251,9 +1082,9 @@ TEST_P(ConcatVertVecTest, AccuracyTest)
     cv::Size sz_in2 = cv::Size(sz_out.width, hpart2);
     cv::Size sz_in3 = cv::Size(sz_out.width, sz_out.height - hpart1 - hpart2);
 
-    cv::Mat in_mat1 (sz_in1, type);
-    cv::Mat in_mat2 (sz_in2, type);
-    cv::Mat in_mat3 (sz_in3, type);
+    in_mat1 = cv::Mat(sz_in1, type);
+    in_mat2 = cv::Mat(sz_in2, type);
+    cv::Mat in_mat3(sz_in3, type);
     cv::Scalar mean = cv::Scalar::all(127);
     cv::Scalar stddev = cv::Scalar::all(40.f);
 
@@ -1262,7 +1093,7 @@ TEST_P(ConcatVertVecTest, AccuracyTest)
     cv::randn(in_mat3, mean, stddev);
 
     cv::Mat out_mat(sz_out, type);
-    cv::Mat out_mat_ocv(sz_out, type);
+    out_mat_ocv = cv::Mat(sz_out, type);
 
     // G-API code //////////////////////////////////////////////////////////////
     std::vector <cv::GMat> mats(3);
@@ -1271,7 +1102,7 @@ TEST_P(ConcatVertVecTest, AccuracyTest)
     std::vector <cv::Mat> cvmats = {in_mat1, in_mat2, in_mat3};
 
     cv::GComputation c({mats[0], mats[1], mats[2]}, {out});
-    c.apply(gin(in_mat1, in_mat2, in_mat3), gout(out_mat), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2, in_mat3), gout(out_mat), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -1285,10 +1116,7 @@ TEST_P(ConcatVertVecTest, AccuracyTest)
 
 TEST_P(ConcatHorVecTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type = std::get<0>(param);
-    cv::Size sz_out = std::get<1>(param);
-    auto compile_args = std::get<2>(param);
+    cv::Size sz_out = sz;
 
     int wpart1 = sz_out.width / 3;
     int wpart2 = sz_out.width / 4;
@@ -1296,8 +1124,8 @@ TEST_P(ConcatHorVecTest, AccuracyTest)
     cv::Size sz_in2 = cv::Size(wpart2, sz_out.height);
     cv::Size sz_in3 = cv::Size(sz_out.width - wpart1 - wpart2, sz_out.height);
 
-    cv::Mat in_mat1 (sz_in1, type);
-    cv::Mat in_mat2 (sz_in2, type);
+    in_mat1 = cv::Mat(sz_in1, type);
+    in_mat2 = cv::Mat(sz_in2, type);
     cv::Mat in_mat3 (sz_in3, type);
     cv::Scalar mean = cv::Scalar::all(127);
     cv::Scalar stddev = cv::Scalar::all(40.f);
@@ -1307,7 +1135,7 @@ TEST_P(ConcatHorVecTest, AccuracyTest)
     cv::randn(in_mat3, mean, stddev);
 
     cv::Mat out_mat(sz_out, type);
-    cv::Mat out_mat_ocv(sz_out, type);
+    out_mat_ocv = cv::Mat(sz_out, type);
 
     // G-API code //////////////////////////////////////////////////////////////
     std::vector <cv::GMat> mats(3);
@@ -1316,7 +1144,7 @@ TEST_P(ConcatHorVecTest, AccuracyTest)
     std::vector <cv::Mat> cvmats = {in_mat1, in_mat2, in_mat3};
 
     cv::GComputation c({mats[0], mats[1], mats[2]}, {out});
-    c.apply(gin(in_mat1, in_mat2, in_mat3), gout(out_mat), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2, in_mat3), gout(out_mat), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -1330,14 +1158,11 @@ TEST_P(ConcatHorVecTest, AccuracyTest)
 
 TEST_P(LUTTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type_mat = std::get<0>(param);
-    int type_lut = std::get<1>(param);
+    int type_mat = type;
+    int type_lut = dtype;
     int type_out = CV_MAKETYPE(CV_MAT_DEPTH(type_lut), CV_MAT_CN(type_mat));
-    cv::Size sz_in = std::get<2>(param);
-    auto compile_args = std::get<4>(GetParam());
 
-    initMatrixRandU(type_mat, sz_in, type_out);
+    initMatrixRandU(type_mat, sz, type_out);
     cv::Size sz_lut = cv::Size(1, 256);
     cv::Mat in_lut(sz_lut, type_lut);
     cv::randu(in_lut, cv::Scalar::all(0), cv::Scalar::all(255));
@@ -1347,7 +1172,7 @@ TEST_P(LUTTest, AccuracyTest)
     auto out = cv::gapi::LUT(in, in_lut);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::LUT(in_mat1, in_lut, out_mat_ocv);
@@ -1355,52 +1180,42 @@ TEST_P(LUTTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(ConvertToTest, AccuracyTest)
 {
-    auto param = GetParam();
-    int type_mat = std::get<0>(param);
-    int depth_to = std::get<1>(param);
-    cv::Size sz_in = std::get<2>(param);
+    int type_mat = type;
+    int depth_to = dtype;
     int type_out = CV_MAKETYPE(depth_to, CV_MAT_CN(type_mat));
-    initMatrixRandU(type_mat, sz_in, type_out);
-    auto compile_args = std::get<3>(GetParam());
+    initMatrixRandU(type_mat, sz, type_out);
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
-    auto out = cv::gapi::convertTo(in, depth_to);
+    auto out = cv::gapi::convertTo(in, depth_to, alpha, beta);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
-        in_mat1.convertTo(out_mat_ocv, depth_to);
+        in_mat1.convertTo(out_mat_ocv, depth_to, alpha, beta);
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(PhaseTest, AccuracyTest)
 {
-    int img_type = -1;
-    cv::Size img_size;
-    bool angle_in_degrees = false;
-    cv::GCompileArgs compile_args;
-    std::tie(img_type, img_size, angle_in_degrees, compile_args) = GetParam();
-    initMatsRandU(img_type, img_size, img_type);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in_x, in_y;
     auto out = cv::gapi::phase(in_x, in_y, angle_in_degrees);
 
     cv::GComputation c(in_x, in_y, out);
-    c.apply(in_mat1, in_mat2, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, in_mat2, out_mat_gapi, getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     cv::phase(in_mat1, in_mat2, out_mat_ocv, angle_in_degrees);
@@ -1414,18 +1229,12 @@ TEST_P(PhaseTest, AccuracyTest)
 
 TEST_P(SqrtTest, AccuracyTest)
 {
-    int img_type = -1;
-    cv::Size img_size;
-    cv::GCompileArgs compile_args;
-    std::tie(img_type, img_size, compile_args) = GetParam();
-    initMatrixRandU(img_type, img_size, img_type);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::sqrt(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     cv::sqrt(in_mat1, out_mat_ocv);
@@ -1439,27 +1248,14 @@ TEST_P(SqrtTest, AccuracyTest)
 
 TEST_P(NormalizeTest, Test)
 {
-    auto param = GetParam();
-
-    compare_f cmpF;
-    MatType type, ddepth;
-    cv::Size sz;
-    double a = 0 , b = 0;
-    int norm_type = 0;
-    bool createOut = 0;
-    cv::GCompileArgs compile_args;
-
-    std::tie(cmpF, type, sz, a, b, norm_type, ddepth, createOut, compile_args) = GetParam();
-    int dtype = CV_MAKETYPE(ddepth, CV_MAT_CN(type));
-
-    initMatsRandN(type, sz, dtype, createOut);
+    initMatrixRandN(type, sz, CV_MAKETYPE(ddepth, CV_MAT_CN(type)));
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::normalize(in, a, b, norm_type, ddepth);
 
     cv::GComputation c(cv::GIn(in), cv::GOut(out));
-    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi), getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -1472,6 +1268,249 @@ TEST_P(NormalizeTest, Test)
     }
 }
 
+TEST_P(BackendOutputAllocationTest, EmptyOutput)
+{
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::mul(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+
+    EXPECT_TRUE(out_mat_gapi.empty());
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
+    EXPECT_FALSE(out_mat_gapi.empty());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::multiply(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: output is allocated to the needed size
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi.size());
+}
+
+TEST_P(BackendOutputAllocationTest, CorrectlyPreallocatedOutput)
+{
+    out_mat_gapi = cv::Mat(sz, type);
+    auto out_mat_gapi_ref = out_mat_gapi;  // shallow copy to ensure previous data is not deleted
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::add(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::add(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: output is not reallocated
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi.size());
+
+    EXPECT_EQ(out_mat_gapi_ref.data, out_mat_gapi.data);
+}
+
+TEST_P(BackendOutputAllocationTest, IncorrectOutputMeta)
+{
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::add(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+
+    const auto run_and_compare = [&c, this] ()
+    {
+        auto out_mat_gapi_ref = out_mat_gapi; // shallow copy to ensure previous data is not deleted
+
+        // G-API code //////////////////////////////////////////////////////////////
+        c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
+
+        // OpenCV code /////////////////////////////////////////////////////////////
+        cv::add(in_mat1, in_mat2, out_mat_ocv, cv::noArray());
+
+        // Comparison //////////////////////////////////////////////////////////////
+        // Expected: type is changed (to the correct one), output is reallocated
+        EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+        EXPECT_EQ(sz, out_mat_gapi.size());
+        EXPECT_EQ(type, out_mat_gapi.type());
+
+        EXPECT_NE(out_mat_gapi_ref.data, out_mat_gapi.data);
+    };
+
+    const auto chan = CV_MAT_CN(type);
+
+    out_mat_gapi = cv::Mat(sz, CV_MAKE_TYPE(CV_64F, chan));
+    run_and_compare();
+
+    out_mat_gapi = cv::Mat(sz, CV_MAKE_TYPE(CV_MAT_DEPTH(type), chan + 1));
+    run_and_compare();
+}
+
+TEST_P(BackendOutputAllocationTest, SmallerPreallocatedSize)
+{
+    out_mat_gapi = cv::Mat(sz / 2, type);
+    auto out_mat_gapi_ref = out_mat_gapi; // shallow copy to ensure previous data is not deleted
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::mul(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::multiply(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: size is changed, output is reallocated because the preallocated size is smaller than the required one
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi.size());
+
+    EXPECT_NE(out_mat_gapi_ref.data, out_mat_gapi.data);
+}
+
+TEST_P(BackendOutputAllocationTest, SmallerPreallocatedSizeWithSubmatrix)
+{
+    out_mat_gapi = cv::Mat(sz / 2, type);
+
+    cv::Mat out_mat_gapi_submat = out_mat_gapi(cv::Rect({10, 0}, sz / 5));
+    EXPECT_EQ(out_mat_gapi.data, out_mat_gapi_submat.datastart);
+
+    auto out_mat_gapi_submat_ref = out_mat_gapi_submat; // shallow copy to ensure previous data is not deleted
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::mul(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi_submat), getCompileArgs());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::multiply(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: submatrix is reallocated and is "detached", original matrix is unchanged
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi_submat != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi_submat.size());
+    EXPECT_EQ(sz / 2, out_mat_gapi.size());
+
+    EXPECT_NE(out_mat_gapi_submat_ref.data, out_mat_gapi_submat.data);
+    EXPECT_NE(out_mat_gapi.data, out_mat_gapi_submat.datastart);
+}
+
+TEST_P(BackendOutputAllocationTest, LargerPreallocatedSize)
+{
+    out_mat_gapi = cv::Mat(sz * 2, type);
+    auto out_mat_gapi_ref = out_mat_gapi; // shallow copy to ensure previous data is not deleted
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::mul(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::multiply(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: size is changed, output is reallocated
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi.size());
+
+    EXPECT_NE(out_mat_gapi_ref.data, out_mat_gapi.data);
+}
+
+TEST_P(BackendOutputAllocationLargeSizeWithCorrectSubmatrixTest,
+    LargerPreallocatedSizeWithCorrectSubmatrix)
+{
+    out_mat_gapi = cv::Mat(sz * 2, type);
+    auto out_mat_gapi_ref = out_mat_gapi; // shallow copy to ensure previous data is not deleted
+
+    cv::Mat out_mat_gapi_submat = out_mat_gapi(cv::Rect({5, 8}, sz));
+    EXPECT_EQ(out_mat_gapi.data, out_mat_gapi_submat.datastart);
+
+    auto out_mat_gapi_submat_ref = out_mat_gapi_submat;
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::mul(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi_submat), getCompileArgs());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::multiply(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: submatrix is not reallocated, original matrix is not reallocated
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi_submat != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi_submat.size());
+    EXPECT_EQ(sz * 2, out_mat_gapi.size());
+
+    EXPECT_EQ(out_mat_gapi_ref.data, out_mat_gapi.data);
+    EXPECT_EQ(out_mat_gapi_submat_ref.data, out_mat_gapi_submat.data);
+    EXPECT_EQ(out_mat_gapi.data, out_mat_gapi_submat.datastart);
+}
+
+TEST_P(BackendOutputAllocationTest, LargerPreallocatedSizeWithSmallSubmatrix)
+{
+    out_mat_gapi = cv::Mat(sz * 2, type);
+    auto out_mat_gapi_ref = out_mat_gapi; // shallow copy to ensure previous data is not deleted
+
+    cv::Mat out_mat_gapi_submat = out_mat_gapi(cv::Rect({5, 8}, sz / 2));
+    EXPECT_EQ(out_mat_gapi.data, out_mat_gapi_submat.datastart);
+
+    auto out_mat_gapi_submat_ref = out_mat_gapi_submat;
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::mul(in1, in2);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+    c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi_submat), getCompileArgs());
+
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::multiply(in_mat1, in_mat2, out_mat_ocv);
+
+    // Comparison //////////////////////////////////////////////////////////////
+    // Expected: submatrix is reallocated and is "detached", original matrix is unchanged
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi_submat != out_mat_ocv));
+    EXPECT_EQ(sz, out_mat_gapi_submat.size());
+    EXPECT_EQ(sz * 2, out_mat_gapi.size());
+
+    EXPECT_EQ(out_mat_gapi_ref.data, out_mat_gapi.data);
+    EXPECT_NE(out_mat_gapi_submat_ref.data, out_mat_gapi_submat.data);
+    EXPECT_NE(out_mat_gapi.data, out_mat_gapi_submat.datastart);
+}
+
+TEST_P(ReInitOutTest, TestWithAdd)
+{
+    in_mat1 = cv::Mat(sz, type);
+    in_mat2 = cv::Mat(sz, type);
+    cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(100));
+    cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(100));
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in1, in2, out;
+    out = cv::gapi::add(in1, in2, dtype);
+    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
+
+    const auto run_and_compare = [&c, this] ()
+    {
+        // G-API code //////////////////////////////////////////////////////////////
+        c.apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat_gapi), getCompileArgs());
+
+        // OpenCV code /////////////////////////////////////////////////////////////
+        cv::add(in_mat1, in_mat2, out_mat_ocv, cv::noArray());
+
+        // Comparison //////////////////////////////////////////////////////////////
+        EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
+    };
+
+    // run for uninitialized output
+    run_and_compare();
+
+    // run for initialized output (can be initialized with a different size)
+    initOutMats(out_sz, type);
+    run_and_compare();
+}
+
 } // opencv_test
 
 #endif //OPENCV_GAPI_CORE_TESTS_INL_HPP
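
Note: the core-test changes above all follow one refactoring pattern: the per-test GetParam()/std::tie unpacking is dropped in favour of fixture data members (type, sz, dtype, cmpF, ...) plus a getCompileArgs() accessor, so each TEST_P body keeps only the G-API graph, the OpenCV reference and the comparison. A minimal, self-contained sketch of that pattern follows; TypedSizeTest and its parameters are illustrative names, not the actual G-API test infrastructure.

    #include <tuple>
    #include <gtest/gtest.h>
    #include <opencv2/core.hpp>

    // Hypothetical fixture: parameters are unpacked once in SetUp(), so test
    // bodies refer to `type` and `sz` directly instead of re-reading GetParam().
    struct TypedSizeTest : public ::testing::TestWithParam<std::tuple<int, cv::Size>>
    {
        int      type = -1;
        cv::Size sz;
        cv::Mat  in_mat;

        void SetUp() override
        {
            std::tie(type, sz) = GetParam();
            in_mat = cv::Mat(sz, type);
            cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255));
        }
    };

    TEST_P(TypedSizeTest, BitwiseNotIsAnInvolution)
    {
        cv::Mat once, twice;
        cv::bitwise_not(in_mat, once);
        cv::bitwise_not(once, twice);
        EXPECT_DOUBLE_EQ(0.0, cv::norm(in_mat, twice, cv::NORM_INF));
        EXPECT_EQ(sz, twice.size());
    }

    INSTANTIATE_TEST_CASE_P(Sketch, TypedSizeTest,
        ::testing::Combine(::testing::Values(CV_8UC1, CV_16UC1),
                           ::testing::Values(cv::Size(640, 480), cv::Size(128, 128))));
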
index 14aac47..bb75f82 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_TESTS_HPP
 
 namespace opencv_test
 {
-
-struct Filter2DTest : public TestParams <std::tuple<compare_f, MatType,int,cv::Size,int,int,bool,cv::GCompileArgs>> {};
-struct BoxFilterTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,int,bool,cv::GCompileArgs>> {};
-struct SepFilterTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,bool,cv::GCompileArgs>> {};
-struct BlurTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,bool,cv::GCompileArgs>> {};
-struct GaussianBlurTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,bool,cv::GCompileArgs>> {};
-struct MedianBlurTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,bool,cv::GCompileArgs>> {};
-struct ErodeTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,bool,cv::GCompileArgs>> {};
-struct Erode3x3Test : public TestParams <std::tuple<compare_f,MatType,cv::Size,bool,int,cv::GCompileArgs>> {};
-struct DilateTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,bool,cv::GCompileArgs>> {};
-struct Dilate3x3Test : public TestParams <std::tuple<compare_f,MatType,cv::Size,bool,int,cv::GCompileArgs>> {};
-struct SobelTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,int,int,bool,cv::GCompileArgs>> {};
-struct SobelXYTest : public TestParams <std::tuple<compare_f,MatType,int,cv::Size,int,int,int,int,cv::GCompileArgs>> {};
-struct EqHistTest : public TestParams <std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct CannyTest : public TestParams <std::tuple<compare_f,MatType,cv::Size,double,double,int,bool,bool,cv::GCompileArgs>> {};
-struct RGB2GrayTest : public  TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct BGR2GrayTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct RGB2YUVTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct YUV2RGBTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct NV12toRGBTest : public TestParams<std::tuple<compare_f,cv::Size,cv::GCompileArgs>> {};
-struct NV12toBGRTest : public TestParams<std::tuple<compare_f,cv::Size,cv::GCompileArgs>> {};
-struct RGB2LabTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct BGR2LUVTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct LUV2BGRTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct BGR2YUVTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
-struct YUV2BGRTest : public TestParams<std::tuple<compare_f,cv::Size,bool,cv::GCompileArgs>> {};
+GAPI_TEST_FIXTURE(Filter2DTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int), 3,
+    cmpF, kernSize, borderType)
+GAPI_TEST_FIXTURE(BoxFilterTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int), 3,
+    cmpF, filterSize, borderType)
+GAPI_TEST_FIXTURE(SepFilterTest, initMatrixRandN, FIXTURE_API(CompareMats,int), 2, cmpF, kernSize)
+GAPI_TEST_FIXTURE(BlurTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int), 3,
+    cmpF, filterSize, borderType)
+GAPI_TEST_FIXTURE(GaussianBlurTest, initMatrixRandN, FIXTURE_API(CompareMats,int), 2, cmpF, kernSize)
+GAPI_TEST_FIXTURE(MedianBlurTest, initMatrixRandN, FIXTURE_API(CompareMats,int), 2, cmpF, kernSize)
+GAPI_TEST_FIXTURE(ErodeTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int), 3,
+    cmpF, kernSize, kernType)
+GAPI_TEST_FIXTURE(Erode3x3Test, initMatrixRandN, FIXTURE_API(CompareMats,int), 2,
+    cmpF, numIters)
+GAPI_TEST_FIXTURE(DilateTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int), 3,
+    cmpF, kernSize, kernType)
+GAPI_TEST_FIXTURE(Dilate3x3Test, initMatrixRandN, FIXTURE_API(CompareMats,int), 2, cmpF, numIters)
+GAPI_TEST_FIXTURE(SobelTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int,int), 4,
+    cmpF, kernSize, dx, dy)
+GAPI_TEST_FIXTURE(SobelXYTest, initMatrixRandN, FIXTURE_API(CompareMats,int,int,int,int), 5,
+    cmpF, kernSize, order, border_type, border_val)
+GAPI_TEST_FIXTURE(EqHistTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(CannyTest, initMatrixRandN, FIXTURE_API(CompareMats,double,double,int,bool), 5,
+    cmpF, thrLow, thrUp, apSize, l2gr)
+GAPI_TEST_FIXTURE(RGB2GrayTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(BGR2GrayTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(RGB2YUVTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(YUV2RGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(NV12toRGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(NV12toBGRpTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(NV12toRGBpTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(NV12toBGRTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(RGB2LabTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(BGR2LUVTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(LUV2BGRTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(BGR2YUVTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(YUV2BGRTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(RGB2HSVTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(BayerGR2RGBTest, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
+GAPI_TEST_FIXTURE(RGB2YUV422Test, initMatrixRandN, FIXTURE_API(CompareMats), 1, cmpF)
 } // opencv_test
 
 #endif //OPENCV_GAPI_IMGPROC_TESTS_HPP
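
Note: each GAPI_TEST_FIXTURE one-liner above replaces a hand-written TestParams<> struct plus the unpacking code in the corresponding TEST_P. As a rough mental model only, the BlurTest declaration corresponds to a hand-written fixture along the following lines; the real macro lives in gapi_tests_common.hpp and also handles output initialization, and everything suffixed Sketch here is hypothetical.

    #include <functional>
    #include <tuple>
    #include <gtest/gtest.h>
    #include <opencv2/core.hpp>
    #include <opencv2/gapi.hpp>

    using CompareMats = std::function<bool(const cv::Mat&, const cv::Mat&)>;

    struct BlurTestSketch
        : public ::testing::TestWithParam<std::tuple<int /*type*/, cv::Size, int /*dtype*/,
                                                     cv::GCompileArgs,
                                                     CompareMats, int /*filterSize*/, int /*borderType*/>>
    {
        int              type = 0, dtype = 0, filterSize = 0, borderType = 0;
        cv::Size         sz;
        CompareMats      cmpF;
        cv::GCompileArgs compile_args;
        cv::Mat          in_mat1, out_mat_gapi, out_mat_ocv;

        cv::GCompileArgs getCompileArgs() const { return compile_args; }

        void SetUp() override   // plays the role of initMatrixRandN in the declaration above
        {
            std::tie(type, sz, dtype, compile_args, cmpF, filterSize, borderType) = GetParam();
            in_mat1      = cv::Mat(sz, type);
            out_mat_gapi = cv::Mat(sz, dtype);
            out_mat_ocv  = cv::Mat(sz, dtype);
            cv::randn(in_mat1, cv::Scalar::all(127), cv::Scalar::all(40.0));
        }
    };
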
index 08a317a..396f50c 100644 (file)
@@ -2,32 +2,62 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_TESTS_INL_HPP
 #define OPENCV_GAPI_IMGPROC_TESTS_INL_HPP
 
-#include "opencv2/gapi/imgproc.hpp"
+#include <opencv2/gapi/imgproc.hpp>
 #include "gapi_imgproc_tests.hpp"
 
 namespace opencv_test
 {
-TEST_P(Filter2DTest, AccuracyTest)
+
+// FIXME: avoid duplicating this code in the perf tests
+namespace
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0, borderType = 0, dtype = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, borderType, dtype, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, dtype, initOut);
+    void rgb2yuyv(const uchar* rgb_line, uchar* yuv422_line, int width)
+    {
+        CV_Assert(width % 2 == 0);
+
+        for (int i = 0; i < width; i += 2)
+        {
+            uchar r = rgb_line[i * 3    ];
+            uchar g = rgb_line[i * 3 + 1];
+            uchar b = rgb_line[i * 3 + 2];
+
+            yuv422_line[i * 2    ] = cv::saturate_cast<uchar>(-0.14713 * r - 0.28886 * g + 0.436   * b + 128.f);  // U0
+            yuv422_line[i * 2 + 1] = cv::saturate_cast<uchar>( 0.299   * r + 0.587   * g + 0.114   * b        );  // Y0
+            yuv422_line[i * 2 + 2] = cv::saturate_cast<uchar>( 0.615   * r - 0.51499 * g - 0.10001 * b + 128.f);  // V0
+
+            r = rgb_line[i * 3 + 3];
+            g = rgb_line[i * 3 + 4];
+            b = rgb_line[i * 3 + 5];
+
+            yuv422_line[i * 2 + 3] = cv::saturate_cast<uchar>(0.299 * r + 0.587 * g + 0.114 * b);   // Y1
+        }
+    }
 
+    void convertRGB2YUV422Ref(const cv::Mat& in, cv::Mat &out)
+    {
+        out.create(in.size(), CV_8UC2);
+
+        for (int i = 0; i < in.rows; ++i)
+        {
+            const uchar* in_line_p  = in.ptr<uchar>(i);
+            uchar* out_line_p = out.ptr<uchar>(i);
+            rgb2yuyv(in_line_p, out_line_p, in.cols);
+        }
+    }
+}
+
+TEST_P(Filter2DTest, AccuracyTest)
+{
     cv::Point anchor = {-1, -1};
     double delta = 0;
 
-    cv::Mat kernel = cv::Mat(kernSize, kernSize, CV_32FC1 );
+    cv::Mat kernel = cv::Mat(kernSize, kernSize, CV_32FC1);
     cv::Scalar kernMean = cv::Scalar(1.0);
     cv::Scalar kernStddev = cv::Scalar(2.0/3);
     randn(kernel, kernMean, kernStddev);
@@ -37,7 +67,7 @@ TEST_P(Filter2DTest, AccuracyTest)
     auto out = cv::gapi::filter2D(in, dtype, kernel, anchor, delta, borderType);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::filter2D(in_mat1, out_mat_ocv, dtype, kernel, anchor, delta, borderType);
@@ -51,27 +81,20 @@ TEST_P(Filter2DTest, AccuracyTest)
 
 TEST_P(BoxFilterTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int filterSize = 0, borderType = 0, dtype = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, filterSize, sz, borderType, dtype, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, dtype, initOut);
-
     cv::Point anchor = {-1, -1};
     bool normalize = true;
 
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
-    auto out = cv::gapi::boxFilter(in, dtype, cv::Size(filterSize, filterSize), anchor, normalize, borderType);
+    auto out = cv::gapi::boxFilter(in, dtype, cv::Size(filterSize, filterSize), anchor, normalize,
+        borderType);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
-        cv::boxFilter(in_mat1, out_mat_ocv, dtype, cv::Size(filterSize, filterSize), anchor, normalize, borderType);
+        cv::boxFilter(in_mat1, out_mat_ocv, dtype, cv::Size(filterSize, filterSize), anchor,
+            normalize, borderType);
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
@@ -82,19 +105,10 @@ TEST_P(BoxFilterTest, AccuracyTest)
 
 TEST_P(SepFilterTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0, dtype = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, dtype, initOut, compile_args) = GetParam();
-
     cv::Mat kernelX(kernSize, 1, CV_32F);
     cv::Mat kernelY(kernSize, 1, CV_32F);
     randu(kernelX, -1, 1);
     randu(kernelY, -1, 1);
-    initMatsRandN(type, sz, dtype, initOut);
 
     cv::Point anchor = cv::Point(-1, -1);
 
@@ -103,7 +117,7 @@ TEST_P(SepFilterTest, AccuracyTest)
     auto out = cv::gapi::sepFilter(in, dtype, kernelX, kernelY, anchor, cv::Scalar() );
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::sepFilter2D(in_mat1, out_mat_ocv, dtype, kernelX, kernelY );
@@ -117,15 +131,6 @@ TEST_P(SepFilterTest, AccuracyTest)
 
 TEST_P(BlurTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int filterSize = 0, borderType = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, filterSize, sz, borderType, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     cv::Point anchor = {-1, -1};
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -133,7 +138,7 @@ TEST_P(BlurTest, AccuracyTest)
     auto out = cv::gapi::blur(in, cv::Size(filterSize, filterSize), anchor, borderType);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::blur(in_mat1, out_mat_ocv, cv::Size(filterSize, filterSize), anchor, borderType);
@@ -147,15 +152,6 @@ TEST_P(BlurTest, AccuracyTest)
 
 TEST_P(GaussianBlurTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF,type, kernSize, sz, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     cv::Size kSize = cv::Size(kernSize, kernSize);
     double sigmaX = rand();
 
@@ -164,7 +160,7 @@ TEST_P(GaussianBlurTest, AccuracyTest)
     auto out = cv::gapi::gaussianBlur(in, kSize, sigmaX);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::GaussianBlur(in_mat1, out_mat_ocv, kSize, sigmaX);
@@ -178,21 +174,12 @@ TEST_P(GaussianBlurTest, AccuracyTest)
 
 TEST_P(MedianBlurTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::medianBlur(in, kernSize);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::medianBlur(in_mat1, out_mat_ocv, kernSize);
@@ -206,15 +193,6 @@ TEST_P(MedianBlurTest, AccuracyTest)
 
 TEST_P(ErodeTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0, kernType = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, kernType, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     cv::Mat kernel = cv::getStructuringElement(kernType, cv::Size(kernSize, kernSize));
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -222,7 +200,7 @@ TEST_P(ErodeTest, AccuracyTest)
     auto out = cv::gapi::erode(in, kernel);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::erode(in_mat1, out_mat_ocv, kernel);
@@ -236,15 +214,6 @@ TEST_P(ErodeTest, AccuracyTest)
 
 TEST_P(Erode3x3Test, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int numIters = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, sz, initOut, numIters, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     cv::Mat kernel = cv::getStructuringElement(cv::MorphShapes::MORPH_RECT, cv::Size(3,3));
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -252,7 +221,7 @@ TEST_P(Erode3x3Test, AccuracyTest)
     auto out = cv::gapi::erode3x3(in, numIters);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::erode(in_mat1, out_mat_ocv, kernel, cv::Point(-1, -1), numIters);
@@ -266,15 +235,6 @@ TEST_P(Erode3x3Test, AccuracyTest)
 
 TEST_P(DilateTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0, kernType = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, kernType, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     cv::Mat kernel = cv::getStructuringElement(kernType, cv::Size(kernSize, kernSize));
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -282,7 +242,7 @@ TEST_P(DilateTest, AccuracyTest)
     auto out = cv::gapi::dilate(in, kernel);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::dilate(in_mat1, out_mat_ocv, kernel);
@@ -296,15 +256,6 @@ TEST_P(DilateTest, AccuracyTest)
 
 TEST_P(Dilate3x3Test, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int numIters = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, sz, initOut, numIters, compile_args) = GetParam();
-    initMatsRandN(type, sz, type, initOut);
-
     cv::Mat kernel = cv::getStructuringElement(cv::MorphShapes::MORPH_RECT, cv::Size(3,3));
 
     // G-API code //////////////////////////////////////////////////////////////
@@ -312,7 +263,7 @@ TEST_P(Dilate3x3Test, AccuracyTest)
     auto out = cv::gapi::dilate3x3(in, numIters);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::dilate(in_mat1, out_mat_ocv, kernel, cv::Point(-1,-1), numIters);
@@ -324,24 +275,14 @@ TEST_P(Dilate3x3Test, AccuracyTest)
     }
 }
 
-
 TEST_P(SobelTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0, dtype = 0, dx = 0, dy = 0;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, dtype, dx, dy, initOut, compile_args) = GetParam();
-    initMatsRandN(type, sz, dtype, initOut);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::Sobel(in, dtype, dx, dy, kernSize );
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::Sobel(in_mat1, out_mat_ocv, dtype, dx, dy, kernSize);
@@ -355,24 +296,15 @@ TEST_P(SobelTest, AccuracyTest)
 
 TEST_P(SobelXYTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type = 0;
-    int kernSize = 0, dtype = 0, order = 0, border_type = 0, border_val = 0;
-    cv::Size sz;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, kernSize, sz, dtype, order, border_type, border_val, compile_args) = GetParam();
-
     cv::Mat out_mat_ocv2;
     cv::Mat out_mat_gapi2;
 
-    initMatsRandN(type, sz, dtype);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::SobelXY(in, dtype, order, kernSize, 1, 0, border_type, border_val);
 
     cv::GComputation c(cv::GIn(in), cv::GOut(std::get<0>(out), std::get<1>(out)));
-    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi, out_mat_gapi2), std::move(compile_args));
+    c.apply(cv::gin(in_mat1), cv::gout(out_mat_gapi, out_mat_gapi2), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         // workaround for cv::Sobel
@@ -397,19 +329,12 @@ TEST_P(SobelXYTest, AccuracyTest)
 
 TEST_P(EqHistTest, AccuracyTest)
 {
-    compare_f cmpF;
-    cv::Size sz;
-    bool initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, sz, initOut, compile_args) = GetParam();
-    initMatsRandN(CV_8UC1, sz, CV_8UC1, initOut);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::equalizeHist(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::equalizeHist(in_mat1, out_mat_ocv);
@@ -417,29 +342,18 @@ TEST_P(EqHistTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(GetParam()));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(CannyTest, AccuracyTest)
 {
-    compare_f cmpF;
-    MatType type;
-    int apSize = 0;
-    double thrLow = 0.0, thrUp = 0.0;
-    cv::Size sz;
-    bool l2gr = false, initOut = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, type, sz, thrLow, thrUp, apSize, l2gr, initOut, compile_args) = GetParam();
-
-    initMatsRandN(type, sz, CV_8UC1, initOut);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::Canny(in, thrLow, thrUp, apSize, l2gr);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::Canny(in_mat1, out_mat_ocv, thrLow, thrUp, apSize, l2gr);
@@ -453,17 +367,12 @@ TEST_P(CannyTest, AccuracyTest)
 
 TEST_P(RGB2GrayTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC1, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::RGB2Gray(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_RGB2GRAY);
@@ -471,23 +380,18 @@ TEST_P(RGB2GrayTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(BGR2GrayTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC1, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::BGR2Gray(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BGR2GRAY);
@@ -495,23 +399,18 @@ TEST_P(BGR2GrayTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(RGB2YUVTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::RGB2YUV(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_RGB2YUV);
@@ -519,24 +418,18 @@ TEST_P(RGB2YUVTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(YUV2RGBTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
-
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::YUV2RGB(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_YUV2RGB);
@@ -544,19 +437,12 @@ TEST_P(YUV2RGBTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(NV12toRGBTest, AccuracyTest)
 {
-    compare_f cmpF;
-    cv::Size sz;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, sz, compile_args) = GetParam();
-
-    initMatsRandN(CV_8UC1, sz, CV_8UC3);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in_y;
     cv::GMat in_uv;
@@ -567,7 +453,7 @@ TEST_P(NV12toRGBTest, AccuracyTest)
     cv::randn(in_mat_uv, cv::Scalar::all(127), cv::Scalar::all(40.f));
 
     cv::GComputation c(cv::GIn(in_y, in_uv), cv::GOut(out));
-    c.apply(cv::gin(in_mat1, in_mat_uv), cv::gout(out_mat_gapi), std::move(compile_args));
+    c.apply(cv::gin(in_mat1, in_mat_uv), cv::gout(out_mat_gapi), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColorTwoPlane(in_mat1, in_mat_uv, out_mat_ocv, cv::COLOR_YUV2RGB_NV12);
@@ -581,13 +467,6 @@ TEST_P(NV12toRGBTest, AccuracyTest)
 
 TEST_P(NV12toBGRTest, AccuracyTest)
 {
-    compare_f cmpF;
-    cv::Size sz;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, sz, compile_args) = GetParam();
-
-    initMatsRandN(CV_8UC1, sz, CV_8UC3);
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in_y;
     cv::GMat in_uv;
@@ -598,7 +477,7 @@ TEST_P(NV12toBGRTest, AccuracyTest)
     cv::randn(in_mat_uv, cv::Scalar::all(127), cv::Scalar::all(40.f));
 
     cv::GComputation c(cv::GIn(in_y, in_uv), cv::GOut(out));
-    c.apply(cv::gin(in_mat1, in_mat_uv), cv::gout(out_mat_gapi), std::move(compile_args));
+    c.apply(cv::gin(in_mat1, in_mat_uv), cv::gout(out_mat_gapi), getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColorTwoPlane(in_mat1, in_mat_uv, out_mat_ocv, cv::COLOR_YUV2BGR_NV12);
@@ -610,19 +489,88 @@ TEST_P(NV12toBGRTest, AccuracyTest)
     }
 }
 
-TEST_P(RGB2LabTest, AccuracyTest)
+
+static void toPlanar(const cv::Mat& in, cv::Mat& out)
+{
+    GAPI_Assert(out.depth() == in.depth());
+    GAPI_Assert(out.channels() == 1);
+    GAPI_Assert(in.channels() == 3);
+    GAPI_Assert(out.cols == in.cols);
+    GAPI_Assert(out.rows == 3*in.rows);
+
+    std::vector<cv::Mat> outs(3);
+    for (int i = 0; i < 3; i++) {
+        outs[i] = out(cv::Rect(0, i*in.rows, in.cols, in.rows));
+    }
+    cv::split(in, outs);
+}
+
+TEST_P(NV12toRGBpTest, AccuracyTest)
+{
+    cv::Size sz_p = cv::Size(sz.width, sz.height * 3);
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in_y;
+    cv::GMat in_uv;
+    auto out = cv::gapi::NV12toRGBp(in_y, in_uv);
+
+    // Additional mat for uv
+    cv::Mat in_mat_uv(cv::Size(sz.width / 2, sz.height / 2), CV_8UC2);
+    cv::randn(in_mat_uv, cv::Scalar::all(127), cv::Scalar::all(40.f));
+
+    cv::GComputation c(cv::GIn(in_y, in_uv), cv::GOut(out));
+    cv::Mat out_mat_gapi_planar(cv::Size(sz.width, sz.height * 3), CV_8UC1);
+    c.apply(cv::gin(in_mat1, in_mat_uv), cv::gout(out_mat_gapi_planar), getCompileArgs());
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::Mat out_mat_ocv_planar(cv::Size(sz.width, sz.height * 3), CV_8UC1);
+    {
+        cv::cvtColorTwoPlane(in_mat1, in_mat_uv, out_mat_ocv, cv::COLOR_YUV2RGB_NV12);
+        toPlanar(out_mat_ocv, out_mat_ocv_planar);
+    }
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi_planar, out_mat_ocv_planar));
+        EXPECT_EQ(out_mat_gapi_planar.size(), sz_p);
+    }
+}
+
+
+TEST_P(NV12toBGRpTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
+    cv::Size sz_p = cv::Size(sz.width, sz.height * 3);
+
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in_y;
+    cv::GMat in_uv;
+    auto out = cv::gapi::NV12toBGRp(in_y, in_uv);
+
+    // Additional mat for uv
+    cv::Mat in_mat_uv(cv::Size(sz.width / 2, sz.height / 2), CV_8UC2);
+    cv::randn(in_mat_uv, cv::Scalar::all(127), cv::Scalar::all(40.f));
+
+    cv::GComputation c(cv::GIn(in_y, in_uv), cv::GOut(out));
+    cv::Mat out_mat_gapi_planar(cv::Size(sz.width, sz.height * 3), CV_8UC1);
+    c.apply(cv::gin(in_mat1, in_mat_uv), cv::gout(out_mat_gapi_planar), getCompileArgs());
+    // OpenCV code /////////////////////////////////////////////////////////////
+    cv::Mat out_mat_ocv_planar(cv::Size(sz.width, sz.height * 3), CV_8UC1);
+    {
+        cv::cvtColorTwoPlane(in_mat1, in_mat_uv, out_mat_ocv, cv::COLOR_YUV2BGR_NV12);
+        toPlanar(out_mat_ocv, out_mat_ocv_planar);
+    }
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi_planar, out_mat_ocv_planar));
+        EXPECT_EQ(out_mat_gapi_planar.size(), sz_p);
+    }
+}
 
+TEST_P(RGB2LabTest, AccuracyTest)
+{
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::RGB2Lab(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_RGB2Lab);
@@ -630,23 +578,18 @@ TEST_P(RGB2LabTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(BGR2LUVTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::BGR2LUV(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BGR2Luv);
@@ -654,23 +597,18 @@ TEST_P(BGR2LUVTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(LUV2BGRTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::LUV2BGR(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_Luv2BGR);
@@ -678,23 +616,18 @@ TEST_P(LUV2BGRTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(BGR2YUVTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::BGR2YUV(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BGR2YUV);
@@ -702,23 +635,18 @@ TEST_P(BGR2YUVTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 
 TEST_P(YUV2BGRTest, AccuracyTest)
 {
-    auto param = GetParam();
-    auto compile_args = std::get<3>(param);
-    compare_f cmpF = std::get<0>(param);
-    initMatsRandN(CV_8UC3, std::get<1>(param), CV_8UC3, std::get<2>(param));
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = cv::gapi::YUV2BGR(in);
 
     cv::GComputation c(in, out);
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
     // OpenCV code /////////////////////////////////////////////////////////////
     {
         cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_YUV2BGR);
@@ -726,7 +654,64 @@ TEST_P(YUV2BGRTest, AccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
-        EXPECT_EQ(out_mat_gapi.size(), std::get<1>(param));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
+    }
+}
+
+TEST_P(RGB2HSVTest, AccuracyTest)
+{
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in;
+    auto out = cv::gapi::RGB2HSV(in);
+
+    cv::GComputation c(in, out);
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
+    // OpenCV code /////////////////////////////////////////////////////////////
+    {
+        cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_RGB2HSV);
+    }
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
+    }
+}
+
+TEST_P(BayerGR2RGBTest, AccuracyTest)
+{
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in;
+    auto out = cv::gapi::BayerGR2RGB(in);
+
+    cv::GComputation c(in, out);
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
+    // OpenCV code /////////////////////////////////////////////////////////////
+    {
+        cv::cvtColor(in_mat1, out_mat_ocv, cv::COLOR_BayerGR2RGB);
+    }
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
+    }
+}
+
+TEST_P(RGB2YUV422Test, AccuracyTest)
+{
+    // G-API code //////////////////////////////////////////////////////////////
+    cv::GMat in;
+    auto out = cv::gapi::RGB2YUV422(in);
+
+    cv::GComputation c(in, out);
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
+    // OpenCV code /////////////////////////////////////////////////////////////
+    {
+        convertRGB2YUV422Ref(in_mat1, out_mat_ocv);
+    }
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
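
Note: the rgb2yuyv()/convertRGB2YUV422Ref() reference above packs every two RGB pixels into four bytes, sharing one U/V pair per pixel pair, using BT.601-style coefficients. A tiny standalone sketch of the same per-pixel arithmetic follows; the sample values are arbitrary and nothing here is part of the test suite.

    #include <cstdio>
    #include <opencv2/core.hpp>

    int main()
    {
        const uchar r = 200, g = 100, b = 50;   // arbitrary sample pixel

        const uchar y = cv::saturate_cast<uchar>( 0.299   * r + 0.587   * g + 0.114   * b);
        const uchar u = cv::saturate_cast<uchar>(-0.14713 * r - 0.28886 * g + 0.436   * b + 128.f);
        const uchar v = cv::saturate_cast<uchar>( 0.615   * r - 0.51499 * g - 0.10001 * b + 128.f);

        // In the packed output the byte order per pixel pair is U0 Y0 V0 Y1:
        // the second pixel of a pair contributes only its luma.
        std::printf("Y=%d U=%d V=%d\n", int(y), int(u), int(v));
        return 0;
    }
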
 
index 9f53d36..6cf4e5e 100644 (file)
@@ -61,7 +61,6 @@ struct g_api_ocv_pair_mat_mat {
 namespace
 {
 
-
 //declare test cases for matrix and scalar operators
 g_api_ocv_pair_mat_scalar opPlus =  {std::string{"operator+"},
                                     [](cv::GMat in,cv::GScalar c){return in+c;},
@@ -184,9 +183,12 @@ g_api_ocv_pair_mat_mat opXor = {std::string{"operator^"},
                                         [](const cv::Mat& in1, const cv::Mat& in2, cv::Mat& out){cv::bitwise_xor(in1, in2, out);}};
 
 } // anonymous namespace
-struct MathOperatorMatScalarTest : public TestParams<std::tuple<compare_f, g_api_ocv_pair_mat_scalar,int,cv::Size,int,bool,cv::GCompileArgs>>{};
-struct MathOperatorMatMatTest : public TestParams<std::tuple<compare_f, g_api_ocv_pair_mat_mat,int,cv::Size,int,bool,cv::GCompileArgs>>{};
-struct NotOperatorTest : public TestParams<std::tuple<int,cv::Size,bool,cv::GCompileArgs>> {};
+
+GAPI_TEST_FIXTURE(MathOperatorMatScalarTest, initMatsRandU,
+    FIXTURE_API(CompareMats, g_api_ocv_pair_mat_scalar), 2, cmpF, op)
+GAPI_TEST_FIXTURE(MathOperatorMatMatTest, initMatsRandU,
+    FIXTURE_API(CompareMats, g_api_ocv_pair_mat_mat), 2, cmpF, op)
+GAPI_TEST_FIXTURE(NotOperatorTest, initMatrixRandU, <>, 0)
 } // opencv_test
 
 #endif // OPENCV_GAPI_OPERATOR_TESTS_COMMON_HPP
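
Note: the operator suites above stay table-driven: each g_api_ocv_pair_* entry couples a printable name, a lambda that builds the G-API expression and a lambda that runs the OpenCV reference. Reduced to plain OpenCV so it stays self-contained, the shape of that table is roughly the following; the struct and names below are illustrative only.

    #include <functional>
    #include <string>
    #include <vector>
    #include <opencv2/core.hpp>

    struct NamedBinaryOp
    {
        std::string name;                                                   // used in failure messages
        std::function<void(const cv::Mat&, const cv::Mat&, cv::Mat&)> ref;  // OpenCV reference half
    };

    int main()
    {
        const std::vector<NamedBinaryOp> ops = {
            {"operator+", [](const cv::Mat& a, const cv::Mat& b, cv::Mat& d){ cv::add(a, b, d); }},
            {"operator&", [](const cv::Mat& a, const cv::Mat& b, cv::Mat& d){ cv::bitwise_and(a, b, d); }},
        };

        cv::Mat a(4, 4, CV_8UC1, cv::Scalar(3)), b(4, 4, CV_8UC1, cv::Scalar(5)), dst;
        for (const auto& op : ops)
            op.ref(a, b, dst);   // the real tests also run the G-API half and compare the two outputs
        return 0;
    }
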
index 7ec702a..44bcc9b 100644 (file)
@@ -14,15 +14,6 @@ namespace opencv_test
 {
 TEST_P(MathOperatorMatScalarTest, OperatorAccuracyTest )
 {
-    compare_f cmpF;
-    g_api_ocv_pair_mat_scalar op;
-    int type = 0, dtype = 0;
-    cv::Size sz;
-    bool initOutMatr = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, op, type, sz, dtype, initOutMatr, compile_args) = GetParam();
-    initMatsRandU(type, sz, dtype, initOutMatr);
-
     auto fun_gapi = op.g_api_function;
     auto fun_ocv = op.ocv_function ;
 
@@ -33,7 +24,7 @@ TEST_P(MathOperatorMatScalarTest, OperatorAccuracyTest )
     auto out = fun_gapi(in1, in2);
     cv::GComputation c(GIn(in1, in2), GOut(out));
 
-    c.apply(gin(in_mat1, sc), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, sc), gout(out_mat_gapi), getCompileArgs());
 
     fun_ocv(in_mat1, sc, out_mat_ocv);
 
@@ -46,15 +37,6 @@ TEST_P(MathOperatorMatScalarTest, OperatorAccuracyTest )
 
 TEST_P(MathOperatorMatMatTest, OperatorAccuracyTest )
 {
-    compare_f cmpF;
-    g_api_ocv_pair_mat_mat op;
-    int type = 0, dtype = 0;
-    cv::Size sz;
-    bool initOutMatr = false;
-    cv::GCompileArgs compile_args;
-    std::tie(cmpF, op, type, sz, dtype, initOutMatr, compile_args) = GetParam();
-    initMatsRandU(type, sz, dtype, initOutMatr);
-
     auto fun_gapi = op.g_api_function;
     auto fun_ocv = op.ocv_function ;
 
@@ -65,7 +47,7 @@ TEST_P(MathOperatorMatMatTest, OperatorAccuracyTest )
     auto out = fun_gapi(in1, in2);
     cv::GComputation c(GIn(in1, in2), GOut(out));
 
-    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), std::move(compile_args));
+    c.apply(gin(in_mat1, in_mat2), gout(out_mat_gapi), getCompileArgs());
 
     fun_ocv(in_mat1, in_mat2, out_mat_ocv);
 
@@ -78,16 +60,12 @@ TEST_P(MathOperatorMatMatTest, OperatorAccuracyTest )
 
 TEST_P(NotOperatorTest, OperatorAccuracyTest)
 {
-    cv::Size sz_in = std::get<1>(GetParam());
-    initMatrixRandU(std::get<0>(GetParam()), sz_in, std::get<0>(GetParam()), std::get<2>(GetParam()));
-    cv::GCompileArgs compile_args;
-
     // G-API code //////////////////////////////////////////////////////////////
     cv::GMat in;
     auto out = ~in;
     cv::GComputation c(in, out);
 
-    c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+    c.apply(in_mat1, out_mat_gapi, getCompileArgs());
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
@@ -96,7 +74,7 @@ TEST_P(NotOperatorTest, OperatorAccuracyTest)
     // Comparison //////////////////////////////////////////////////////////////
     {
         EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
+        EXPECT_EQ(out_mat_gapi.size(), sz);
     }
 }
 } // opencv_test
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests.cpp
new file mode 100644 (file)
index 0000000..8e845e6
--- /dev/null
@@ -0,0 +1,9 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#include "../test_precomp.hpp"
+#include "gapi_render_tests_inl.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests.hpp
new file mode 100644 (file)
index 0000000..84df8c1
--- /dev/null
@@ -0,0 +1,73 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_RENDER_TESTS_HPP
+#define OPENCV_GAPI_RENDER_TESTS_HPP
+
+#include "gapi_tests_common.hpp"
+#include "api/render_priv.hpp"
+
+namespace opencv_test
+{
+
+using Points            = std::vector<cv::Point>;
+using Rects             = std::vector<cv::Rect>;
+using PairOfPoints      = std::pair<cv::Point, cv::Point>;
+using VecOfPairOfPoints = std::vector<PairOfPoints>;
+
+template<class T>
+class RenderWithParam : public TestWithParam<T>
+{
+protected:
+    void Init()
+    {
+        MatType type = CV_8UC3;
+        out_mat_ocv  = cv::Mat(sz, type, cv::Scalar(255));
+        out_mat_gapi = cv::Mat(sz, type, cv::Scalar(255));
+
+        if (isNV12Format) {
+            /* NB: When converting data from BGR to NV12, data loss occurs,
+             * so the reference data is subjected to the same transformation
+             * for correct comparison of the test results */
+            cv::gapi::wip::draw::BGR2NV12(out_mat_ocv, y, uv);
+            cv::cvtColorTwoPlane(y, uv, out_mat_ocv, cv::COLOR_YUV2BGR_NV12);
+        }
+    }
+
+    void Run()
+    {
+        if (isNV12Format) {
+            cv::gapi::wip::draw::BGR2NV12(out_mat_gapi, y, uv);
+            cv::gapi::wip::draw::render(y, uv, prims);
+            cv::cvtColorTwoPlane(y, uv, out_mat_gapi, cv::COLOR_YUV2BGR_NV12);
+
+            // NB: Also due to data loss
+            cv::gapi::wip::draw::BGR2NV12(out_mat_ocv, y, uv);
+            cv::cvtColorTwoPlane(y, uv, out_mat_ocv, cv::COLOR_YUV2BGR_NV12);
+        } else {
+            cv::gapi::wip::draw::render(out_mat_gapi, prims);
+        }
+    }
+
+    cv::Size sz;
+    cv::Scalar color;
+    int thick;
+    int lt;
+    bool isNV12Format;
+    std::vector<cv::gapi::wip::draw::Prim> prims;
+    cv::Mat y, uv;
+    cv::Mat out_mat_ocv, out_mat_gapi;
+};
+
+struct RenderTextTest   : public RenderWithParam <std::tuple<cv::Size,std::string,Points,int,double,cv::Scalar,int,int,bool,bool>> {};
+struct RenderRectTest   : public RenderWithParam <std::tuple<cv::Size,Rects,cv::Scalar,int,int,int,bool>>                          {};
+struct RenderCircleTest : public RenderWithParam <std::tuple<cv::Size,Points,int,cv::Scalar,int,int,int,bool>>                     {};
+struct RenderLineTest   : public RenderWithParam <std::tuple<cv::Size,VecOfPairOfPoints,cv::Scalar,int,int,int,bool>>              {};
+
+} // opencv_test
+
+#endif //OPENCV_GAPI_RENDER_TESTS_HPP
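The NV12 branch above depends on pushing both the reference image and the G-API output through the same lossy BGR -> NV12 -> BGR round trip before comparison, since chroma subsampling makes a bit-exact match against the original BGR data impossible. A minimal sketch of that round trip, assuming the BGR2NV12 helper declared in api/render_priv.hpp:

    #include <opencv2/imgproc.hpp>
    #include "api/render_priv.hpp"

    // Convert a BGR image to NV12 planes and back; the result differs slightly
    // from the input because the chroma channels are subsampled, which is why
    // the reference must undergo the same transformation as the rendered image.
    static cv::Mat roundTripNV12(const cv::Mat& bgr)
    {
        cv::Mat y, uv, out;
        cv::gapi::wip::draw::BGR2NV12(bgr, y, uv);
        cv::cvtColorTwoPlane(y, uv, out, cv::COLOR_YUV2BGR_NV12);
        return out;
    }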
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests_inl.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_render_tests_inl.hpp
new file mode 100644 (file)
index 0000000..aded1ad
--- /dev/null
@@ -0,0 +1,96 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_RENDER_TESTS_INL_HPP
+#define OPENCV_GAPI_RENDER_TESTS_INL_HPP
+
+#include "gapi_render_tests.hpp"
+
+#include <opencv2/gapi/render.hpp>
+
+namespace opencv_test
+{
+
+TEST_P(RenderTextTest, AccuracyTest)
+{
+    std::vector<cv::Point> points;
+    std::string text;
+    int         ff;
+    double      fs;
+    bool        blo;
+
+    std::tie(sz, text, points, ff, fs, color, thick, lt, blo, isNV12Format) = GetParam();
+    Init();
+
+    for (const auto& p : points) {
+        cv::putText(out_mat_ocv, text, p, ff, fs, color, thick, lt, blo);
+        prims.emplace_back(cv::gapi::wip::draw::Text{text, p, ff, fs, color, thick, lt, blo});
+    }
+
+    Run();
+
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+}
+
+TEST_P(RenderRectTest, AccuracyTest)
+{
+    std::vector<cv::Rect> rects;
+    int shift;
+
+    std::tie(sz, rects, color, thick, lt, shift, isNV12Format) = GetParam();
+    Init();
+
+    for (const auto& r : rects) {
+        cv::rectangle(out_mat_ocv, r, color, thick, lt, shift);
+        prims.emplace_back(cv::gapi::wip::draw::Rect{r, color, thick, lt, shift});
+    }
+
+    Run();
+
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+}
+
+TEST_P(RenderCircleTest, AccuracyTest)
+{
+    std::vector<cv::Point> points;
+    int radius;
+    int shift;
+
+    std::tie(sz, points, radius, color, thick, lt, shift, isNV12Format) = GetParam();
+    Init();
+
+    for (const auto& p : points) {
+        cv::circle(out_mat_ocv, p, radius, color, thick, lt, shift);
+        prims.emplace_back(cv::gapi::wip::draw::Circle{p, radius, color, thick, lt, shift});
+    }
+
+    Run();
+
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+}
+
+TEST_P(RenderLineTest, AccuracyTest)
+{
+    std::vector<std::pair<cv::Point, cv::Point>> points;
+    int shift;
+
+    std::tie(sz, points, color, thick, lt, shift, isNV12Format) = GetParam();
+    Init();
+
+    for (const auto& p : points) {
+        cv::line(out_mat_ocv, p.first, p.second, color, thick, lt, shift);
+        prims.emplace_back(cv::gapi::wip::draw::Line{p.first, p.second, color, thick, lt, shift});
+    }
+
+    Run();
+
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
+}
+
+} // opencv_test
+
+#endif //OPENCV_GAPI_RENDER_TESTS_INL_HPP
index 937587c..3d3141f 100644 (file)
@@ -2,13 +2,20 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
+#ifndef OPENCV_GAPI_TESTS_COMMON_HPP
+#define OPENCV_GAPI_TESTS_COMMON_HPP
 
 #include <iostream>
+#include <tuple>
+#include <type_traits>
 
-#include "opencv2/ts.hpp"
-#include "opencv2/gapi.hpp"
+#include <opencv2/ts.hpp>
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/util/util.hpp>
+
+#include "gapi_tests_helpers.hpp"
 
 namespace
 {
@@ -41,6 +48,15 @@ public:
         return cv::Scalar(s1, s2, s3, s4);
     }
 
+    void initOutMats(cv::Size sz_in, int dtype)
+    {
+        if (dtype != -1)
+        {
+            out_mat_gapi = cv::Mat(sz_in, dtype);
+            out_mat_ocv = cv::Mat(sz_in, dtype);
+        }
+    }
+
     void initMatsRandU(int type, cv::Size sz_in, int dtype, bool createOutputMatrices = true)
     {
         in_mat1 = cv::Mat(sz_in, type);
@@ -50,10 +66,9 @@ public:
         cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
         cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(255));
 
-        if (createOutputMatrices && dtype != -1)
+        if (createOutputMatrices)
         {
-            out_mat_gapi = cv::Mat (sz_in, dtype);
-            out_mat_ocv = cv::Mat (sz_in, dtype);
+            initOutMats(sz_in, dtype);
         }
     }
 
@@ -62,28 +77,28 @@ public:
         in_mat1 = cv::Mat(sz_in, type);
 
         sc = initScalarRandU(100);
-
         cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
 
-        if (createOutputMatrices && dtype != -1)
+        if (createOutputMatrices)
         {
-            out_mat_gapi = cv::Mat (sz_in, dtype);
-            out_mat_ocv = cv::Mat (sz_in, dtype);
+            initOutMats(sz_in, dtype);
         }
     }
 
-    void initMatsRandN(int type, cv::Size sz_in, int dtype, bool createOutputMatrices = true)
+    void initMatrixRandN(int type, cv::Size sz_in, int dtype, bool createOutputMatrices = true)
     {
-        in_mat1  = cv::Mat(sz_in, type);
+        in_mat1 = cv::Mat(sz_in, type);
         cv::randn(in_mat1, cv::Scalar::all(127), cv::Scalar::all(40.f));
 
-        if (createOutputMatrices  && dtype != -1)
+        if (createOutputMatrices)
         {
-            out_mat_gapi = cv::Mat(sz_in, dtype);
-            out_mat_ocv = cv::Mat(sz_in, dtype);
+            initOutMats(sz_in, dtype);
         }
     }
 
+    // empty function intended to show that nothing is to be initialized via TestFunctional methods
+    void initNothing(int, cv::Size, int, bool = true) {}
+
     static cv::Mat nonZeroPixels(const cv::Mat& mat)
     {
         int channels = mat.channels();
@@ -117,6 +132,134 @@ using compare_f = std::function<bool(const cv::Mat &a, const cv::Mat &b)>;
 
 using compare_scalar_f = std::function<bool(const cv::Scalar &a, const cv::Scalar &b)>;
 
+// FIXME: re-use MatType. current problem: "special values" interpreted incorrectly (-1 is printed
+//        as 16FC512)
+struct MatType2
+{
+public:
+    MatType2(int val = 0) : _value(val) {}
+    operator int() const { return _value; }
+    friend std::ostream& operator<<(std::ostream& os, const MatType2& t)
+    {
+        switch (t)
+        {
+            case -1: return os << "SAME_TYPE";
+            default: PrintTo(MatType(t), &os); return os;
+        }
+    }
+private:
+    int _value;
+};
+
+// Universal parameter wrapper for common (pre-defined) and specific (user-defined) parameters
+template<typename ...SpecificParams>
+struct Params
+{
+    using gcomp_args_function_t = cv::GCompileArgs(*)();
+    using common_params_t = std::tuple<MatType2, cv::Size, MatType2, gcomp_args_function_t>;
+    using specific_params_t = std::tuple<SpecificParams...>;
+    using params_t = std::tuple<MatType2, cv::Size, MatType2, gcomp_args_function_t, SpecificParams...>;
+    static constexpr const size_t common_params_size = std::tuple_size<common_params_t>::value;
+    static constexpr const size_t specific_params_size = std::tuple_size<specific_params_t>::value;
+
+    template<size_t I>
+    static const typename std::tuple_element<I, common_params_t>::type&
+    getCommon(const params_t& t)
+    {
+        static_assert(I < common_params_size, "Index out of range");
+        return std::get<I>(t);
+    }
+
+    template<size_t I>
+    static const typename std::tuple_element<I, specific_params_t>::type&
+    getSpecific(const params_t& t)
+    {
+        static_assert(specific_params_size > 0,
+            "Impossible to call this function: no specific parameters specified");
+        static_assert(I < specific_params_size, "Index out of range");
+        return std::get<common_params_size + I>(t);
+    }
+};
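A quick usage sketch of the wrapper above (the values are hypothetical; it assumes the opencv_test namespace and the CompareMats/AbsExact helpers defined further down in this header):

    using P = Params<CompareMats, int>;
    P::params_t t = std::make_tuple(MatType2(CV_8UC1), cv::Size(64, 64), MatType2(-1),
                                    +[]() { return cv::GCompileArgs{}; },
                                    AbsExact().to_compare_obj(), 42);
    MatType2    type = P::getCommon<0>(t);   // CV_8UC1
    cv::Size    sz   = P::getCommon<1>(t);   // 64x64
    CompareMats cmp  = P::getSpecific<0>(t); // the AbsExact comparator
    int         n    = P::getSpecific<1>(t); // 42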
+
+// Base class for test fixtures
+template<typename ...SpecificParams>
+struct TestWithParamBase : TestFunctional,
+    TestWithParam<typename Params<SpecificParams...>::params_t>
+{
+    using AllParams = Params<SpecificParams...>;
+
+    MatType2 type = getCommonParam<0>();
+    cv::Size sz = getCommonParam<1>();
+    MatType2 dtype = getCommonParam<2>();
+
+    // Get common (pre-defined) parameter value by index
+    template<size_t I>
+    inline auto getCommonParam() const
+        -> decltype(AllParams::template getCommon<I>(this->GetParam()))
+    {
+        return AllParams::template getCommon<I>(this->GetParam());
+    }
+
+    // Get specific (user-defined) parameter value by index
+    template<size_t I>
+    inline auto getSpecificParam() const
+        -> decltype(AllParams::template getSpecific<I>(this->GetParam()))
+    {
+        return AllParams::template getSpecific<I>(this->GetParam());
+    }
+
+    // Return G-API compile arguments specified for test fixture
+    inline cv::GCompileArgs getCompileArgs() const
+    {
+        return getCommonParam<3>()();
+    }
+};
+
+/**
+ * @private
+ * @brief Create G-API test fixture with TestWithParamBase base class
+ * @param Fixture   test fixture name
+ * @param InitF     callable that will initialize default available members (from TestFunctional)
+ * @param API       base class API. Specifies types of user-defined parameters. If there are no such
+ *                  parameters, empty angle brackets ("<>") must be specified.
+ * @param Number    number of user-defined parameters (corresponds to the number of types in API).
+ *                  If there are no such parameters, 0 must be specified.
+ * @param ...       list of names of user-defined parameters. If there are no parameters, the list
+ *                  must be empty.
+ */
+#define GAPI_TEST_FIXTURE(Fixture, InitF, API, Number, ...) \
+    struct Fixture : public TestWithParamBase API { \
+        static_assert(Number == AllParams::specific_params_size, \
+            "Number of user-defined parameters doesn't match size of __VA_ARGS__"); \
+        __WRAP_VAARGS(DEFINE_SPECIFIC_PARAMS_##Number(__VA_ARGS__)) \
+        Fixture() { InitF(type, sz, dtype); } \
+    };
+
+// Wrapper for test fixture API. Use to specify multiple types.
+// Example: FIXTURE_API(int, bool) expands to <int, bool>
+#define FIXTURE_API(...) <__VA_ARGS__>
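A usage sketch of the two macros (the fixture and member names here are hypothetical, not part of this change):

    // Two user-defined parameters, exposed as members 'cmpF' and 'scale'
    GAPI_TEST_FIXTURE(HypotheticalScaleTest, initMatrixRandU,
                      FIXTURE_API(CompareMats, double), 2, cmpF, scale)

    // No user-defined parameters at all
    GAPI_TEST_FIXTURE(HypotheticalPlainTest, initMatrixRandU, <>, 0)

    TEST_P(HypotheticalScaleTest, AccuracyTest)
    {
        // type, sz, dtype, cmpF, scale and getCompileArgs() are available
        // directly as fixture members -- no GetParam()/std::tie unpacking needed.
    }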
+
+template<typename T1, typename T2>
+struct CompareF
+{
+    using callable_t = std::function<bool(const T1& a, const T2& b)>;
+    CompareF(callable_t&& cmp, std::string&& cmp_name) :
+        _comparator(std::move(cmp)), _name(std::move(cmp_name)) {}
+    bool operator()(const T1& a, const T2& b) const
+    {
+        return _comparator(a, b);
+    }
+    friend std::ostream& operator<<(std::ostream& os, const CompareF<T1, T2>& obj)
+    {
+        return os << obj._name;
+    }
+private:
+    callable_t _comparator;
+    std::string _name;
+};
+
+using CompareMats = CompareF<cv::Mat, cv::Mat>;
+using CompareScalars = CompareF<cv::Scalar, cv::Scalar>;
 
 template<typename T>
 struct Wrappable
@@ -129,6 +272,14 @@ struct Wrappable
             return t(a, b);
         };
     }
+
+    CompareMats to_compare_obj()
+    {
+        T t = *static_cast<T*const>(this);
+        std::stringstream ss;
+        ss << t;
+        return CompareMats(to_compare_f(), ss.str());
+    }
 };
 
 template<typename T>
@@ -142,6 +293,14 @@ struct WrappableScalar
             return t(a, b);
         };
     }
+
+    CompareScalars to_compare_obj()
+    {
+        T t = *static_cast<T*const>(this);
+        std::stringstream ss;
+        ss << t;
+        return CompareScalars(to_compare_f(), ss.str());
+    }
 };
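to_compare_obj() packages a comparator into a printable object, so the comparison policy shows up by name in the parameterized test listing instead of as an opaque std::function. A minimal sketch, assuming <opencv2/core.hpp> and this header are included:

    CompareMats cmp = AbsExact().to_compare_obj();
    cv::Mat a(8, 8, CV_8UC1, cv::Scalar::all(7));
    cv::Mat b = a.clone();
    bool equal = cmp(a, b);        // invokes AbsExact::operator()
    std::cout << cmp << std::endl; // prints "AbsExact()"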
 
 
@@ -161,7 +320,10 @@ public:
             return true;
         }
     }
-private:
+    friend std::ostream& operator<<(std::ostream& os, const AbsExact&)
+    {
+        return os << "AbsExact()";
+    }
 };
 
 class AbsTolerance : public Wrappable<AbsTolerance>
@@ -181,6 +343,10 @@ public:
             return true;
         }
     }
+    friend std::ostream& operator<<(std::ostream& os, const AbsTolerance& obj)
+    {
+        return os << "AbsTolerance(" << std::to_string(obj._tol) << ")";
+    }
 private:
     double _tol;
 };
@@ -209,6 +375,10 @@ public:
             }
         }
     }
+    friend std::ostream& operator<<(std::ostream& os, const Tolerance_FloatRel_IntAbs& obj)
+    {
+        return os << "Tolerance_FloatRel_IntAbs(" << obj._tol << ", " << obj._tol8u << ")";
+    }
 private:
     double _tol;
     double _tol8u;
@@ -238,6 +408,10 @@ public:
             return true;
         }
     }
+    friend std::ostream& operator<<(std::ostream& os, const AbsSimilarPoints& obj)
+    {
+        return os << "AbsSimilarPoints(" << obj._tol << ", " << obj._percent << ")";
+    }
 private:
     double _tol;
     double _percent;
@@ -270,6 +444,11 @@ public:
         }
         return true;
     }
+    friend std::ostream& operator<<(std::ostream& os, const ToleranceFilter& obj)
+    {
+        return os << "ToleranceFilter(" << obj._tol << ", " << obj._tol8u << ", "
+                  << obj._inf_tol << ")";
+    }
 private:
     double _tol;
     double _tol8u;
@@ -298,6 +477,10 @@ public:
         }
         return true;
     }
+    friend std::ostream& operator<<(std::ostream& os, const ToleranceColor& obj)
+    {
+        return os << "ToleranceColor(" << obj._tol << ", " << obj._inf_tol << ")";
+    }
 private:
     double _tol;
     double _inf_tol;
@@ -320,24 +503,66 @@ public:
             return true;
         }
     }
+    friend std::ostream& operator<<(std::ostream& os, const AbsToleranceScalar& obj)
+    {
+        return os << "AbsToleranceScalar(" << std::to_string(obj._tol) << ")";
+    }
 private:
     double _tol;
 };
-
 } // namespace opencv_test
 
 namespace
 {
-    inline std::ostream& operator<<(std::ostream& os, const opencv_test::compare_f&)
+inline std::ostream& operator<<(std::ostream& os, const opencv_test::compare_f&)
+{
+    return os << "compare_f";
+}
+
+inline std::ostream& operator<<(std::ostream& os, const opencv_test::compare_scalar_f&)
+{
+    return os << "compare_scalar_f";
+}
+}  // anonymous namespace
+
+// Note: namespace must match the namespace of the type of the printed object
+namespace cv
+{
+inline std::ostream& operator<<(std::ostream& os, CmpTypes op)
+{
+#define CASE(v) case CmpTypes::v: os << #v; break
+    switch (op)
     {
-        return os << "compare_f";
+        CASE(CMP_EQ);
+        CASE(CMP_GT);
+        CASE(CMP_GE);
+        CASE(CMP_LT);
+        CASE(CMP_LE);
+        CASE(CMP_NE);
+        default: GAPI_Assert(false && "unknown CmpTypes value");
     }
+#undef CASE
+    return os;
 }
 
-namespace
+inline std::ostream& operator<<(std::ostream& os, NormTypes op)
 {
-    inline std::ostream& operator<<(std::ostream& os, const opencv_test::compare_scalar_f&)
+#define CASE(v) case NormTypes::v: os << #v; break
+    switch (op)
     {
-        return os << "compare_scalar_f";
+        CASE(NORM_INF);
+        CASE(NORM_L1);
+        CASE(NORM_L2);
+        CASE(NORM_L2SQR);
+        CASE(NORM_HAMMING);
+        CASE(NORM_HAMMING2);
+        CASE(NORM_RELATIVE);
+        CASE(NORM_MINMAX);
+        default: GAPI_Assert(false && "unknown NormTypes value");
     }
+#undef CASE
+    return os;
 }
+}  // namespace cv
+
+#endif //OPENCV_GAPI_TESTS_COMMON_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_helpers.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_helpers.hpp
new file mode 100644 (file)
index 0000000..db1083d
--- /dev/null
@@ -0,0 +1,67 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_TESTS_HELPERS_HPP
+#define OPENCV_GAPI_TESTS_HELPERS_HPP
+
+#include <tuple>
+#include <limits>
+
+namespace opencv_test
+{
+
+// Ensure correct __VA_ARGS__ expansion on Windows
+#define __WRAP_VAARGS(x) x
+
+#define __TUPLE_PARAM_TYPE(i) std::tuple_element<i, AllParams::specific_params_t>::type
+
+// implementation of recursive in-class declaration and initialization of member variables
+#define __DEFINE_PARAMS_IMPL1(index, param_name) \
+    __TUPLE_PARAM_TYPE(index) param_name = getSpecificParam<index>();
+
+#define __DEFINE_PARAMS_IMPL2(index, param_name, ...) \
+    __TUPLE_PARAM_TYPE(index) param_name = getSpecificParam<index>(); \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL1(index+1, __VA_ARGS__))
+
+#define __DEFINE_PARAMS_IMPL3(index, param_name, ...) \
+    __TUPLE_PARAM_TYPE(index) param_name = getSpecificParam<index>(); \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL2(index+1, __VA_ARGS__))
+
+#define __DEFINE_PARAMS_IMPL4(index, param_name, ...) \
+    __TUPLE_PARAM_TYPE(index) param_name = getSpecificParam<index>(); \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL3(index+1, __VA_ARGS__))
+
+#define __DEFINE_PARAMS_IMPL5(index, param_name, ...) \
+    __TUPLE_PARAM_TYPE(index) param_name = getSpecificParam<index>(); \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL4(index+1, __VA_ARGS__))
+
+#define __DEFINE_PARAMS_IMPL6(index, param_name, ...) \
+    __TUPLE_PARAM_TYPE(index) param_name = getSpecificParam<index>(); \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL5(index+1, __VA_ARGS__))
+
+// user interface to define member variables of specified names
+#define DEFINE_SPECIFIC_PARAMS_0()
+
+#define DEFINE_SPECIFIC_PARAMS_1(...) \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL1(0, __VA_ARGS__))
+
+#define DEFINE_SPECIFIC_PARAMS_2(...) \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL2(0, __VA_ARGS__))
+
+#define DEFINE_SPECIFIC_PARAMS_3(...) \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL3(0, __VA_ARGS__))
+
+#define DEFINE_SPECIFIC_PARAMS_4(...) \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL4(0, __VA_ARGS__))
+
+#define DEFINE_SPECIFIC_PARAMS_5(...) \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL5(0, __VA_ARGS__))
+
+#define DEFINE_SPECIFIC_PARAMS_6(...) \
+    __WRAP_VAARGS(__DEFINE_PARAMS_IMPL6(0, __VA_ARGS__))
+} // namespace opencv_test
+
+#endif //OPENCV_GAPI_TESTS_HELPERS_HPP
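Inside a fixture generated by GAPI_TEST_FIXTURE, a call such as DEFINE_SPECIFIC_PARAMS_2(cmpF, op) unrolls, step by step, to roughly:

    std::tuple_element<0, AllParams::specific_params_t>::type cmpF = getSpecificParam<0>();
    std::tuple_element<1, AllParams::specific_params_t>::type op   = getSpecificParam<1>();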
index 5414263..665d525 100644 (file)
@@ -2,75 +2,69 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_core_tests.hpp"
-#include "opencv2/gapi/cpu/core.hpp"
+#include <opencv2/gapi/cpu/core.hpp>
 
-#define CORE_CPU cv::gapi::core::cpu::kernels()
+namespace
+{
+#define CORE_CPU [] () { return cv::compile_args(cv::gapi::core::cpu::kernels()); }
+}  // anonymous namespace
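CORE_CPU is now a zero-argument callable rather than a kernel package: it matches the gcomp_args_function_t slot of the common test parameters, and each test materializes the actual arguments via getCompileArgs() right before c.apply(...). A minimal sketch of the mechanism:

    using gcomp_args_function_t = cv::GCompileArgs(*)();

    // A capture-less lambda converts to the plain function pointer expected by
    // the fixture's fourth common parameter.
    gcomp_args_function_t get_core_cpu_args =
        [] () { return cv::compile_args(cv::gapi::core::cpu::kernels()); };

    cv::GCompileArgs args = get_core_cpu_args(); // what getCompileArgs() returns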
 
 namespace opencv_test
 {
 
-
 // FIXME: Wut? See MulTestCPU/MathOpTest below (duplicate?)
 INSTANTIATE_TEST_CASE_P(AddTestCPU, MathOpTest,
-                        Combine(Values(ADD, MUL),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(1.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(false),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values(CORE_CPU),
+                                Values(ADD, MUL),
+                                testing::Bool(),
+                                Values(1.0),
+                                Values(false)));
 
 INSTANTIATE_TEST_CASE_P(MulTestCPU, MathOpTest,
-                        Combine(Values(MUL),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(1.0, 0.5, 2.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(false),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values(CORE_CPU),
+                                Values(MUL),
+                                testing::Bool(),
+                                Values(1.0, 0.5, 2.0),
+                                Values(false)));
 
 INSTANTIATE_TEST_CASE_P(SubTestCPU, MathOpTest,
-                        Combine(Values(SUB),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values (1.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
+                                Values(CORE_CPU),
+                                Values(SUB),
                                 testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values (1.0),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(DivTestCPU, MathOpTest,
-                        Combine(Values(DIV),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values (1.0, 0.5, 2.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
+                                Values(CORE_CPU),
+                                Values(DIV),
                                 testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values (1.0, 0.5, 2.0),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(MulTestCPU, MulDoubleTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -78,8 +72,7 @@ INSTANTIATE_TEST_CASE_P(MulTestCPU, MulDoubleTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(DivTestCPU, DivTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -87,8 +80,7 @@ INSTANTIATE_TEST_CASE_P(DivTestCPU, DivTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(DivCTestCPU, DivCTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -96,132 +88,133 @@ INSTANTIATE_TEST_CASE_P(DivCTestCPU, DivCTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(MeanTestCPU, MeanTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(MaskTestCPU, MaskTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(SelectTestCPU, SelectTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(Polar2CartCPU, Polar2CartTest,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_32FC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_32FC1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(Cart2PolarCPU, Cart2PolarTest,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_32FC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_32FC1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(PhaseCPU, PhaseTest,
                         Combine(Values(CV_32F, CV_32FC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU),
+         /* angle_in_degrees */ testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(SqrtCPU, SqrtTest,
                         Combine(Values(CV_32F, CV_32FC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(CompareTestCPU, CmpTest,
-                        Combine(Values(CMP_EQ, CMP_GE, CMP_NE, CMP_GT, CMP_LT, CMP_LE),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintCmpCoreParams());
+                                Values(CV_8U),
+                                Values(CORE_CPU),
+                                Values(CMP_EQ, CMP_GE, CMP_NE, CMP_GT, CMP_LT, CMP_LE),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(BitwiseTestCPU, BitwiseTest,
-                        Combine(Values(AND, OR, XOR),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintBWCoreParams());
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(AND, OR, XOR)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotTestCPU, NotTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(MinTestCPU, MinTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(MaxTestCPU, MaxTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(SumTestCPU, SumTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
+                                Values(-1),
                                 //Values(1e-5),
-                                Values(AbsToleranceScalar(1e-5).to_compare_f()),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU),
+                                Values(AbsToleranceScalar(1e-5).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffTestCPU, AbsDiffTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffCTestCPU, AbsDiffCTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(AddWeightedTestCPU, AddWeightedTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -229,45 +222,45 @@ INSTANTIATE_TEST_CASE_P(AddWeightedTestCPU, AddWeightedTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(NormTestCPU, NormTest,
-                        Combine(Values(NORM_INF, NORM_L1, NORM_L2),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                //Values(1e-5),
-                                Values(AbsToleranceScalar(1e-5).to_compare_f()),
-                                Values(cv::compile_args(CORE_CPU))),
-                        opencv_test::PrintNormCoreParams());
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(AbsToleranceScalar(1e-5).to_compare_obj()),
+                                Values(NORM_INF, NORM_L1, NORM_L2)));
 
 INSTANTIATE_TEST_CASE_P(IntegralTestCPU, IntegralTest,
                         Combine(Values( CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(ThresholdTestCPU, ThresholdTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC,
+                                    cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)));
 
 INSTANTIATE_TEST_CASE_P(ThresholdTestCPU, ThresholdOTTest,
                         Combine(Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::THRESH_OTSU, cv::THRESH_TRIANGLE),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(cv::THRESH_OTSU, cv::THRESH_TRIANGLE)));
 
 
 INSTANTIATE_TEST_CASE_P(InRangeTestCPU, InRangeTest,
@@ -275,144 +268,192 @@ INSTANTIATE_TEST_CASE_P(InRangeTestCPU, InRangeTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(Split3TestCPU, Split3Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8UC1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(Split4TestCPU, Split4Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC4),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8UC1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(ResizeTestCPU, ResizeTest,
-                        Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(AbsSimilarPoints(2, 0.05).to_compare_obj()),
                                 Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+                                Values(cv::Size(64,64),
+                                       cv::Size(30,30))));
+
+INSTANTIATE_TEST_CASE_P(ResizePTestCPU, ResizePTest,
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(AbsSimilarPoints(2, 0.05).to_compare_obj()),
+                                Values(cv::INTER_LINEAR),
                                 Values(cv::Size(64,64),
-                                       cv::Size(30,30)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                       cv::Size(30,30))));
 
 INSTANTIATE_TEST_CASE_P(ResizeTestCPU, ResizeTestFxFy,
-                        Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(AbsSimilarPoints(2, 0.05).to_compare_obj()),
+                                Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
                                 Values(0.5, 0.1),
-                                Values(0.5, 0.1),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(0.5, 0.1)));
 
 INSTANTIATE_TEST_CASE_P(Merge3TestCPU, Merge3Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8UC3),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(Merge4TestCPU, Merge4Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8UC4),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(RemapTestCPU, RemapTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(FlipTestCPU, FlipTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(0,1,-1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(0,1,-1)));
 
 INSTANTIATE_TEST_CASE_P(CropTestCPU, CropTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(cv::Rect(10, 8, 20, 35), cv::Rect(4, 10, 37, 50)),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(cv::Rect(10, 8, 20, 35), cv::Rect(4, 10, 37, 50))));
 
 INSTANTIATE_TEST_CASE_P(LUTTestCPU, LUTTest,
                         Combine(Values(CV_8UC1, CV_8UC3),
-                                Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8UC1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(LUTTestCustomCPU, LUTTest,
                         Combine(Values(CV_8UC3),
-                                Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8UC3),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(ConvertToCPU, ConvertToTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(CV_8U, CV_16U, CV_16S, CV_32F),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CV_8U, CV_16U, CV_16S, CV_32F),
+                                Values(CORE_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(2.5, 1.0, -1.0),
+                                Values(250.0, 0.0, -128.0)));
 
 INSTANTIATE_TEST_CASE_P(ConcatHorTestCPU, ConcatHorTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(ConcatVertTestCPU, ConcatVertTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(ConcatVertVecTestCPU, ConcatVertVecTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(ConcatHorVecTestCPU, ConcatHorVecTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 
 INSTANTIATE_TEST_CASE_P(NormalizeTestCPU, NormalizeTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(AbsExact().to_compare_obj()),
                                 Values(0.0, 15.0),
                                 Values(1.0, 120.0, 255.0),
                                 Values(NORM_MINMAX, NORM_INF, NORM_L1, NORM_L2),
-                                Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1, CV_8U, CV_16U, CV_16S, CV_32F)));
+
+INSTANTIATE_TEST_CASE_P(BackendOutputAllocationTestCPU, BackendOutputAllocationTest,
+                        Combine(Values(CV_8UC3, CV_16SC2, CV_32FC1),
+                                Values(cv::Size(50, 50)),
+                                Values(-1),
+                                Values(CORE_CPU)));
+
+INSTANTIATE_TEST_CASE_P(BackendOutputAllocationLargeSizeWithCorrectSubmatrixTestCPU,
+                        BackendOutputAllocationLargeSizeWithCorrectSubmatrixTest,
+                        Combine(Values(CV_8UC3, CV_16SC2, CV_32FC1),
+                                Values(cv::Size(50, 50)),
+                                Values(-1),
+                                Values(CORE_CPU)));
+
+INSTANTIATE_TEST_CASE_P(ReInitOutTestCPU, ReInitOutTest,
+                        Combine(Values(CV_8UC3, CV_16SC4, CV_32FC1),
+                                Values(cv::Size(640, 480)),
+                                Values(-1),
+                                Values(CORE_CPU),
+                                Values(cv::Size(640, 400),
+                                       cv::Size(10, 480))));
 }
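All of the instantiations above now share one layout: the first four Combine() values feed the common fixture parameters (input type, size, output depth, compile-args callable), and anything after them feeds the user-defined parameters. A hypothetical instantiation annotated for reference:

    INSTANTIATE_TEST_CASE_P(HypotheticalCPU, HypotheticalFixture,
                            Combine(Values(CV_8UC1),          // input MatType
                                    Values(cv::Size(64, 64)), // matrix size
                                    Values(-1),               // output depth (-1 = same as input)
                                    Values(CORE_CPU),         // compile-args callable
                                    Values(AbsExact().to_compare_obj()))); // user-defined, if any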
index ccf8646..be158d0 100644 (file)
@@ -2,33 +2,33 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_core_tests.hpp"
 
-namespace opencv_test
+namespace
 {
+#define CORE_FLUID [] () { return cv::compile_args(cv::gapi::core::fluid::kernels()); }
+}  // anonymous namespace
 
-#define CORE_FLUID cv::gapi::core::fluid::kernels()
-
+namespace opencv_test
+{
 
 // FIXME: Windows accuracy problems after recent update!
 INSTANTIATE_TEST_CASE_P(MathOpTestFluid, MathOpTest,
-                        Combine(Values(ADD, SUB, DIV, MUL),
-                                testing::Bool(),
-                                Values(CV_8UC3, CV_8UC1, CV_16SC1, CV_32FC1),
-                                Values(1.0),
+                        Combine(Values(CV_8UC3, CV_8UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
+                                Values(CORE_FLUID),
+                                Values(ADD, SUB, DIV, MUL),
                                 testing::Bool(),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values(1.0),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(MulSTestFluid, MulDoubleTest,
                         Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
@@ -36,8 +36,7 @@ INSTANTIATE_TEST_CASE_P(MulSTestFluid, MulDoubleTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1), // FIXME: extend with more types
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(DivCTestFluid, DivCTest,
                         Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
@@ -45,35 +44,33 @@ INSTANTIATE_TEST_CASE_P(DivCTestFluid, DivCTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(CV_8U, CV_32F),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffTestFluid, AbsDiffTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffCTestFluid, AbsDiffCTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseTestFluid, BitwiseTest,
-                        Combine(Values(AND, OR, XOR),
-                                Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))),
-                        opencv_test::PrintBWCoreParams());
+                                Values(-1),
+                                Values(CORE_FLUID),
+                                Values(AND, OR, XOR)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotTestFluid, NotTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1),
@@ -81,8 +78,8 @@ INSTANTIATE_TEST_CASE_P(BitwiseNotTestFluid, NotTest,
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(MinTestFluid, MinTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1),
@@ -90,8 +87,8 @@ INSTANTIATE_TEST_CASE_P(MinTestFluid, MinTest,
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(MaxTestFluid, MaxTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1),
@@ -99,20 +96,19 @@ INSTANTIATE_TEST_CASE_P(MaxTestFluid, MaxTest,
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(CompareTestFluid, CmpTest,
-                        Combine(Values(CMP_EQ, CMP_GE, CMP_NE, CMP_GT, CMP_LT, CMP_LE),
-                                testing::Bool(),
-                                Values(CV_8UC3, CV_8UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC3, CV_8UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))),
-                        opencv_test::PrintCmpCoreParams());
+                                Values(CV_8U),
+                                Values(CORE_FLUID),
+                                Values(CMP_EQ, CMP_GE, CMP_NE, CMP_GT, CMP_LT, CMP_LE),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(AddWeightedTestFluid, AddWeightedTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
@@ -120,55 +116,63 @@ INSTANTIATE_TEST_CASE_P(AddWeightedTestFluid, AddWeightedTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
-                                testing::Bool(),
-                                Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_f()),
-                                //Values(0.5000005),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID),
+                                Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(LUTTestFluid, LUTTest,
                         Combine(Values(CV_8UC1, CV_8UC3),
-                                Values(CV_8UC1),
                                 Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_8UC1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(ConvertToFluid, ConvertToTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_32FC1),
-                                Values(CV_8U, CV_16U, CV_32F),
                                 Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_8U, CV_16U, CV_32F),
+                                Values(CORE_FLUID),
+                                Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_obj()),
+                                Values(2.5, 1.0, -1.0),
+                                Values(250.0, 0.0, -128.0)));
 
 INSTANTIATE_TEST_CASE_P(Split3TestFluid, Split3Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_8UC1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(Split4TestFluid, Split4Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC4),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_8UC1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(Merge3TestFluid, Merge3Test,
-                        Combine(Values(cv::Size(1920, 1080),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_8UC3),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(Merge4TestFluid, Merge4Test,
-                        Combine(Values(cv::Size(1920, 1080),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1920, 1080),
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_8UC4),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(SelectTestFluid, SelectTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1),
@@ -176,37 +180,41 @@ INSTANTIATE_TEST_CASE_P(SelectTestFluid, SelectTest,
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(Polar2CartFluid, Polar2CartTest,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_32FC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_32FC1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(Cart2PolarFluid, Cart2PolarTest,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_32FC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CV_32FC1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(PhaseFluid, PhaseTest,
                         Combine(Values(CV_32F, CV_32FC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID),
+         /* angle_in_degrees */ testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(SqrtFluid, SqrtTest,
                         Combine(Values(CV_32F, CV_32FC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
 INSTANTIATE_TEST_CASE_P(ThresholdTestFluid, ThresholdTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1),
@@ -214,11 +222,11 @@ INSTANTIATE_TEST_CASE_P(ThresholdTestFluid, ThresholdTest,
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
+                                Values(-1),
+                                Values(CORE_FLUID),
                                 Values(cv::THRESH_BINARY, cv::THRESH_BINARY_INV,
                                        cv::THRESH_TRUNC,
-                                       cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                       cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)));
 
 INSTANTIATE_TEST_CASE_P(InRangeTestFluid, InRangeTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1),
@@ -226,25 +234,46 @@ INSTANTIATE_TEST_CASE_P(InRangeTestFluid, InRangeTest,
                                        cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(-1),
+                                Values(CORE_FLUID)));
 
-INSTANTIATE_TEST_CASE_P(
-                        ResizeTestFluid, ResizeTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC3/*CV_8UC1, CV_16UC1, CV_16SC1*/),
-                                Values(/*cv::INTER_NEAREST,*/ cv::INTER_LINEAR/*, cv::INTER_AREA*/),
+INSTANTIATE_TEST_CASE_P(ResizeTestFluid, ResizeTest,
+                        Combine(Values(CV_8UC3/*CV_8UC1, CV_16UC1, CV_16SC1*/),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128),
                                        cv::Size(64, 64),
                                        cv::Size(30, 30)),
+                                Values(-1),
+                                Values(CORE_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(/*cv::INTER_NEAREST,*/ cv::INTER_LINEAR/*, cv::INTER_AREA*/),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128),
                                        cv::Size(64, 64),
-                                       cv::Size(30, 30)),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                       cv::Size(30, 30))));
+
+INSTANTIATE_TEST_CASE_P(BackendOutputAllocationTestFluid, BackendOutputAllocationTest,
+                        Combine(Values(CV_8UC3, CV_16SC2, CV_32FC1),
+                                Values(cv::Size(50, 50)),
+                                Values(-1),
+                                Values(CORE_FLUID)));
+
+INSTANTIATE_TEST_CASE_P(BackendOutputAllocationLargeSizeWithCorrectSubmatrixTestFluid,
+                        BackendOutputAllocationLargeSizeWithCorrectSubmatrixTest,
+                        Combine(Values(CV_8UC3, CV_16SC2, CV_32FC1),
+                                Values(cv::Size(50, 50)),
+                                Values(-1),
+                                Values(CORE_FLUID)));
+
+INSTANTIATE_TEST_CASE_P(ReInitOutTestFluid, ReInitOutTest,
+                        Combine(Values(CV_8UC3, CV_16SC4, CV_32FC1),
+                                Values(cv::Size(640, 480)),
+                                Values(-1),
+                                Values(CORE_FLUID),
+                                Values(cv::Size(640, 400),
+                                       cv::Size(10, 480))));
 
 //----------------------------------------------------------------------
 // FIXME: Clean-up test configurations which are enabled already
@@ -258,8 +287,7 @@ INSTANTIATE_TEST_CASE_P(MathOpTestCPU, MathOpTest,
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
 /*init output matrices or not*/ testing::Bool(),
-                                Values(false)),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values(false)));
 
 INSTANTIATE_TEST_CASE_P(SubTestCPU, MathOpTest,
                         Combine(Values(SUB),
@@ -270,8 +298,7 @@ INSTANTIATE_TEST_CASE_P(SubTestCPU, MathOpTest,
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
 /*init output matrices or not*/ testing::Bool(),
-                                testing::Bool()),
-                        opencv_test::PrintMathOpCoreParams());
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(MulSTestCPU, MulSTest,
                         Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
@@ -321,8 +348,7 @@ INSTANTIATE_TEST_CASE_P(CompareTestCPU, CmpTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool()),
-                        opencv_test::PrintCmpCoreParams());
+/*init output matrices or not*/ testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(BitwiseTestCPU, BitwiseTest,
                         Combine(Values(AND, OR, XOR),
@@ -330,8 +356,7 @@ INSTANTIATE_TEST_CASE_P(BitwiseTestCPU, BitwiseTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool()),
-                        opencv_test::PrintBWCoreParams());
+/*init output matrices or not*/ testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotTestCPU, NotTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
@@ -391,8 +416,7 @@ INSTANTIATE_TEST_CASE_P(NormTestCPU, NormTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128))),
-                                Values(0.0),
-                        opencv_test::PrintNormCoreParams());
+                                Values(0.0));
 
 INSTANTIATE_TEST_CASE_P(IntegralTestCPU, IntegralTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
@@ -473,7 +497,7 @@ INSTANTIATE_TEST_CASE_P(LUTTestCPU, LUTTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool()));
+/*init output matrices or not*/ Values(true)));
 
 INSTANTIATE_TEST_CASE_P(LUTTestCustomCPU, LUTTest,
                         Combine(Values(CV_8UC3),
@@ -481,7 +505,7 @@ INSTANTIATE_TEST_CASE_P(LUTTestCustomCPU, LUTTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool()));
+/*init output matrices or not*/ Values(true)));
 
 INSTANTIATE_TEST_CASE_P(ConvertToCPU, ConvertToTest,
                         Combine(Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_32FC1),
index 58b8bab..77622b8 100644
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 
 #include "../common/gapi_imgproc_tests.hpp"
-#include "opencv2/gapi/cpu/imgproc.hpp"
+#include <opencv2/gapi/cpu/imgproc.hpp>
 
-#define IMGPROC_CPU cv::gapi::imgproc::cpu::kernels()
+namespace
+{
+#define IMGPROC_CPU [] () { return cv::compile_args(cv::gapi::imgproc::cpu::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
-
 INSTANTIATE_TEST_CASE_P(Filter2DTestCPU, Filter2DTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 4, 5, 7),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
-                                       cv::Size(640, 480),
-                                       cv::Size(128, 128)),
-                                Values(cv::BORDER_DEFAULT),
+                                        cv::Size(640, 480),
+                                        cv::Size(128, 128)),
                                 Values(-1, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 4, 5, 7),
+                                Values(cv::BORDER_DEFAULT)));
 
 INSTANTIATE_TEST_CASE_P(BoxFilterTestCPU, BoxFilterTest,
-                        Combine(Values(AbsTolerance(0).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3,5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::BORDER_DEFAULT),
                                 Values(-1, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(IMGPROC_CPU),
+                                Values(AbsTolerance(0).to_compare_obj()),
+                                Values(3,5),
+                                Values(cv::BORDER_DEFAULT)));
 
 INSTANTIATE_TEST_CASE_P(SepFilterTestCPU_8U, SepFilterTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3),
-                                Values(3),
+                        Combine(Values(CV_8UC1, CV_8UC3),
                                 Values(cv::Size(1280, 720),
-                                       cv::Size(640, 480)),
+                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3)));
 
 INSTANTIATE_TEST_CASE_P(SepFilterTestCPU_other, SepFilterTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3),
+                        Combine(Values(CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3)));
 
 INSTANTIATE_TEST_CASE_P(BlurTestCPU, BlurTest,
-                        Combine(Values(AbsTolerance(0.0).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3,5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::BORDER_DEFAULT),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsTolerance(0.0).to_compare_obj()),
+                                Values(3,5),
+                                Values(cv::BORDER_DEFAULT)));
 
 INSTANTIATE_TEST_CASE_P(gaussBlurTestCPU, GaussianBlurTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5)));
 
 INSTANTIATE_TEST_CASE_P(MedianBlurTestCPU, MedianBlurTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5)));
 
 INSTANTIATE_TEST_CASE_P(ErodeTestCPU, ErodeTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(cv::MorphShapes::MORPH_RECT,
                                        cv::MorphShapes::MORPH_CROSS,
-                                       cv::MorphShapes::MORPH_ELLIPSE),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                       cv::MorphShapes::MORPH_ELLIPSE)));
 
 INSTANTIATE_TEST_CASE_P(Erode3x3TestCPU, Erode3x3Test,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(1,2,4),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(1,2,4)));
 
 INSTANTIATE_TEST_CASE_P(DilateTestCPU, DilateTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(cv::MorphShapes::MORPH_RECT,
                                        cv::MorphShapes::MORPH_CROSS,
-                                       cv::MorphShapes::MORPH_ELLIPSE),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                       cv::MorphShapes::MORPH_ELLIPSE)));
 
 INSTANTIATE_TEST_CASE_P(Dilate3x3TestCPU, Dilate3x3Test,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(1,2,4),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(-1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(1,2,4)));
 
 INSTANTIATE_TEST_CASE_P(SobelTestCPU, SobelTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(0, 1),
-                                Values(1, 2),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(1, 2)));
 
 INSTANTIATE_TEST_CASE_P(SobelTestCPU32F, SobelTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(CV_32F),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(0, 1),
-                                Values(1, 2),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(1, 2)));
 
 INSTANTIATE_TEST_CASE_P(SobelXYTestCPU, SobelXYTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(1, 2),
                                 Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT),
-                                Values(0, 1, 255),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(0, 1, 255)));
 
 INSTANTIATE_TEST_CASE_P(SobelXYTestCPU32F, SobelXYTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(CV_32F),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(1, 2),
                                 Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT),
-                                Values(0, 1, 255),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(0, 1, 255)));
 
 INSTANTIATE_TEST_CASE_P(EqHistTestCPU, EqHistTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
-                                cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(CannyTestCPU, CannyTest,
-                        Combine(Values(AbsSimilarPoints(0, 0.05).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3),
+                        Combine(Values(CV_8UC1, CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(CV_8UC1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsSimilarPoints(0, 0.05).to_compare_obj()),
                                 Values(3.0, 120.0),
                                 Values(125.0, 240.0),
                                 Values(3, 5),
-                                testing::Bool(),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(RGB2GrayTestCPU, RGB2GrayTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
-                                cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2GrayTestCPU, BGR2GrayTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC1),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(RGB2YUVTestCPU, RGB2YUVTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(YUV2RGBTestCPU, YUV2RGBTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                            /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(NV12toRGBTestCPU, NV12toRGBTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(NV12toBGRTestCPU, NV12toBGRTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(NV12toRGBpTestCPU, NV12toRGBpTest,
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(NV12toBGRpTestCPU, NV12toBGRpTest,
+                        Combine(Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(RGB2LabTestCPU, RGB2LabTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2LUVTestCPU, BGR2LUVTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(LUV2BGRTestCPU, LUV2BGRTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2YUVTestCPU, BGR2YUVTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(YUV2BGRTestCPU, YUV2BGRTest,
-                        Combine(Values(AbsExact().to_compare_f()),
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(RGB2HSVTestCPU, RGB2HSVTest,
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_CPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
 
+INSTANTIATE_TEST_CASE_P(BayerGR2RGBTestCPU, BayerGR2RGBTest,
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC3),
+                                Values(IMGPROC_CPU),
+                                Values(AbsExact().to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(RGB2YUV422TestCPU, RGB2YUV422Test,
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC2),
+                                Values(IMGPROC_CPU),
+                                Values(AbsTolerance(1).to_compare_obj())));
 } // opencv_test
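
The imgproc CPU instantiations above follow the same scheme; note also that throughout these files the comparison functors are now supplied as objects (AbsExact().to_compare_obj(), ToleranceColor(...).to_compare_obj(), and so on) rather than as bare callbacks via to_compare_f(). A plausible motivation, assumed here rather than stated in the diff, is that an object with a stream insertion operator lets Google Test print a readable comparator name in parameterized test names, whereas a plain std::function prints as an opaque value. The wrapper below is a rough, hypothetical illustration of that idea; the CompareObj and absExactObj names are invented for this sketch and are not the types used by the suite.

#include <functional>
#include <ostream>
#include <string>
#include <opencv2/core.hpp>

// Hypothetical comparator wrapper: callable like the old function-based
// comparator, but also carrying a printable description.
struct CompareObj
{
    std::string name;                                        // e.g. "AbsExact"
    std::function<bool(const cv::Mat&, const cv::Mat&)> fn;  // the actual check

    bool operator()(const cv::Mat& a, const cv::Mat& b) const { return fn(a, b); }
};

// Google Test renders parameter values through operator<< (or PrintTo), so an
// object with a stream operator yields readable generated test names.
inline std::ostream& operator<<(std::ostream& os, const CompareObj& c)
{
    return os << c.name;
}

// Example: an exact (zero-difference) comparison packaged as such an object.
inline CompareObj absExactObj()
{
    return CompareObj{"AbsExact",
                      [](const cv::Mat& a, const cv::Mat& b)
                      { return cv::norm(a, b, cv::NORM_INF) == 0; }};
}
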
index f053565..99c36c4 100644
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_imgproc_tests.hpp"
 
-#define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels()
+namespace
+{
+#define IMGPROC_FLUID [] () { return cv::compile_args(cv::gapi::imgproc::fluid::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
 INSTANTIATE_TEST_CASE_P(RGB2GrayTestFluid, RGB2GrayTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
-                                cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC1),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2GrayTestFluid, BGR2GrayTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(CV_8UC1),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(RGB2YUVTestFluid, RGB2YUVTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(YUV2RGBTestFluid, YUV2RGBTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(RGB2LabTestFluid, RGB2LabTest,
-                        Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsSimilarPoints(1, 0.05).to_compare_obj())));
 
 // FIXME: Not supported by Fluid yet (no kernel implemented)
 INSTANTIATE_TEST_CASE_P(BGR2LUVTestFluid, BGR2LUVTest,
-                        Combine(Values(ToleranceColor(5e-3, 6).to_compare_f()),
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC3),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(5e-3, 6).to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(RGB2HSVTestFluid, RGB2HSVTest,
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(BayerGR2RGBTestFluid, BayerGR2RGBTest,
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC3),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
+
+INSTANTIATE_TEST_CASE_P(RGB2YUV422TestFluid, RGB2YUV422Test,
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC2),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsTolerance(1).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(blurTestFluid, BlurTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::BORDER_DEFAULT),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(-1),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3), // add kernel size=5 when implementation is ready
+                                Values(cv::BORDER_DEFAULT)));
 
 INSTANTIATE_TEST_CASE_P(gaussBlurTestFluid, GaussianBlurTest,
-                        Combine(Values(ToleranceFilter(1e-3f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(-1),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-3f, 0.01).to_compare_obj()),
+                                Values(3))); // add kernel size=5 when implementation is ready
 
 INSTANTIATE_TEST_CASE_P(medianBlurTestFluid, MedianBlurTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(-1),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3))); // add kernel size=5 when implementation is ready
 
 INSTANTIATE_TEST_CASE_P(erodeTestFluid, ErodeTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3), // add kernel size=5 when implementation is ready
                                 Values(cv::MorphShapes::MORPH_RECT,
                                        cv::MorphShapes::MORPH_CROSS,
-                                       cv::MorphShapes::MORPH_ELLIPSE),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                       cv::MorphShapes::MORPH_ELLIPSE)));
 
 INSTANTIATE_TEST_CASE_P(dilateTestFluid, DilateTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3), // add kernel size=5 when implementation is ready
                                 Values(cv::MorphShapes::MORPH_RECT,
                                        cv::MorphShapes::MORPH_CROSS,
-                                       cv::MorphShapes::MORPH_ELLIPSE),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                       cv::MorphShapes::MORPH_ELLIPSE)));
 
 INSTANTIATE_TEST_CASE_P(SobelTestFluid, SobelTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3), // add kernel size=5 when implementation is ready
                                 Values(0, 1),
-                                Values(1, 2),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(1, 2)));
 
 INSTANTIATE_TEST_CASE_P(SobelTestFluid32F, SobelTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_32FC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(CV_32F),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3), // add kernel size=5 when implementation is ready
                                 Values(0, 1),
-                                Values(1, 2),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(1, 2)));
 
 INSTANTIATE_TEST_CASE_P(SobelXYTestFluid, SobelXYTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
-                                Values(3),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
+                                Values(IMGPROC_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3),
                                 Values(1, 2),
                                 Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101),
-                                Values(0, 1, 255),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(0, 1, 255)));
 
 INSTANTIATE_TEST_CASE_P(SobelXYTestFluid32F, SobelXYTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_32FC1),
-                                Values(3),
+                        Combine(Values(CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(CV_32F),
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3),
                                 Values(1, 2),
                                 Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101),
-                                Values(0, 1, 255),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(0, 1, 255)));
 
 INSTANTIATE_TEST_CASE_P(boxFilterTestFluid32, BoxFilterTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::BORDER_DEFAULT),
                                 Values(-1, CV_32F),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3), // add kernel size=5 when implementation is ready
+                                Values(cv::BORDER_DEFAULT)));
 
 INSTANTIATE_TEST_CASE_P(sepFilterTestFluid, SepFilterTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_32FC1),
-                                Values(3), // add kernel size=5 when implementation is ready
+                        Combine(Values(CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_32F),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3))); // add kernel size=5 when implementation is ready
 
 INSTANTIATE_TEST_CASE_P(filter2DTestFluid, Filter2DTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
-                                Values(3), // add kernel size=4,5,7 when implementation ready
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::BORDER_DEFAULT),
                                 Values(-1, CV_32F),
-                                Values(true, false),
-                                Values(cv::compile_args(IMGPROC_FLUID))));
+                                Values(IMGPROC_FLUID),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3), // add kernel size=4,5,7 when implementation ready
+                                Values(cv::BORDER_DEFAULT)));
 
 } // opencv_test
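
The instantiations above follow the reworked test-parameter order used throughout this patch: matrix type first, then sizes and output depth, then a compile-args factory (IMGPROC_FLUID) in place of a ready cv::compile_args(...) value, then the comparison functor via to_compare_obj(), and finally the kernel-specific parameters. A minimal sketch of such a factory, assuming IMGPROC_FLUID is defined like the CORE_CPU/CORE_FLUID lambdas further below and that the usual Fluid imgproc header is available (both are assumptions, not taken verbatim from this hunk):

// Sketch only: a nullary compile-args factory in the style these tests now store
// as a parameter; the fixture presumably invokes it to obtain cv::GCompileArgs.
#include <opencv2/gapi/fluid/imgproc.hpp>

auto imgproc_fluid_args = [] () {
    return cv::compile_args(cv::gapi::imgproc::fluid::kernels());
};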
index 435c798..1481755 100644 (file)
@@ -7,67 +7,65 @@
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_operators_tests.hpp"
-#include "opencv2/gapi/cpu/core.hpp"
+#include <opencv2/gapi/cpu/core.hpp>
 
-#define CORE_CPU cv::gapi::core::cpu::kernels()
+namespace
+{
+#define CORE_CPU [] () { return cv::compile_args(cv::gapi::core::cpu::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
-
 // FIXME: CPU test runs are disabled since Fluid is an exclusive plugin now!
 INSTANTIATE_TEST_CASE_P(MathOperatorTestCPU, MathOperatorMatMatTest,
-                    Combine(Values(AbsExact().to_compare_f()),
-                            Values( opPlusM, opMinusM, opDivM,
-                                    opGreater, opLess, opGreaterEq, opLessEq, opEq, opNotEq),
-                            Values(CV_8UC1, CV_16SC1, CV_32FC1),
+                    Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
                             Values(cv::Size(1280, 720),
-                               cv::Size(640, 480),
-                               cv::Size(128, 128)),
+                                   cv::Size(640, 480),
+                                   cv::Size(128, 128)),
                             Values(-1, CV_8U, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                            Values(cv::compile_args(CORE_CPU))));
+                            Values(CORE_CPU),
+                            Values(AbsExact().to_compare_obj()),
+                            Values( opPlusM, opMinusM, opDivM,
+                                    opGreater, opLess, opGreaterEq, opLessEq, opEq, opNotEq)));
 
 INSTANTIATE_TEST_CASE_P(MathOperatorTestCPU, MathOperatorMatScalarTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opPlus, opPlusR, opMinus, opMinusR, opMul, opMulR,  // FIXIT avoid division by values near zero: opDiv, opDivR,
-                                        opGT, opLT, opGE, opLE, opEQ, opNE,
-                                        opGTR, opLTR, opGER, opLER, opEQR, opNER),
-                                Values(CV_8UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opPlus, opPlusR, opMinus, opMinusR, opMul, opMulR,  // FIXIT avoid division by values near zero: opDiv, opDivR,
+                                        opGT, opLT, opGE, opLE, opEQ, opNE,
+                                        opGTR, opLTR, opGER, opLER, opEQR, opNER)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseOperatorTestCPU, MathOperatorMatMatTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opAnd, opOr, opXor ),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
-                                   cv::Size(640, 480),
-                                   cv::Size(128, 128)),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
                                 Values(-1),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opAnd, opOr, opXor )));
 
 INSTANTIATE_TEST_CASE_P(BitwiseOperatorTestCPU, MathOperatorMatScalarTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opAND, opOR, opXOR, opANDR, opORR, opXORR ),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(CORE_CPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opAND, opOR, opXOR, opANDR, opORR, opXORR )));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotOperatorTestCPU, NotOperatorTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_CPU))));
+                                Values(-1),
+                                Values(CORE_CPU)));
 }
index b3e54bb..45c8e18 100644 (file)
@@ -8,65 +8,64 @@
 #include "../test_precomp.hpp"
 #include "../common/gapi_operators_tests.hpp"
 
-#define CORE_FLUID cv::gapi::core::fluid::kernels()
+namespace
+{
+#define CORE_FLUID [] () { return cv::compile_args(cv::gapi::core::fluid::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
 INSTANTIATE_TEST_CASE_P(MathOperatorTestFluid, MathOperatorMatMatTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opPlusM, opMinusM, opDivM,
-                                        opGreater, opLess, opGreaterEq, opLessEq, opEq, opNotEq),
-                                Values(CV_8UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
-                                   cv::Size(640, 480),
-                                   cv::Size(128, 128)),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opPlusM, opMinusM, opDivM,
+                                        opGreater, opLess, opGreaterEq, opLessEq, opEq, opNotEq)));
 
 //FIXME: Some Mat/Scalar Fluid kernels are not there yet!
 INSTANTIATE_TEST_CASE_P(DISABLED_MathOperatorTestFluid, MathOperatorMatScalarTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opPlus, opPlusR, opMinus, opMinusR, opMul, opMulR,  // FIXIT avoid division by values near zero: opDiv, opDivR,
-                                        opGT, opLT, opGE, opLE, opEQ, opNE,
-                                        opGTR, opLTR, opGER, opLER, opEQR, opNER),
-                                Values(CV_8UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opPlus, opPlusR, opMinus, opMinusR, opMul, opMulR,  // FIXIT avoid division by values near zero: opDiv, opDivR,
+                                        opGT, opLT, opGE, opLE, opEQ, opNE,
+                                        opGTR, opLTR, opGER, opLER, opEQR, opNER)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseOperatorTestFluid, MathOperatorMatMatTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opAnd, opOr, opXor ),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
-                                   cv::Size(640, 480),
-                                   cv::Size(128, 128)),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
                                 Values(-1),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opAnd, opOr, opXor )));
 
 //FIXME: Some Mat/Scalar Fluid kernels are not there yet!
 INSTANTIATE_TEST_CASE_P(DISABLED_BitwiseOperatorTestFluid, MathOperatorMatScalarTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opAND, opOR, opXOR, opANDR, opORR, opXORR ),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(CORE_FLUID),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opAND, opOR, opXOR, opANDR, opORR, opXORR )));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotOperatorTestFluid, NotOperatorTest,
                     Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                             Values(cv::Size(1280, 720),
                                    cv::Size(640, 480),
                                    cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_FLUID))));
+                            Values(-1),
+                            Values(CORE_FLUID)));
 }
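
As in the CPU file above, CORE_FLUID now expands to a factory returning cv::GCompileArgs rather than a bare kernel package, and later hunks in this patch drop the cv::unite_policy argument from cv::gapi::combine. A rough sketch of a merged-package factory under the new API (the header paths are assumptions; both backends need to be built):

// Sketch only: merge CPU and Fluid core kernels without a unite_policy and wrap
// the result into compile arguments, mirroring the CORE_* factories above.
#include <opencv2/gapi/cpu/core.hpp>
#include <opencv2/gapi/fluid/core.hpp>

auto make_core_args = [] () {
    auto merged = cv::gapi::combine(cv::gapi::core::cpu::kernels(),
                                    cv::gapi::core::fluid::kernels());
    return cv::compile_args(merged);
};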
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_render_tests_cpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_render_tests_cpu.cpp
new file mode 100644 (file)
index 0000000..334a9e5
--- /dev/null
@@ -0,0 +1,66 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#include "../test_precomp.hpp"
+#include "../common/gapi_render_tests.hpp"
+
+namespace opencv_test
+{
+
+INSTANTIATE_TEST_CASE_P(RenderTextTestCPU, RenderTextTest,
+                        Combine(Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
+                                Values("text"),
+                                Values(Points{Point(5, 30), Point(40, 70), Point(-1, -1)}),
+/* Font face          */        Values(FONT_HERSHEY_SIMPLEX),
+/* Font scale         */        Values(2),
+/* Color              */        Values(cv::Scalar(255, 0, 0)),
+/* Thickness          */        Values(1),
+/* Line type          */        Values(LINE_8),
+/* Bottom left origin */        testing::Bool(),
+/* NV12 format or not */        testing::Bool()));
+
+INSTANTIATE_TEST_CASE_P(RenderRectTestCPU, RenderRectTest,
+                        Combine(Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
+                                Values(Rects{Rect(5, 30, 40, 50),
+                                             Rect(40, 70, 40, 50),
+/* Edge case, rectangle will not be drawn */ Rect(75, 110, -40, 50),
+/* Edge case, rectangle will not be drawn */ Rect(70, 100, 0, 50)}),
+/* Color              */        Values(cv::Scalar(255, 0, 0)),
+/* Thickness          */        Values(1),
+/* Line type          */        Values(LINE_8),
+/* Shift              */        Values(0),
+/* NV12 format or not */        testing::Bool()));
+
+INSTANTIATE_TEST_CASE_P(RenderCircleTestCPU, RenderCircleTest,
+                        Combine(Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
+                                Values(Points{Point(5, 30), Point(40, 70), Point(75, 110)}),
+/* Radius             */        Values(5),
+/* Color              */        Values(cv::Scalar(255, 0, 0)),
+/* Thickness          */        Values(1),
+/* Line type          */        Values(LINE_8),
+/* Shift              */        Values(0),
+/* NV12 format or not */        testing::Bool()));
+
+INSTANTIATE_TEST_CASE_P(RenderLineTestCPU, RenderLineTest,
+                        Combine(Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
+                                Values(VecOfPairOfPoints{ {Point(5, 30)  , Point(5, 40)   },
+                                                          {Point(40, 70) , Point(50, 70)  },
+                                                          {Point(75, 110), Point(100, 115)} }),
+/* Color              */        Values(cv::Scalar(255, 0, 0)),
+/* Thickness          */        Values(1),
+/* Line type          */        Values(LINE_8),
+/* Shift              */        Values(0),
+/* NV12 format or not */        testing::Bool()));
+}
index ebf7a7d..9702119 100644 (file)
@@ -6,8 +6,10 @@
 
 
 #include "test_precomp.hpp"
-#include "opencv2/gapi/gcomputation_async.hpp"
-#include "opencv2/gapi/gcompiled_async.hpp"
+#include <opencv2/gapi/gcomputation_async.hpp>
+#include <opencv2/gapi/gcompiled_async.hpp>
+#include <opencv2/gapi/gasync_context.hpp>
+
 
 #include <condition_variable>
 #include <stdexcept>
@@ -78,6 +80,32 @@ namespace {
             }
         }
     };
+
+
+    //TODO: unify with callback helper code
+    struct cancel_struct {
+        std::atomic<int> num_tasks_to_spawn;
+
+        cv::gapi::wip::GAsyncContext ctx;
+
+        cancel_struct(int tasks_to_spawn) : num_tasks_to_spawn(tasks_to_spawn) {}
+    };
+
+    G_TYPED_KERNEL(GCancelationAdHoc, <GMat(GMat, cancel_struct*)>, "org.opencv.test.cancel_ad_hoc")
+    {
+        static GMatDesc outMeta(GMatDesc in, cancel_struct* ) { return in;  }
+
+    };
+
+    GAPI_OCV_KERNEL(GCancelationAdHocImpl, GCancelationAdHoc)
+    {
+        static void run(const cv::Mat& , cancel_struct* cancel_struct_p, cv::Mat&)        {
+            auto& cancel_struct_ = * cancel_struct_p;
+            auto num_tasks_to_spawn =  -- cancel_struct_.num_tasks_to_spawn;
+            cancel_struct_.ctx.cancel();
+            EXPECT_GT(num_tasks_to_spawn, 0) << "Incorrect test setup - too small a number of tasks to feed the queue\n";
+        }
+    };
 }
 
 struct ExceptionOnExecution {
@@ -117,6 +145,41 @@ struct ExceptionOnExecution {
 
 };
 
+struct SelfCanceling {
+    cv::GComputation self_cancel;
+    SelfCanceling(cancel_struct* cancel_struct_p) : self_cancel([cancel_struct_p]{
+        cv::GMat in;
+        cv::GMat out = GCancelationAdHoc::on(in, cancel_struct_p);
+        return GComputation{in, out};
+    })
+    {}
+
+    const cv::Size sz{2, 2};
+    cv::Mat in_mat{sz, CV_8U, cv::Scalar(1)};
+    cv::Mat out_mat;
+
+    cv::GCompiled compile(){
+        return self_cancel.compile(descr_of(in_mat), compile_args());
+    }
+
+    cv::GComputation& computation(){
+        return self_cancel;
+    }
+
+    cv::GRunArgs in_args(){
+        return cv::gin(in_mat);
+    }
+
+    cv::GRunArgsP out_args(){
+        return cv::gout(out_mat);
+    }
+
+    cv::GCompileArgs compile_args(){
+        auto pkg = cv::gapi::kernels<GCancelationAdHocImpl>();
+        return cv::compile_args(pkg);
+    }
+};
+
 template<typename crtp_final_t>
 struct crtp_cast {
     template<typename crtp_base_t>
@@ -150,6 +213,11 @@ struct CallBack: crtp_cast<crtp_final_t> {
         this->crtp_cast_(this)->async(callback(), std::forward<Args>(args)...);
     }
 
+    template<typename... Args >
+    void start_async(cv::gapi::wip::GAsyncContext& ctx, Args&&... args){
+        this->crtp_cast_(this)->async(ctx, callback(), std::forward<Args>(args)...);
+    }
+
     void wait_for_result()
     {
         std::unique_lock<std::mutex> lck{mtx};
@@ -186,6 +254,14 @@ struct AsyncCompiled  : crtp_cast<crtp_final_t>{
         auto gcmpld = this->crtp_cast_(this)->compile();
         return cv::gapi::wip::async(gcmpld, std::forward<Args>(args)...);
     }
+
+    template<typename... Args>
+    auto async(cv::gapi::wip::GAsyncContext& ctx, Args&&... args) ->
+        decltype(cv::gapi::wip::async(std::declval<cv::GCompiled&>(), std::forward<Args>(args)..., std::declval<cv::gapi::wip::GAsyncContext&>()))
+    {
+        auto gcmpld = this->crtp_cast_(this)->compile();
+        return cv::gapi::wip::async(gcmpld, std::forward<Args>(args)..., ctx);
+    }
 };
 
 //Test Mixin, hiding details of calling apply (async_apply) on GAPI Computation object
@@ -193,9 +269,23 @@ template<typename crtp_final_t>
 struct AsyncApply : crtp_cast<crtp_final_t> {
 
     template<typename... Args>
-    auto async(Args&&... args) ->decltype(cv::gapi::wip::async_apply(std::declval<cv::GComputation&>(), std::forward<Args>(args)...)) {
-        return cv::gapi::wip::async_apply(this->crtp_cast_(this)->computation(), std::forward<Args>(args)..., this->crtp_cast_(this)->compile_args());
+    auto async(Args&&... args) ->
+         decltype(cv::gapi::wip::async_apply(std::declval<cv::GComputation&>(), std::forward<Args>(args)..., std::declval<cv::GCompileArgs>()))
+    {
+        return cv::gapi::wip::async_apply(
+                this->crtp_cast_(this)->computation(), std::forward<Args>(args)..., this->crtp_cast_(this)->compile_args()
+        );
+    }
+
+    template<typename... Args>
+    auto async(cv::gapi::wip::GAsyncContext& ctx, Args&&... args) ->
+         decltype(cv::gapi::wip::async_apply(std::declval<cv::GComputation&>(), std::forward<Args>(args)... , std::declval<cv::GCompileArgs>(), std::declval<cv::gapi::wip::GAsyncContext&>()))
+    {
+        return cv::gapi::wip::async_apply(
+                this->crtp_cast_(this)->computation(), std::forward<Args>(args)..., this->crtp_cast_(this)->compile_args(), ctx
+        );
     }
+
 };
 
 
@@ -240,7 +330,7 @@ TYPED_TEST_P(stress, test){
     const std::size_t number_of_threads  = 4;
 
     auto thread_body = [&](){
-        std::vector<TypeParam> requests{request_per_thread};
+        std::vector<TypeParam> requests(request_per_thread);
         for (auto&& r : requests){
             r.start_async(r.in_args(), r.out_args());
         }
@@ -262,13 +352,151 @@ TYPED_TEST_P(stress, test){
 }
 REGISTER_TYPED_TEST_CASE_P(stress, test);
 
+template<typename case_t>
+struct cancel : ::testing::Test{};
+TYPED_TEST_CASE_P(cancel);
+
+TYPED_TEST_P(cancel, basic){
+    constexpr int num_tasks = 100;
+    cancel_struct cancel_struct_ {num_tasks};
+    std::vector<TypeParam> requests; requests.reserve(num_tasks);
+
+    for (auto i = num_tasks; i>0; i--){
+        requests.emplace_back(&cancel_struct_);
+    }
+    for (auto&& r : requests){
+        //the first request will cancel the others during its execution
+        r.start_async(cancel_struct_.ctx, r.in_args(), r.out_args());
+    }
+
+    unsigned int canceled = 0 ;
+    for (auto&& r : requests){
+        try {
+            r.wait_for_result();
+        }catch (cv::gapi::wip::GAsyncCanceled&){
+            ++canceled;
+        }
+    }
+    ASSERT_GT(canceled, 0u);
+}
+
+namespace {
+    GRunArgs deep_copy_out_args(const GRunArgsP& args ){
+        GRunArgs result; result.reserve(args.size());
+        for (auto&& arg : args){
+            //FIXME: replace this switch with use of visit() on variant, once it becomes available
+            switch (arg.index()){
+    #if !defined(GAPI_STANDALONE)
+                case GRunArgP::index_of<cv::Mat*>()                 :   result.emplace_back(*util::get<cv::Mat*>(arg));     break;
+                case GRunArgP::index_of<cv::Scalar*>()              :   result.emplace_back(*util::get<cv::Scalar*>(arg));  break;
+                case GRunArgP::index_of<cv::UMat*>()                :   result.emplace_back(*util::get<cv::UMat*>(arg));    break;
+    #endif // !defined(GAPI_STANDALONE)
+                case GRunArgP::index_of<cv::gapi::own::Mat*>()      :   result.emplace_back(*util::get<cv::gapi::own::Mat*>   (arg));   break;
+                case GRunArgP::index_of<cv::gapi::own::Scalar*>()   :   result.emplace_back(*util::get<cv::gapi::own::Scalar*>(arg));   break;
+                case GRunArgP::index_of<cv::detail::VectorRef>()    :   result.emplace_back(util::get<cv::detail::VectorRef>  (arg));   break;
+                default : ;
+            }
+        }
+        return result;
+    }
+
+    GRunArgsP args_p_from_args(GRunArgs& args){
+        GRunArgsP result; result.reserve(args.size());
+        for (auto&& arg : args){
+            switch (arg.index()){
+    #if !defined(GAPI_STANDALONE)
+                case GRunArg::index_of<cv::Mat>()                 :   result.emplace_back(&util::get<cv::Mat>(arg));     break;
+                case GRunArg::index_of<cv::Scalar>()              :   result.emplace_back(&util::get<cv::Scalar>(arg));  break;
+                case GRunArg::index_of<cv::UMat>()                :   result.emplace_back(&util::get<cv::UMat>(arg));    break;
+    #endif // !defined(GAPI_STANDALONE)
+                case GRunArg::index_of<cv::gapi::own::Mat>()      :   result.emplace_back(&util::get<cv::gapi::own::Mat>   (arg));   break;
+                case GRunArg::index_of<cv::gapi::own::Scalar>()   :   result.emplace_back(&util::get<cv::gapi::own::Scalar>(arg));   break;
+                case GRunArg::index_of<cv::detail::VectorRef>()   :   result.emplace_back(util::get<cv::detail::VectorRef>  (arg));   break;
+                default : ;
+            }
+        }
+        return result;
+    }
+}
+
+REGISTER_TYPED_TEST_CASE_P(cancel, basic);
+
+template<typename case_t>
+struct output_args_lifetime : ::testing::Test{
+    static constexpr const int num_of_requests = 20;
+};
+TYPED_TEST_CASE_P(output_args_lifetime);
+//There are intentionally no actual checks (asserts and verifications) in output_args_lifetime tests.
+//They are more example use cases than real tests. (ASAN/valgrind can still catch issues here)
+TYPED_TEST_P(output_args_lifetime, callback){
+
+    std::atomic<int> active_requests = {0};
+
+    for (int i=0; i<this->num_of_requests; i++)
+    {
+        TypeParam r;
+
+        //As output arguments are __captured by reference__, the calling code
+        //__must__ ensure they live long enough for the asynchronous activity to complete
+        //(i.e. at least until the callback is called).
+        auto out_args_ptr =  std::make_shared<cv::GRunArgs>(deep_copy_out_args(r.out_args()));
+
+        //Extend lifetime of out_args_ptr content by capturing it into a callback
+        auto cb =  [&active_requests, out_args_ptr](std::exception_ptr ){
+            --active_requests;
+        };
+
+        ++active_requests;
+
+        r.async(cb, r.in_args(), args_p_from_args(*out_args_ptr));
+    }
+
+
+   while(active_requests){
+       std::this_thread::sleep_for(std::chrono::milliseconds{2});
+   }
+}
+
+
+TYPED_TEST_P(output_args_lifetime, future){
+
+    std::vector<std::future<void>>                      fs(this->num_of_requests);
+    std::vector<std::shared_ptr<cv::GRunArgs>>    out_ptrs(this->num_of_requests);
+
+    for (int i=0; i<this->num_of_requests; i++)
+    {
+        TypeParam r;
+
+        //As output arguments are __captured by reference__, the calling code
+        //__must__ ensure they live long enough for the asynchronous activity to complete
+        //(i.e. at least until future.get()/wait() returns).
+        auto out_args_ptr =  std::make_shared<cv::GRunArgs>(deep_copy_out_args(r.out_args()));
+
+        //Extend lifetime of out_args_ptr content
+        out_ptrs[i] = out_args_ptr;
+
+        fs[i] = r.async(r.in_args(), args_p_from_args(*out_args_ptr));
+    }
+
+    for (auto const& ftr : fs ){
+        ftr.wait();
+    }
+}
+REGISTER_TYPED_TEST_CASE_P(output_args_lifetime, callback, future);
+
 //little helpers to match up all combinations of setups
-template<typename compute_fixture_t,template <typename> class callback_or_future_t, template <typename> class compiled_or_apply_t>
+template<typename compute_fixture_t, template<typename> class... args_t>
 struct Case
         : compute_fixture_t,
-          callback_or_future_t<Case<compute_fixture_t,callback_or_future_t,compiled_or_apply_t>>,
-          compiled_or_apply_t <Case<compute_fixture_t,callback_or_future_t,compiled_or_apply_t>>
-{};
+          args_t<Case<compute_fixture_t, args_t...>> ...
+{
+    template<typename... Args>
+    Case(Args&&... args) : compute_fixture_t(std::forward<Args>(args)...) { }
+    Case(Case const &  ) = default;
+    Case(Case &&  ) = default;
+
+    Case() = default;
+};
 
 template<typename computation_t>
 using cases = ::testing::Types<
@@ -277,23 +505,22 @@ using cases = ::testing::Types<
             Case<computation_t, Future,   AsyncCompiled>,
             Case<computation_t, Future,   AsyncApply>
             >;
+
 INSTANTIATE_TYPED_TEST_CASE_P(AsyncAPINormalFlow_,        normal,     cases<SumOfSum2x2>);
 INSTANTIATE_TYPED_TEST_CASE_P(AsyncAPIExceptionHandling_, exception,  cases<ExceptionOnExecution>);
 
 INSTANTIATE_TYPED_TEST_CASE_P(AsyncAPIStress,             stress,     cases<SumOfSum2x2>);
 
-TEST(AsyncAPI, Sample){
-    cv::GComputation self_mul([]{
-        cv::GMat in;
-        cv::GMat out = cv::gapi::mul(in, in);
-        return GComputation{in, out};
-    });
+INSTANTIATE_TYPED_TEST_CASE_P(AsyncAPICancelation,        cancel,     cases<SelfCanceling>);
 
-    const cv::Size sz{2, 2};
-    cv::Mat in_mat{sz, CV_8U, cv::Scalar(1)};
-    cv::Mat out;
+template<typename computation_t>
+using explicit_wait_cases = ::testing::Types<
+            Case<computation_t, AsyncCompiled>,
+            Case<computation_t, AsyncApply>,
+            Case<computation_t, AsyncCompiled>,
+            Case<computation_t, AsyncApply>
+            >;
+
+INSTANTIATE_TYPED_TEST_CASE_P(AsyncAPIOutArgsLifetTime,   output_args_lifetime,     explicit_wait_cases<SumOfSum2x2>);
 
-    auto f = cv::gapi::wip::async_apply(self_mul,cv::gin(in_mat), cv::gout(out));
-    f.wait();
-}
 } // namespace opencv_test
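
The mixins above exercise the new GAsyncContext overloads of async()/async_apply(). Below is a minimal usage sketch outside the test harness, modeled on the removed AsyncAPI.Sample test and on the decltype expression in AsyncApply::async(ctx, ...); the explicit empty GCompileArgs and the trailing context argument mirror that overload and are the main assumptions here:

// Sketch only: run a computation asynchronously, request cancellation, and
// handle the GAsyncCanceled outcome.
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/gcomputation_async.hpp>
#include <opencv2/gapi/gasync_context.hpp>

static void async_usage_sketch()
{
    cv::GComputation self_mul([]{
        cv::GMat in;
        cv::GMat out = cv::gapi::mul(in, in);
        return cv::GComputation{in, out};
    });

    const cv::Size sz{2, 2};
    cv::Mat in_mat{sz, CV_8U, cv::Scalar(1)};
    cv::Mat out_mat;  // outputs are captured by reference and must outlive the request

    cv::gapi::wip::GAsyncContext ctx;
    auto f = cv::gapi::wip::async_apply(self_mul, cv::gin(in_mat), cv::gout(out_mat),
                                        cv::GCompileArgs{}, ctx);
    ctx.cancel();  // cooperative: a request that has already started may still complete
    try {
        f.wait();
    } catch (const cv::gapi::wip::GAsyncCanceled&) {
        // the request was dropped before execution started
    }
}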
index 62069d8..4f0ac18 100644 (file)
@@ -8,7 +8,7 @@
 #include "test_precomp.hpp"
 #include "gapi_mock_kernels.hpp"
 
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
 
 namespace opencv_test
 {
index d0c551a..fa79230 100644 (file)
@@ -7,7 +7,7 @@
 
 #include "test_precomp.hpp"
 
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
 
 namespace opencv_test
 {
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_parallel_rois_test.cpp
new file mode 100644 (file)
index 0000000..2275dba
--- /dev/null
@@ -0,0 +1,315 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+
+#include "test_precomp.hpp"
+
+#include "gapi_fluid_test_kernels.hpp"
+
+namespace opencv_test
+{
+
+namespace {
+    cv::Mat randomMat(cv::Size img_sz, int type = CV_8UC1, cv::Scalar mean   = cv::Scalar(127.0f), cv::Scalar stddev = cv::Scalar(40.f)){
+        cv::Mat mat(img_sz, type);
+        cv::randn(mat, mean, stddev);
+        return mat;
+    }
+
+    cv::GFluidParallelOutputRois asGFluidParallelOutputRois(const std::vector<cv::Rect>& rois){
+        cv::GFluidParallelOutputRois parallel_rois;
+        for (auto const& roi : rois) {
+            parallel_rois.parallel_rois.emplace_back(GFluidOutputRois{{to_own(roi)}});
+        }
+        return parallel_rois;
+    }
+
+    void adjust_empty_roi(cv::Rect& roi, cv::Size size){
+        if (roi.empty()) roi = cv::Rect{{0,0}, size};
+    }
+
+    cv::GCompileArgs combine(cv::GCompileArgs&& lhs, cv::GCompileArgs const& rhs){
+        lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+        return std::move(lhs);
+    }
+}
+using namespace cv::gapi_test_kernels;
+
+//As GTest cannot simultaneously parameterize a test with both types and values, let's use type erasure and virtual interfaces
+//to use different computation pipelines
+struct ComputationPair {
+    void run_with_gapi(const cv::Mat& in_mat, cv::GCompileArgs const& compile_args, cv::Mat& out_mat){
+        run_with_gapi_impl(in_mat, combine(cv::compile_args(fluidTestPackage), compile_args), out_mat);
+    }
+    void run_with_gapi(const cv::Mat& in_mat, cv::GFluidParallelOutputRois const& parallel_rois, cv::Mat& out_mat){
+        run_with_gapi_impl(in_mat, cv::compile_args(fluidTestPackage, parallel_rois), out_mat);
+    }
+
+    virtual void run_with_ocv (const cv::Mat& in_mat, const std::vector<cv::Rect>& rois,                 cv::Mat& out_mat) = 0;
+
+    virtual std::string name() const { return {}; }
+
+    virtual ~ComputationPair ()  = default;
+
+    friend std::ostream& operator<<(std::ostream& o, ComputationPair const* cp){
+        std::string custom_name = cp->name();
+        return o << (custom_name.empty() ? typeid(cp).name() : custom_name );
+    }
+
+private:
+    virtual void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat) = 0;
+};
+
+struct Blur3x3CP  : ComputationPair{
+    static constexpr int borderType = BORDER_REPLICATE;
+    static constexpr int kernelSize = 3;
+
+    std::string name() const override { return "Blur3x3"; }
+    void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat_gapi) override {
+        cv::GMat in;
+        cv::GMat out = TBlur3x3::on(in, borderType, {});
+        cv::GComputation c(cv::GIn(in), cv::GOut(out));
+
+        // Run G-API
+        auto cc = c.compile(cv::descr_of(in_mat), comp_args);
+        cc(cv::gin(in_mat), cv::gout(out_mat_gapi));
+    }
+
+    void run_with_ocv(const cv::Mat& in_mat, const std::vector<cv::Rect>& rois, cv::Mat& out_mat_ocv) override {
+        cv::Point anchor = {-1, -1};
+        // Check with OpenCV
+        for (auto roi : rois) {
+            adjust_empty_roi(roi, in_mat.size());
+            cv::blur(in_mat(roi), out_mat_ocv(roi), {kernelSize, kernelSize}, anchor, borderType);
+        }
+    }
+};
+
+struct AddCCP : ComputationPair{
+    std::string name() const override { return "AddC"; }
+    void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat_gapi) override {
+        cv::GMat in;
+        cv::GMat out = TAddCSimple::on(in, 1);
+        cv::GComputation c(cv::GIn(in), cv::GOut(out));
+
+        // Run G-API
+        auto cc = c.compile(cv::descr_of(in_mat), comp_args);
+        cc(cv::gin(in_mat), cv::gout(out_mat_gapi));
+    }
+
+    void run_with_ocv(const cv::Mat& in_mat, const std::vector<cv::Rect>& rois, cv::Mat& out_mat_ocv) override {
+        // Check with OpenCV
+        for (auto roi : rois) {
+            adjust_empty_roi(roi, in_mat.size());
+            out_mat_ocv(roi) = in_mat(roi) + 1u;
+        }
+    }
+};
+
+template<BorderTypes _borderType>
+struct SequenceOfBlursCP : ComputationPair{
+    BorderTypes borderType = _borderType;
+
+    std::string name() const override { return "SequenceOfBlurs, border type: " + std::to_string(static_cast<int>(borderType)); }
+    void run_with_gapi_impl(const cv::Mat& in_mat, cv::GCompileArgs const& comp_args, cv::Mat& out_mat) override {
+        cv::Scalar borderValue(0);
+
+        GMat in;
+        auto mid = TBlur3x3::on(in,  borderType, borderValue);
+        auto out = TBlur5x5::on(mid, borderType, borderValue);
+
+        GComputation c(GIn(in), GOut(out));
+        auto cc = c.compile(descr_of(in_mat), comp_args);
+        cc(cv::gin(in_mat), cv::gout(out_mat));
+    }
+    void run_with_ocv(const cv::Mat& in_mat, const std::vector<cv::Rect>& rois,                 cv::Mat& out_mat) override {
+        cv::Mat mid_mat_ocv = Mat::zeros(in_mat.size(), in_mat.type());
+        cv::Point anchor = {-1, -1};
+
+        for (auto roi : rois) {
+            adjust_empty_roi(roi, in_mat.size());
+            cv::blur(in_mat, mid_mat_ocv, {3,3}, anchor, borderType);
+            cv::blur(mid_mat_ocv(roi), out_mat(roi), {5,5}, anchor, borderType);
+        }
+    }
+};
+
+struct TiledComputation : public TestWithParam <std::tuple<ComputationPair*, cv::Size, std::vector<cv::Rect>, decltype(cv::GFluidParallelFor::parallel_for)>> {};
+TEST_P(TiledComputation, Test)
+{
+    ComputationPair*        cp;
+    cv::Size                img_sz;
+    std::vector<cv::Rect>   rois ;
+    decltype(cv::GFluidParallelFor::parallel_for)        pfor;
+    auto                    mat_type  =  CV_8UC1;
+
+    std::tie(cp, img_sz, rois, pfor) = GetParam();
+
+    cv::Mat in_mat       =      randomMat(img_sz, mat_type);
+    cv::Mat out_mat_gapi = cv::Mat::zeros(img_sz, mat_type);
+    cv::Mat out_mat_ocv  = cv::Mat::zeros(img_sz, mat_type);
+
+    auto comp_args = combine(cv::compile_args(asGFluidParallelOutputRois(rois)), pfor ? cv::compile_args(cv::GFluidParallelFor{pfor}) : cv::GCompileArgs{});
+    cp->run_with_gapi(in_mat, comp_args, out_mat_gapi);
+    cp->run_with_ocv (in_mat, rois,      out_mat_ocv);
+
+    EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv))
+            << "in_mat : \n"      << in_mat << std::endl
+            << "diff matrix :\n " << (out_mat_gapi != out_mat_ocv) << std::endl
+            << "out_mat_gapi: \n" << out_mat_gapi << std::endl
+            << "out_mat_ocv:  \n" << out_mat_ocv << std::endl;;
+}
+
+
+namespace {
+    //this is ugly but other variants (like using shared_ptr) are IMHO even more ugly :)
+    template<typename T, typename... Arg>
+    T* addr_of_static(Arg... arg) {
+        static T obj(std::forward<Arg>(arg)...);
+        return &obj;
+    }
+}
+
+auto single_arg_computations = [](){
+    return Values(  addr_of_static<Blur3x3CP>(),
+                    addr_of_static<AddCCP>(),
+                    addr_of_static<SequenceOfBlursCP<BORDER_CONSTANT>>(),
+                    addr_of_static<SequenceOfBlursCP<BORDER_REPLICATE>>(),
+                    addr_of_static<SequenceOfBlursCP<BORDER_REFLECT_101>>()
+            );
+
+};
+
+auto tilesets_8x10 = [](){
+    return  Values(std::vector<cv::Rect>{cv::Rect{}},
+                   std::vector<cv::Rect>{cv::Rect{0,0,8,5}, cv::Rect{0,5,8,5}},
+                   std::vector<cv::Rect>{cv::Rect{0,1,8,3}, cv::Rect{0,4,8,3}},
+                   std::vector<cv::Rect>{cv::Rect{0,2,8,3}, cv::Rect{0,5,8,2}},
+                   std::vector<cv::Rect>{cv::Rect{0,3,8,4}, cv::Rect{0,9,8,1}});
+};
+
+auto tilesets_20x15 = [](){
+    return   Values(std::vector<cv::Rect>{cv::Rect{}},
+                    std::vector<cv::Rect>{cv::Rect{{0,0},cv::Size{20,7}},
+                                          cv::Rect{{0,7},cv::Size{20,8}}});
+};
+
+auto tilesets_320x240 = [](){
+    return  Values(std::vector<cv::Rect>{cv::Rect{{0,0},   cv::Size{320,120}},
+                                         cv::Rect{{0,120}, cv::Size{320,120}}},
+
+                   std::vector<cv::Rect>{cv::Rect{{0,0},   cv::Size{320,120}},
+                                         cv::Rect{{0,120}, cv::Size{320,120}}},
+
+                   std::vector<cv::Rect>{cv::Rect{{0,0},  cv::Size{320,60}},
+                                         cv::Rect{{0,60}, cv::Size{320,60}},
+                                         cv::Rect{{0,120},cv::Size{320,120}}});
+};
+
+namespace{
+    auto no_custom_pfor = decltype(cv::GFluidParallelFor::parallel_for){};
+}
+
+INSTANTIATE_TEST_CASE_P(FluidTiledSerial8x10, TiledComputation,
+                        Combine(
+                            single_arg_computations(),
+                            Values(cv::Size(8, 10)),
+                            tilesets_8x10(),
+                            Values(no_custom_pfor))
+);
+
+INSTANTIATE_TEST_CASE_P(FluidTiledSerial20x15, TiledComputation,
+                        Combine(
+                            single_arg_computations(),
+                            Values(cv::Size(20, 15)),
+                            tilesets_20x15(),
+                            Values(no_custom_pfor))
+);
+
+INSTANTIATE_TEST_CASE_P(FluidTiledSerial320x240, TiledComputation,
+                        Combine(
+                            single_arg_computations(),
+                            Values(cv::Size(320, 240)),
+                            tilesets_320x240(),
+                            Values(no_custom_pfor))
+);
+
+//FIXME: add multiple outputs tests
+
+TEST(FluidTiledParallelFor, basic)
+{
+    cv::Size                img_sz{8,20};
+    auto                    mat_type  =  CV_8UC1;
+
+    cv::GMat in;
+    cv::GMat out = TAddCSimple::on(in, 1);
+    cv::GComputation c(cv::GIn(in), cv::GOut(out));
+
+    cv::Mat in_mat       =      randomMat(img_sz, mat_type);
+    cv::Mat out_mat_gapi = cv::Mat::zeros(img_sz, mat_type);
+
+    auto  parallel_rois = asGFluidParallelOutputRois( std::vector<cv::Rect>{cv::Rect{0,0,8,5}, cv::Rect{0,5,8,5}});
+
+    std::size_t items_count = 0;
+    auto pfor = [&items_count](std::size_t count, std::function<void(std::size_t)> ){
+        items_count = count;
+    };
+
+    // Run G-API
+    auto cc = c.compile(cv::descr_of(in_mat), cv::compile_args(fluidTestPackage, parallel_rois, GFluidParallelFor{pfor}));
+    cc(cv::gin(in_mat), cv::gout(out_mat_gapi));
+    ASSERT_EQ(parallel_rois.parallel_rois.size(), items_count);
+}
+
+namespace {
+    auto serial_for = [](std::size_t count, std::function<void(std::size_t)> f){
+        for (std::size_t i  = 0; i < count; ++i){
+            f(i);
+        }
+    };
+
+    auto cv_parallel_for = [](std::size_t count, std::function<void(std::size_t)> f){
+        cv::parallel_for_(cv::Range(0, static_cast<int>(count)), [f](const cv::Range& r){
+            for (auto i = r.start; i < r.end; ++i){
+                f(i);
+            }        });
+    };
+}
+
+INSTANTIATE_TEST_CASE_P(FluidTiledParallel8x10, TiledComputation,
+                        Combine(
+                            single_arg_computations(),
+                            Values(cv::Size(8, 10)),
+                            tilesets_8x10(),
+                            Values(serial_for, cv_parallel_for))
+);
+} // namespace opencv_test
+
+//define custom printer for "parallel_for" test parameter
+namespace std {
+    void PrintTo(decltype(cv::GFluidParallelFor::parallel_for) const& f, std::ostream* o);
+}
+
+//separate declaration and definition are needed to please the compiler
+void std::PrintTo(decltype(cv::GFluidParallelFor::parallel_for) const& f, std::ostream* o){
+    if (f) {
+        using namespace opencv_test;
+        if      (f.target<decltype(serial_for)>()){
+                    *o <<"serial_for";
+        }
+        else if (f.target<decltype(cv_parallel_for)>()){
+            *o <<"cv_parallel_for";
+        }
+        else {
+            *o <<"parallel_for of type: " << f.target_type().name();
+        }
+    }
+    else
+    {
+        *o << "default parallel_for";
+    }
+
+}
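
A compact end-to-end sketch of the compile arguments this file introduces, reusing only names defined above (randomMat, asGFluidParallelOutputRois, TAddCSimple, fluidTestPackage, cv_parallel_for) and mirroring TEST(FluidTiledParallelFor, basic); the function wrapper and the concrete ROI split are illustrative assumptions:

// Sketch only: run the TAddCSimple graph over two stacked ROIs and dispatch
// them through OpenCV's parallel_for_ via the cv_parallel_for helper above.
static void parallel_rois_sketch()
{
    cv::GMat in;
    cv::GComputation c(cv::GIn(in), cv::GOut(TAddCSimple::on(in, 1)));

    cv::Mat in_mat  = randomMat(cv::Size{320, 240});
    cv::Mat out_mat = cv::Mat::zeros(in_mat.size(), in_mat.type());

    auto parallel_rois = asGFluidParallelOutputRois({cv::Rect{0, 0,   320, 120},
                                                     cv::Rect{0, 120, 320, 120}});

    auto cc = c.compile(cv::descr_of(in_mat),
                        cv::compile_args(fluidTestPackage, parallel_rois,
                                         cv::GFluidParallelFor{cv_parallel_for}));
    cc(cv::gin(in_mat), cv::gout(out_mat));
}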
index cbe3237..2798b85 100644 (file)
@@ -381,7 +381,7 @@ static auto fluidResizeTestPackage = [](int interpolation, cv::Size szIn, cv::Si
     }break;
     default: CV_Assert(false);
     }
-    return combine(pkg, fluidTestPackage, unite_policy::KEEP);
+    return combine(pkg, fluidTestPackage);
 
 #undef RESIZE_SWITCH
 #undef RESIZE_CASE
@@ -743,7 +743,7 @@ TEST_P(NV12PlusResizeTest, Test)
     auto out = cv::gapi::resize(rgb, out_sz, 0, 0, interp);
     cv::GComputation c(cv::GIn(y, uv), cv::GOut(out));
 
-    auto pkg = cv::gapi::combine(fluidTestPackage, cv::gapi::core::fluid::kernels(), cv::unite_policy::KEEP);
+    auto pkg = cv::gapi::combine(fluidTestPackage, cv::gapi::core::fluid::kernels());
 
     c.apply(cv::gin(y_mat, uv_mat), cv::gout(out_mat)
            ,cv::compile_args(pkg, cv::GFluidOutputRois{{to_own(roi)}}));
@@ -822,8 +822,7 @@ TEST_P(Preproc4lpiTest, Test)
     cv::GComputation c(cv::GIn(y, uv), cv::GOut(out));
 
     auto pkg = cv::gapi::combine(cv::gapi::core::fluid::kernels(),
-                                 fluidResizeTestPackage(interp, in_sz, out_sz, 4),
-                                 cv::unite_policy::REPLACE);
+                                 fluidResizeTestPackage(interp, in_sz, out_sz, 4));
 
     c.apply(cv::gin(y_mat, uv_mat), cv::gout(out_mat)
            ,cv::compile_args(pkg, cv::GFluidOutputRois{{to_own(roi)}}));
index 131f96a..b919d99 100644 (file)
@@ -7,10 +7,10 @@
 
 #include "test_precomp.hpp"
 
-#include "opencv2/gapi/core.hpp"
+#include <opencv2/gapi/core.hpp>
 
-#include "opencv2/gapi/fluid/gfluidbuffer.hpp"
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
+#include <opencv2/gapi/fluid/gfluidbuffer.hpp>
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
 
  // FIXME: move these tests with priv() to internal suite
 #include "backends/fluid/gfluidbuffer_priv.hpp"
index fcc8d9b..7c4904c 100644 (file)
@@ -9,6 +9,7 @@
 #include <iomanip>
 #include "gapi_fluid_test_kernels.hpp"
 #include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/own/saturate.hpp>
 
 namespace cv
 {
@@ -72,7 +73,8 @@ GAPI_FLUID_KERNEL(FAddCSimple, TAddCSimple, false)
             for (int i = 0, w = in.length(); i < w; i++)
             {
                 //std::cout << std::setw(4) << int(in_row[i]);
-                out_row[i] = static_cast<uint8_t>(in_row[i] + cval);
+                //FIXME: it seems that other kernels might need it as well
+                out_row[i] = cv::gapi::own::saturate<uint8_t>(in_row[i] + cval);
             }
             //std::cout << std::endl;
         }
index 567dddd..dfb8822 100644 (file)
@@ -8,7 +8,7 @@
 #ifndef GAPI_FLUID_TEST_KERNELS_HPP
 #define GAPI_FLUID_TEST_KERNELS_HPP
 
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
 
 namespace cv
 {
index 070cea6..0e38e05 100644 (file)
@@ -6,7 +6,8 @@
 
 
 #include "test_precomp.hpp"
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
+#include <ade/util/zip_range.hpp>
 
 namespace opencv_test
 {
@@ -51,6 +52,41 @@ namespace opencv_test
           {
           }
       };
+
+      struct GComputationVectorMatsAsOutput: public ::testing::Test
+      {
+          cv::Mat  in_mat;
+          cv::GComputation m_c;
+          std::vector<cv::Mat> ref_mats;
+
+          GComputationVectorMatsAsOutput() : in_mat(300, 300, CV_8UC3),
+          m_c([&](){
+                      cv::GMat in;
+                      cv::GMat out[3];
+                      std::tie(out[0], out[1], out[2]) = cv::gapi::split3(in);
+                      return cv::GComputation({in}, {out[0], out[1], out[2]});
+                  })
+          {
+              cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255));
+              cv::split(in_mat, ref_mats);
+          }
+
+          void run(std::vector<cv::Mat>& out_mats)
+          {
+              m_c.apply({in_mat}, out_mats);
+          }
+
+          void check(const std::vector<cv::Mat>& out_mats)
+          {
+              for (const auto& it : ade::util::zip(ref_mats, out_mats))
+              {
+                  const auto& ref_mat = std::get<0>(it);
+                  const auto& out_mat = std::get<1>(it);
+
+                  EXPECT_EQ(0, cv::countNonZero(ref_mat != out_mat));
+              }
+          }
+      };
   }
 
   TEST_F(GComputationApplyTest, ThrowDontPassCustomKernel)
@@ -65,4 +101,37 @@ namespace opencv_test
       ASSERT_NO_THROW(m_c.apply(in_mat, out_mat, cv::compile_args(pkg)));
   }
 
+  TEST_F(GComputationVectorMatsAsOutput, OutputAllocated)
+  {
+      std::vector<cv::Mat> out_mats(3);
+      for (auto& out_mat : out_mats)
+      {
+          out_mat.create(in_mat.size(), CV_8UC1);
+      }
+
+      run(out_mats);
+      check(out_mats);
+  }
+
+  TEST_F(GComputationVectorMatsAsOutput, OutputNotAllocated)
+  {
+      std::vector<cv::Mat> out_mats(3);
+
+      run(out_mats);
+      check(out_mats);
+  }
+
+  TEST_F(GComputationVectorMatsAsOutput, OutputAllocatedWithInvalidMeta)
+  {
+      std::vector<cv::Mat> out_mats(3);
+
+      for (auto& out_mat : out_mats)
+      {
+          out_mat.create(in_mat.size() / 2, CV_8UC1);
+      }
+
+      run(out_mats);
+      check(out_mats);
+  }
+
 } // namespace opencv_test
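
The new fixture and tests above document the std::vector<cv::Mat> output path of GComputation::apply(); below is a stand-alone sketch of the same pattern (variable names are illustrative, the calls are the ones the fixture itself makes):

// Sketch only: split a BGR image into planes, letting apply() allocate the
// output vector elements; the vector may be empty, pre-allocated, or wrongly
// sized, as the three tests above verify.
#include <tuple>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>

static void split3_into_vector_sketch()
{
    cv::Mat bgr(300, 300, CV_8UC3);
    cv::randu(bgr, cv::Scalar::all(0), cv::Scalar::all(255));

    cv::GMat in;
    cv::GMat b, g, r;
    std::tie(b, g, r) = cv::gapi::split3(in);
    cv::GComputation split({in}, {b, g, r});

    std::vector<cv::Mat> planes;
    split.apply({bgr}, planes);   // planes now holds three CV_8UC1 mats
}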
index 7cb6f9f..6c4e10a 100644 (file)
@@ -10,7 +10,7 @@
 
 #include "logger.hpp"
 #include "common/gapi_tests_common.hpp"
-#include "opencv2/gapi/gpu/ggpukernel.hpp"
+#include <opencv2/gapi/gpu/ggpukernel.hpp>
 #include "opencl_kernels_test_gapi.hpp"
 
 
index aeb4762..7a33b0d 100644 (file)
 
 
 #include "test_precomp.hpp"
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
 #include "gapi_mock_kernels.hpp"
 
+#include <opencv2/gapi/cpu/gcpukernel.hpp>     // cpu::backend
+#include <opencv2/gapi/fluid/gfluidkernel.hpp> // fluid::backend
+
 namespace opencv_test
 {
 
 namespace
 {
-    G_TYPED_KERNEL(GClone, <GMat(GMat)>, "org.opencv.test.clone")
+    namespace I
     {
-        static GMatDesc outMeta(GMatDesc in) { return in;  }
+        G_TYPED_KERNEL(GClone, <GMat(GMat)>, "org.opencv.test.clone")
+        {
+            static GMatDesc outMeta(GMatDesc in) { return in;  }
+        };
+    }
 
+    enum class KernelTags
+    {
+        CPU_CUSTOM_BGR2GRAY,
+        CPU_CUSTOM_CLONE,
+        CPU_CUSTOM_ADD,
+        FLUID_CUSTOM_BGR2GRAY,
+        FLUID_CUSTOM_CLONE,
+        FLUID_CUSTOM_ADD
     };
 
-    GAPI_OCV_KERNEL(GCloneImpl, GClone)
+    class HeteroGraph: public ::testing::Test
     {
-        static void run(const cv::Mat& in, cv::Mat &out)
+    public:
+        HeteroGraph()
         {
-            out = in.clone();
+            auto tmp = I::GClone::on(cv::gapi::add(in[0], in[1]));
+            out = cv::gapi::imgproc::GBGR2Gray::on(tmp);
+        }
+
+        static void registerCallKernel(KernelTags kernel_tag) {
+            kernel_calls.insert(kernel_tag);
+        }
+
+        bool checkCallKernel(KernelTags kernel_tag) {
+            return ade::util::contains(kernel_calls, kernel_tag);
         }
+
+    protected:
+        void SetUp() override
+        {
+            if (!kernel_calls.empty())
+                cv::util::throw_error(std::logic_error("Kernel call log has not been cleared!!!"));
+        }
+
+        void TearDown() override
+        {
+            kernel_calls.clear();
+        }
+
+    protected:
+        cv::GMat in[2], out;
+        static std::set<KernelTags> kernel_calls;
     };
-}
+
+    namespace cpu
+    {
+        GAPI_OCV_KERNEL(GClone, I::GClone)
+        {
+            static void run(const cv::Mat&, cv::Mat)
+            {
+                HeteroGraph::registerCallKernel(KernelTags::CPU_CUSTOM_CLONE);
+            }
+        };
+
+        GAPI_OCV_KERNEL(BGR2Gray, cv::gapi::imgproc::GBGR2Gray)
+        {
+            static void run(const cv::Mat&, cv::Mat&)
+            {
+                HeteroGraph::registerCallKernel(KernelTags::CPU_CUSTOM_BGR2GRAY);
+            }
+        };
+
+        GAPI_OCV_KERNEL(GAdd, cv::gapi::core::GAdd)
+        {
+            static void run(const cv::Mat&, const cv::Mat&, int, cv::Mat&)
+            {
+                HeteroGraph::registerCallKernel(KernelTags::CPU_CUSTOM_ADD);
+            }
+        };
+    }
+
+    namespace fluid
+    {
+        GAPI_FLUID_KERNEL(GClone, I::GClone, false)
+        {
+            static const int Window = 1;
+            static void run(const cv::gapi::fluid::View&, cv::gapi::fluid::Buffer)
+            {
+                HeteroGraph::registerCallKernel(KernelTags::FLUID_CUSTOM_CLONE);
+            }
+        };
+
+        GAPI_FLUID_KERNEL(BGR2Gray, cv::gapi::imgproc::GBGR2Gray, false)
+        {
+            static const int Window = 1;
+            static void run(const cv::gapi::fluid::View&, cv::gapi::fluid::Buffer&)
+            {
+                HeteroGraph::registerCallKernel(KernelTags::FLUID_CUSTOM_BGR2GRAY);
+            }
+        };
+
+        GAPI_FLUID_KERNEL(GAdd, cv::gapi::core::GAdd, false)
+        {
+            static const int Window = 1;
+            static void run(const cv::gapi::fluid::View&, const cv::gapi::fluid::View&,
+                            int, cv::gapi::fluid::Buffer&)
+            {
+                HeteroGraph::registerCallKernel(KernelTags::FLUID_CUSTOM_ADD);
+            }
+        };
+    }
+
+    std::set<KernelTags> HeteroGraph::kernel_calls;
+} // anonymous namespace
 
 TEST(KernelPackage, Create)
 {
@@ -57,17 +158,6 @@ TEST(KernelPackage, IncludesAPI)
     EXPECT_FALSE(pkg.includesAPI<I::Qux>());
 }
 
-TEST(KernelPackage, IncludesAPI_Overlapping)
-{
-    namespace J = Jupiter;
-    namespace S = Saturn;
-    auto pkg = cv::gapi::kernels<J::Foo, J::Bar, S::Foo, S::Bar>();
-    EXPECT_TRUE (pkg.includesAPI<I::Foo>());
-    EXPECT_TRUE (pkg.includesAPI<I::Bar>());
-    EXPECT_FALSE(pkg.includesAPI<I::Baz>());
-    EXPECT_FALSE(pkg.includesAPI<I::Qux>());
-}
-
 TEST(KernelPackage, Include_Add)
 {
     namespace J = Jupiter;
@@ -78,23 +168,6 @@ TEST(KernelPackage, Include_Add)
     EXPECT_TRUE(pkg.includes<J::Qux>());
 }
 
-TEST(KernelPackage, Include_KEEP)
-{
-    namespace J = Jupiter;
-    namespace S = Saturn;
-    auto pkg = cv::gapi::kernels<J::Foo, J::Bar>();
-    EXPECT_FALSE(pkg.includes<S::Foo>());
-    EXPECT_FALSE(pkg.includes<S::Bar>());
-
-    pkg.include<S::Bar>(); // default (KEEP)
-    EXPECT_TRUE(pkg.includes<J::Bar>());
-    EXPECT_TRUE(pkg.includes<S::Bar>());
-
-    pkg.include<S::Foo>(cv::unite_policy::KEEP); // explicit (KEEP)
-    EXPECT_TRUE(pkg.includes<J::Foo>());
-    EXPECT_TRUE(pkg.includes<S::Foo>());
-}
-
 TEST(KernelPackage, Include_REPLACE)
 {
     namespace J = Jupiter;
@@ -102,7 +175,7 @@ TEST(KernelPackage, Include_REPLACE)
     auto pkg = cv::gapi::kernels<J::Foo, J::Bar>();
     EXPECT_FALSE(pkg.includes<S::Bar>());
 
-    pkg.include<S::Bar>(cv::unite_policy::REPLACE);
+    pkg.include<S::Bar>();
     EXPECT_FALSE(pkg.includes<J::Bar>());
     EXPECT_TRUE(pkg.includes<S::Bar>());
 }
@@ -111,31 +184,27 @@ TEST(KernelPackage, RemoveBackend)
 {
     namespace J = Jupiter;
     namespace S = Saturn;
-    auto pkg = cv::gapi::kernels<J::Foo, J::Bar, S::Foo>();
+    auto pkg = cv::gapi::kernels<J::Foo, J::Bar, S::Baz>();
     EXPECT_TRUE(pkg.includes<J::Foo>());
     EXPECT_TRUE(pkg.includes<J::Bar>());
-    EXPECT_TRUE(pkg.includes<S::Foo>());
 
     pkg.remove(J::backend());
     EXPECT_FALSE(pkg.includes<J::Foo>());
     EXPECT_FALSE(pkg.includes<J::Bar>());
-    EXPECT_TRUE(pkg.includes<S::Foo>());
+    EXPECT_TRUE(pkg.includes<S::Baz>());
 };
 
 TEST(KernelPackage, RemoveAPI)
 {
     namespace J = Jupiter;
     namespace S = Saturn;
-    auto pkg = cv::gapi::kernels<J::Foo, J::Bar, S::Foo, S::Bar>();
+    auto pkg = cv::gapi::kernels<J::Foo, J::Bar>();
     EXPECT_TRUE(pkg.includes<J::Foo>());
     EXPECT_TRUE(pkg.includes<J::Bar>());
-    EXPECT_TRUE(pkg.includes<S::Foo>());
 
     pkg.remove<I::Foo>();
     EXPECT_TRUE(pkg.includes<J::Bar>());
-    EXPECT_TRUE(pkg.includes<S::Bar>());
     EXPECT_FALSE(pkg.includes<J::Foo>());
-    EXPECT_FALSE(pkg.includes<S::Foo>());
 };
 
 TEST(KernelPackage, CreateHetero)
@@ -177,7 +246,7 @@ TEST(KernelPackage, Combine_REPLACE_Full)
     namespace S = Saturn;
     auto j_pkg = cv::gapi::kernels<J::Foo, J::Bar, J::Baz>();
     auto s_pkg = cv::gapi::kernels<S::Foo, S::Bar, S::Baz>();
-    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg, cv::unite_policy::REPLACE);
+    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg);
 
     EXPECT_EQ(3u, u_pkg.size());
     EXPECT_FALSE(u_pkg.includes<J::Foo>());
@@ -194,7 +263,7 @@ TEST(KernelPackage, Combine_REPLACE_Partial)
     namespace S = Saturn;
     auto j_pkg = cv::gapi::kernels<J::Foo, J::Bar>();
     auto s_pkg = cv::gapi::kernels<S::Bar>();
-    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg, cv::unite_policy::REPLACE);
+    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg);
 
     EXPECT_EQ(2u, u_pkg.size());
     EXPECT_TRUE (u_pkg.includes<J::Foo>());
@@ -208,38 +277,7 @@ TEST(KernelPackage, Combine_REPLACE_Append)
     namespace S = Saturn;
     auto j_pkg = cv::gapi::kernels<J::Foo, J::Bar>();
     auto s_pkg = cv::gapi::kernels<S::Qux>();
-    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg, cv::unite_policy::REPLACE);
-
-    EXPECT_EQ(3u, u_pkg.size());
-    EXPECT_TRUE(u_pkg.includes<J::Foo>());
-    EXPECT_TRUE(u_pkg.includes<J::Bar>());
-    EXPECT_TRUE(u_pkg.includes<S::Qux>());
-}
-
-TEST(KernelPackage, Combine_KEEP_AllDups)
-{
-    namespace J = Jupiter;
-    namespace S = Saturn;
-    auto j_pkg = cv::gapi::kernels<J::Foo, J::Bar, J::Baz>();
-    auto s_pkg = cv::gapi::kernels<S::Foo, S::Bar, S::Baz>();
-    auto u_pkg = cv::gapi::combine(j_pkg ,s_pkg, cv::unite_policy::KEEP);
-
-    EXPECT_EQ(6u, u_pkg.size());
-    EXPECT_TRUE(u_pkg.includes<J::Foo>());
-    EXPECT_TRUE(u_pkg.includes<J::Bar>());
-    EXPECT_TRUE(u_pkg.includes<J::Baz>());
-    EXPECT_TRUE(u_pkg.includes<S::Foo>());
-    EXPECT_TRUE(u_pkg.includes<S::Bar>());
-    EXPECT_TRUE(u_pkg.includes<S::Baz>());
-}
-
-TEST(KernelPackage, Combine_KEEP_Append_NoDups)
-{
-    namespace J = Jupiter;
-    namespace S = Saturn;
-    auto j_pkg = cv::gapi::kernels<J::Foo, J::Bar>();
-    auto s_pkg = cv::gapi::kernels<S::Qux>();
-    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg, cv::unite_policy::KEEP);
+    auto u_pkg = cv::gapi::combine(j_pkg, s_pkg);
 
     EXPECT_EQ(3u, u_pkg.size());
     EXPECT_TRUE(u_pkg.includes<J::Foo>());
@@ -252,7 +290,7 @@ TEST(KernelPackage, TestWithEmptyLHS)
     namespace J = Jupiter;
     auto lhs = cv::gapi::kernels<>();
     auto rhs = cv::gapi::kernels<J::Foo>();
-    auto pkg = cv::gapi::combine(lhs, rhs, cv::unite_policy::KEEP);
+    auto pkg = cv::gapi::combine(lhs, rhs);
 
     EXPECT_EQ(1u, pkg.size());
     EXPECT_TRUE(pkg.includes<J::Foo>());
@@ -263,22 +301,211 @@ TEST(KernelPackage, TestWithEmptyRHS)
     namespace J = Jupiter;
     auto lhs = cv::gapi::kernels<J::Foo>();
     auto rhs = cv::gapi::kernels<>();
-    auto pkg = cv::gapi::combine(lhs, rhs, cv::unite_policy::KEEP);
+    auto pkg = cv::gapi::combine(lhs, rhs);
 
     EXPECT_EQ(1u, pkg.size());
     EXPECT_TRUE(pkg.includes<J::Foo>());
 }
 
+TEST(KernelPackage, Return_Unique_Backends)
+{
+    auto pkg = cv::gapi::kernels<cpu::GClone, fluid::BGR2Gray, fluid::GAdd>();
+    EXPECT_EQ(2u, pkg.backends().size());
+}
+
 TEST(KernelPackage, Can_Use_Custom_Kernel)
 {
     cv::GMat in[2];
-    auto out = GClone::on(cv::gapi::add(in[0], in[1]));
+    auto out = I::GClone::on(cv::gapi::add(in[0], in[1]));
     const auto in_meta = cv::GMetaArg(cv::GMatDesc{CV_8U,1,cv::Size(32,32)});
 
-    auto pkg = cv::gapi::kernels<GCloneImpl>();
+    auto pkg = cv::gapi::kernels<cpu::GClone>();
 
     EXPECT_NO_THROW(cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
                         compile({in_meta, in_meta}, cv::compile_args(pkg)));
 }
 
+TEST_F(HeteroGraph, Call_Custom_Kernel_Default_Backend)
+{
+    // in0 -> GCPUAdd -> tmp -> cpu::GClone -> GCPUBGR2Gray -> out
+    //            ^
+    //            |
+    // in1 -------`
+
+    cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC3),
+            in_mat2 = cv::Mat::eye(3, 3, CV_8UC3),
+            out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GClone>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(pkg));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::CPU_CUSTOM_CLONE));
+}
+
+TEST_F(HeteroGraph, Call_Custom_Kernel_Not_Default_Backend)
+{
+    // in0 -> GCPUAdd -> tmp -> fluid::GClone -> GCPUBGR2Gray -> out
+    //            ^
+    //            |
+    // in1 -------`
+
+    cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC3),
+            in_mat2 = cv::Mat::eye(3, 3, CV_8UC3),
+            out_mat;
+
+    auto pkg = cv::gapi::kernels<fluid::GClone>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(pkg));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_CLONE));
+}
+
+TEST_F(HeteroGraph, Replace_Default_To_Same_Backend)
+{
+    // in0 -> GCPUAdd -> tmp -> cpu::GClone -> cpu::BGR2Gray -> out
+    //            ^
+    //            |
+    // in1 -------`
+
+    cv::Mat in_mat1 = cv::Mat::eye(3, 3, CV_8UC3),
+            in_mat2 = cv::Mat::eye(3, 3, CV_8UC3),
+            out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GClone, cpu::BGR2Gray>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(pkg));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::CPU_CUSTOM_BGR2GRAY));
+}
+
+TEST_F(HeteroGraph, Replace_Default_To_Another_Backend)
+{
+    //in0 -> GCPUAdd -> tmp -> cpu::GClone -> fluid::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+            out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GClone, fluid::BGR2Gray>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(pkg));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_BGR2GRAY));
+}
+
+TEST_F(HeteroGraph, Use_Only_Same_Backend)
+{
+    //in0 -> cpu::GAdd -> tmp -> cpu::GClone -> cpu::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+        out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GAdd, cpu::GClone, cpu::BGR2Gray>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(cv::gapi::use_only{pkg}));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::CPU_CUSTOM_ADD));
+    EXPECT_TRUE(checkCallKernel(KernelTags::CPU_CUSTOM_CLONE));
+    EXPECT_TRUE(checkCallKernel(KernelTags::CPU_CUSTOM_BGR2GRAY));
+}
+
+TEST_F(HeteroGraph, Use_Only_Another_Backend)
+{
+    //in0 -> fluid::GAdd -> tmp -> fluid::GClone -> fluid::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+        out_mat;
+
+    auto pkg = cv::gapi::kernels<fluid::GAdd, fluid::GClone, fluid::BGR2Gray>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(cv::gapi::use_only{pkg}));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_ADD));
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_CLONE));
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_BGR2GRAY));
+}
+
+TEST_F(HeteroGraph, Use_Only_Hetero_Backend)
+{
+    //in0 -> cpu::GAdd -> tmp -> fluid::GClone -> fluid::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+        out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GAdd, fluid::GClone, fluid::BGR2Gray>();
+    cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(cv::gapi::use_only{pkg}));
+
+    EXPECT_TRUE(checkCallKernel(KernelTags::CPU_CUSTOM_ADD));
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_CLONE));
+    EXPECT_TRUE(checkCallKernel(KernelTags::FLUID_CUSTOM_BGR2GRAY));
+}
+
+TEST_F(HeteroGraph, Use_Only_Not_Found_Default)
+{
+    //in0 -> GCPUAdd -> tmp -> fluid::GClone -> fluid::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+        out_mat;
+
+    auto pkg = cv::gapi::kernels<fluid::GClone, fluid::BGR2Gray>();
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(cv::gapi::use_only{pkg})));
+}
+
+TEST_F(HeteroGraph, Use_Only_Not_Found_Custom)
+{
+    //in0 -> cpu::GAdd -> tmp -> fluid::GClone -> fluid::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+        out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GAdd, fluid::BGR2Gray>();
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat), cv::compile_args(cv::gapi::use_only{pkg})));
+}
+
+TEST_F(HeteroGraph, Use_Only_Other_Package_Ignored)
+{
+    //in0 -> cpu::GAdd -> tmp -> fluid::GClone -> fluid::BGR2Gray -> out
+    //            ^
+    //            |
+    //in1 --------`
+
+    cv::Mat in_mat1(300, 300, CV_8UC3),
+            in_mat2(300, 300, CV_8UC3),
+        out_mat;
+
+    auto pkg = cv::gapi::kernels<cpu::GAdd, fluid::BGR2Gray>();
+    auto clone_pkg = cv::gapi::kernels<cpu::GClone>();
+
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in[0], in[1]), cv::GOut(out)).
+        apply(cv::gin(in_mat1, in_mat2), cv::gout(out_mat),
+              cv::compile_args(clone_pkg, cv::gapi::use_only{pkg})));
+}
+
 } // namespace opencv_test
index cd876ef..9163281 100644 (file)
@@ -5,7 +5,7 @@
 // Copyright (C) 2018 Intel Corporation
 
 
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
 
 #include "api/gbackend_priv.hpp" // directly instantiate GBackend::Priv
 
index 815aa0d..0bfb4f6 100644 (file)
@@ -298,4 +298,20 @@ TEST(GAPI_Pipeline, PipelineAllocatingKernel)
 
     EXPECT_THROW(comp.apply(in_mat, out_mat, cv::compile_args(pkg)), std::logic_error);
 }
+
+TEST(GAPI_Pipeline, CanUseOwnMatAsOutput)
+{
+    cv::GMat in;
+    cv::GComputation comp(in, cv::gapi::bitwise_not(in));
+
+    cv::Mat in_mat(3, 3, CV_8UC1);
+    cv::Mat out_mat(3, 3, CV_8UC1);
+
+    cv::gapi::own::Mat in_own_mat(in_mat.rows, in_mat.cols, CV_8UC1, in_mat.data);
+    cv::gapi::own::Mat out_own_mat(out_mat.rows, out_mat.cols, CV_8UC1, out_mat.data);
+
+    // FIXME add overload for apply(cv::gapi::own::Mat in, cv::gapi::own::Mat& out)
+    EXPECT_NO_THROW(comp.apply({in_own_mat}, {out_own_mat}));
+}
+
 } // namespace opencv_test
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_transform_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_transform_tests.cpp
new file mode 100644 (file)
index 0000000..c18e930
--- /dev/null
@@ -0,0 +1,189 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#include <tuple>
+
+#include "test_precomp.hpp"
+#include "opencv2/gapi/gtransform.hpp"
+#include "opencv2/gapi/gtype_traits.hpp"
+// explicit include to use GComputation::Priv
+#include "api/gcomputation_priv.hpp"
+
+namespace opencv_test
+{
+
+namespace
+{
+using GMat = cv::GMat;
+using GMat2 = std::tuple<GMat, GMat>;
+using GMat3 = std::tuple<GMat, GMat, GMat>;
+using GScalar = cv::GScalar;
+template <typename T> using GArray = cv::GArray<T>;
+
+GAPI_TRANSFORM(gmat_in_gmat_out, <GMat(GMat)>, "gmat_in_gmat_out")
+{
+    static GMat pattern(GMat) { return {}; }
+    static GMat substitute(GMat) { return {}; }
+};
+
+GAPI_TRANSFORM(gmat2_in_gmat_out, <GMat(GMat, GMat)>, "gmat2_in_gmat_out")
+{
+    static GMat pattern(GMat, GMat) { return {}; }
+    static GMat substitute(GMat, GMat) { return {}; }
+};
+
+GAPI_TRANSFORM(gmat2_in_gmat3_out, <GMat3(GMat, GMat)>, "gmat2_in_gmat3_out")
+{
+    static GMat3 pattern(GMat, GMat) { return {}; }
+    static GMat3 substitute(GMat, GMat) { return {}; }
+};
+
+GAPI_TRANSFORM(gmatp_in_gmatp_out, <GMatP(GMatP)>, "gmatp_in_gmatp_out")
+{
+    static GMatP pattern(GMatP) { return {}; }
+    static GMatP substitute(GMatP) { return {}; }
+};
+
+GAPI_TRANSFORM(gsc_in_gmat_out, <GMat(GScalar)>, "gsc_in_gmat_out")
+{
+    static GMat pattern(GScalar) { return {}; }
+    static GMat substitute(GScalar) { return {}; }
+};
+
+GAPI_TRANSFORM(gmat_in_gsc_out, <GScalar(GMat)>, "gmat_in_gsc_out")
+{
+    static GScalar pattern(GMat) { return {}; }
+    static GScalar substitute(GMat) { return {}; }
+};
+
+GAPI_TRANSFORM(garr_in_gmat_out, <GMat(GArray<int>)>, "garr_in_gmat_out")
+{
+    static GMat pattern(GArray<int>) { return {}; }
+    static GMat substitute(GArray<int>) { return {}; }
+};
+
+GAPI_TRANSFORM(gmat_in_garr_out, <GArray<int>(GMat)>, "gmat_in_garr_out")
+{
+    static GArray<int> pattern(GMat) { return {}; }
+    static GArray<int> substitute(GMat) { return {}; }
+};
+
+GAPI_TRANSFORM(gmat_gsc_garray_in_gmat2_out, <GMat2(GMat, GScalar, GArray<int>)>, "gmat_gsc_garray_in_gmat2_out")
+{
+    static GMat2 pattern(GMat, GScalar, GArray<int>) { return {}; }
+    static GMat2 substitute(GMat, GScalar, GArray<int>) { return {}; }
+};
+
+} // anonymous namespace
+
+TEST(KernelPackageTransform, CreatePackage)
+{
+    auto pkg = cv::gapi::kernels
+        < gmat_in_gmat_out
+        , gmat2_in_gmat_out
+        , gmat2_in_gmat3_out
+        , gmatp_in_gmatp_out
+        , gsc_in_gmat_out
+        , gmat_in_gsc_out
+        , garr_in_gmat_out
+        , gmat_in_garr_out
+        , gmat_gsc_garray_in_gmat2_out
+        >();
+
+    auto tr = pkg.get_transformations();
+    EXPECT_EQ(9u, tr.size());
+}
+
+TEST(KernelPackageTransform, Include)
+{
+    cv::gapi::GKernelPackage pkg;
+    pkg.include<gmat_in_gmat_out>();
+    pkg.include<gmat2_in_gmat_out>();
+    pkg.include<gmat2_in_gmat3_out>();
+    auto tr = pkg.get_transformations();
+    EXPECT_EQ(3u, tr.size());
+}
+
+TEST(KernelPackageTransform, Combine)
+{
+    auto pkg1 = cv::gapi::kernels<gmat_in_gmat_out>();
+    auto pkg2 = cv::gapi::kernels<gmat2_in_gmat_out>();
+    auto pkg_comb = cv::gapi::combine(pkg1, pkg2);
+    auto tr = pkg_comb.get_transformations();
+    EXPECT_EQ(2u, tr.size());
+}
+
+namespace {
+    template <typename T>
+    inline bool ProtoContainsT(const cv::GProtoArg &arg) {
+        return cv::GProtoArg::index_of<T>() == arg.index();
+    }
+} // anonymous namespace
+
+TEST(KernelPackageTransform, gmat_gsc_in_gmat_out)
+{
+    auto tr = gmat_gsc_garray_in_gmat2_out::transformation();
+
+    auto check = [](const cv::GComputation &comp){
+        const auto &p = comp.priv();
+        EXPECT_EQ(3u, p.m_ins.size());
+        EXPECT_EQ(2u, p.m_outs.size());
+
+        EXPECT_TRUE(ProtoContainsT<GMat>(p.m_ins[0]));
+        EXPECT_TRUE(ProtoContainsT<GScalar>(p.m_ins[1]));
+        EXPECT_TRUE(ProtoContainsT<cv::detail::GArrayU>(p.m_ins[2]));
+        EXPECT_TRUE(cv::util::get<cv::detail::GArrayU>(p.m_ins[2]).holds<int>());
+        EXPECT_FALSE(cv::util::get<cv::detail::GArrayU>(p.m_ins[2]).holds<char>());
+
+        EXPECT_TRUE(ProtoContainsT<GMat>(p.m_outs[0]));
+        EXPECT_TRUE(ProtoContainsT<GMat>(p.m_outs[1]));
+    };
+
+    check(tr.pattern());
+    check(tr.substitute());
+}
+
+TEST(KernelPackageTransform, gmat_in_garr_out)
+{
+    auto tr = gmat_in_garr_out::transformation();
+
+    auto check = [](const cv::GComputation &comp){
+        const auto &p = comp.priv();
+        EXPECT_EQ(1u, p.m_ins.size());
+        EXPECT_EQ(1u, p.m_outs.size());
+
+        EXPECT_TRUE(ProtoContainsT<GMat>(p.m_ins[0]));
+
+        EXPECT_TRUE(ProtoContainsT<cv::detail::GArrayU>(p.m_outs[0]));
+        EXPECT_TRUE(cv::util::get<cv::detail::GArrayU>(p.m_outs[0]).holds<int>());
+        EXPECT_FALSE(cv::util::get<cv::detail::GArrayU>(p.m_outs[0]).holds<float>());
+    };
+
+    check(tr.pattern());
+    check(tr.substitute());
+}
+
+TEST(KernelPackageTransform, garr_in_gmat_out)
+{
+    auto tr = garr_in_gmat_out::transformation();
+
+    auto check = [](const cv::GComputation &comp){
+        const auto &p = comp.priv();
+        EXPECT_EQ(1u, p.m_ins.size());
+        EXPECT_EQ(1u, p.m_outs.size());
+
+        EXPECT_TRUE(ProtoContainsT<cv::detail::GArrayU>(p.m_ins[0]));
+        EXPECT_TRUE(cv::util::get<cv::detail::GArrayU>(p.m_ins[0]).holds<int>());
+        EXPECT_FALSE(cv::util::get<cv::detail::GArrayU>(p.m_ins[0]).holds<bool>());
+
+        EXPECT_TRUE(ProtoContainsT<GMat>(p.m_outs[0]));
+    };
+
+    check(tr.pattern());
+    check(tr.substitute());
+}
+
+} // namespace opencv_test
index 5ee3f65..0f67688 100644 (file)
@@ -2,73 +2,68 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_core_tests.hpp"
 
-#define CORE_GPU cv::gapi::core::gpu::kernels()
+namespace
+{
+#define CORE_GPU [] () { return cv::compile_args(cv::gapi::core::gpu::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
 // FIXME: Wut? See MulTestGPU/MathOpTest below (duplicate?)
 INSTANTIATE_TEST_CASE_P(AddTestGPU, MathOpTest,
-                        Combine(Values(ADD, MUL),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(1.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(false),
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values(CORE_GPU),
+                                Values(ADD, MUL),
+                                testing::Bool(),
+                                Values(1.0),
+                                Values(false)));
 
 INSTANTIATE_TEST_CASE_P(MulTestGPU, MathOpTest,
-                        Combine(Values(MUL),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(1.0, 0.5, 2.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(false),
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values(CORE_GPU),
+                                Values(MUL),
+                                testing::Bool(),
+                                Values(1.0, 0.5, 2.0),
+                                Values(false)));
 
 INSTANTIATE_TEST_CASE_P(SubTestGPU, MathOpTest,
-                        Combine(Values(SUB),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values (1.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
+                                Values(CORE_GPU),
+                                Values(SUB),
                                 testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values (1.0),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(DivTestGPU, MathOpTest,
-                        Combine(Values(DIV),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values (1.0, 0.5, 2.0),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
+                                Values(CORE_GPU),
+                                Values(DIV),
                                 testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintMathOpCoreParams());
+                                Values (1.0, 0.5, 2.0),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(MulTestGPU, MulDoubleTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -76,8 +71,7 @@ INSTANTIATE_TEST_CASE_P(MulTestGPU, MulDoubleTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(DivTestGPU, DivTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -85,8 +79,7 @@ INSTANTIATE_TEST_CASE_P(DivTestGPU, DivTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(DivCTestGPU, DivCTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -94,16 +87,15 @@ INSTANTIATE_TEST_CASE_P(DivCTestGPU, DivCTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(MeanTestGPU, MeanTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-    /*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 //TODO: mask test doesn't work
 #if 0
@@ -112,8 +104,7 @@ INSTANTIATE_TEST_CASE_P(MaskTestGPU, MaskTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU)));
 #endif
 
 INSTANTIATE_TEST_CASE_P(SelectTestGPU, SelectTest,
@@ -121,92 +112,92 @@ INSTANTIATE_TEST_CASE_P(SelectTestGPU, SelectTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(Polar2CartGPU, Polar2CartTest,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_32FC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_32FC1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(Cart2PolarGPU, Cart2PolarTest,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_32FC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_32FC1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(CompareTestGPU, CmpTest,
-                        Combine(Values(CMP_EQ, CMP_GE, CMP_NE, CMP_GT, CMP_LT, CMP_LE),
-                                testing::Bool(),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintCmpCoreParams());
+                                Values(CV_8U),
+                                Values(CORE_GPU),
+                                Values(CMP_EQ, CMP_GE, CMP_NE, CMP_GT, CMP_LT, CMP_LE),
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(BitwiseTestGPU, BitwiseTest,
-                        Combine(Values(AND, OR, XOR),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintBWCoreParams());
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(AND, OR, XOR)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotTestGPU, NotTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(MinTestGPU, MinTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(MaxTestGPU, MaxTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(SumTestGPU, SumTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(AbsToleranceScalar(1e-3).to_compare_f()),//TODO: too relaxed?
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(AbsToleranceScalar(1e-3).to_compare_obj())));//TODO: too relaxed?
 
 INSTANTIATE_TEST_CASE_P(AbsDiffTestGPU, AbsDiffTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffCTestGPU, AbsDiffCTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(AddWeightedTestGPU, AddWeightedTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
@@ -214,44 +205,45 @@ INSTANTIATE_TEST_CASE_P(AddWeightedTestGPU, AddWeightedTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(NormTestGPU, NormTest,
-                        Combine(Values(NORM_INF, NORM_L1, NORM_L2),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(AbsToleranceScalar(1e-3).to_compare_f()), //TODO: too relaxed?
-                                Values(cv::compile_args(CORE_GPU))),
-                        opencv_test::PrintNormCoreParams());
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(AbsToleranceScalar(1e-3).to_compare_obj()), //TODO: too relaxed?
+                                Values(NORM_INF, NORM_L1, NORM_L2)));
 
 INSTANTIATE_TEST_CASE_P(IntegralTestGPU, IntegralTest,
                         Combine(Values( CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(ThresholdTestGPU, ThresholdTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC,
+                                    cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)));
 
 INSTANTIATE_TEST_CASE_P(ThresholdTestGPU, ThresholdOTTest,
                         Combine(Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::THRESH_OTSU, cv::THRESH_TRIANGLE),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(cv::THRESH_OTSU, cv::THRESH_TRIANGLE)));
 
 
 INSTANTIATE_TEST_CASE_P(InRangeTestGPU, InRangeTest,
@@ -259,120 +251,155 @@ INSTANTIATE_TEST_CASE_P(InRangeTestGPU, InRangeTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(Split3TestGPU, Split3Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC3),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8UC1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(Split4TestGPU, Split4Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC4),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8UC1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(ResizeTestGPU, ResizeTest,
-                        Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(AbsSimilarPoints(2, 0.05).to_compare_obj()),
+                                Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
                                 Values(cv::Size(64,64),
-                                       cv::Size(30,30)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                       cv::Size(30,30))));
 
 INSTANTIATE_TEST_CASE_P(ResizeTestGPU, ResizeTestFxFy,
-                        Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()),
-                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(AbsSimilarPoints(2, 0.05).to_compare_obj()),
+                                Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA),
                                 Values(0.5, 0.1),
-                                Values(0.5, 0.1),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(0.5, 0.1)));
 
 INSTANTIATE_TEST_CASE_P(Merge3TestGPU, Merge3Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8UC3),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(Merge4TestGPU, Merge4Test,
-                        Combine(Values(cv::Size(1280, 720),
+                        Combine(Values(CV_8UC1),
+                                Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8UC4),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(RemapTestGPU, RemapTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(FlipTestGPU, FlipTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(0,1,-1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(0,1,-1)));
 
 INSTANTIATE_TEST_CASE_P(CropTestGPU, CropTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(cv::Rect(10, 8, 20, 35), cv::Rect(4, 10, 37, 50)),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(cv::Rect(10, 8, 20, 35), cv::Rect(4, 10, 37, 50))));
 
 INSTANTIATE_TEST_CASE_P(LUTTestGPU, LUTTest,
                         Combine(Values(CV_8UC1, CV_8UC3),
-                                Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8UC1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(LUTTestCustomGPU, LUTTest,
                         Combine(Values(CV_8UC3),
-                                Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8UC3),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(ConvertToGPU, ConvertToTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(CV_8U, CV_16U, CV_16S, CV_32F),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CV_8U, CV_16U, CV_16S, CV_32F),
+                                Values(CORE_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(2.5, 1.0, -1.0),
+                                Values(250.0, 0.0, -128.0)));
 
 INSTANTIATE_TEST_CASE_P(ConcatHorTestGPU, ConcatHorTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(ConcatVertTestGPU, ConcatVertTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
+
+INSTANTIATE_TEST_CASE_P(BackendOutputAllocationTestGPU, BackendOutputAllocationTest,
+                        Combine(Values(CV_8UC3, CV_16SC2, CV_32FC1),
+                                Values(cv::Size(50, 50)),
+                                Values(-1),
+                                Values(CORE_GPU)));
+
+// FIXME: there's an issue in OCL backend with matrix reallocation that shouldn't happen
+INSTANTIATE_TEST_CASE_P(DISABLED_BackendOutputAllocationLargeSizeWithCorrectSubmatrixTestGPU,
+                        BackendOutputAllocationLargeSizeWithCorrectSubmatrixTest,
+                        Combine(Values(CV_8UC3, CV_16SC2, CV_32FC1),
+                                Values(cv::Size(50, 50)),
+                                Values(-1),
+                                Values(CORE_GPU)));
+
+INSTANTIATE_TEST_CASE_P(ReInitOutTestGPU, ReInitOutTest,
+                        Combine(Values(CV_8UC3, CV_16SC4, CV_32FC1),
+                                Values(cv::Size(640, 480)),
+                                Values(-1),
+                                Values(CORE_GPU),
+                                Values(cv::Size(640, 400),
+                                       cv::Size(10, 480))));
 
 //TODO: fix this backend to allow ConcatVertVec ConcatHorVec
 #if 0
@@ -381,13 +408,13 @@ INSTANTIATE_TEST_CASE_P(ConcatVertVecTestGPU, ConcatVertVecTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU)));
 
 INSTANTIATE_TEST_CASE_P(ConcatHorVecTestGPU, ConcatHorVecTest,
                         Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU)));
 #endif
 }
index 92e23e8..e745bbe 100644 (file)
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 
 #include "../common/gapi_imgproc_tests.hpp"
 
-#define IMGPROC_GPU cv::gapi::imgproc::gpu::kernels()
+namespace
+{
+#define IMGPROC_GPU [] () { return cv::compile_args(cv::gapi::imgproc::gpu::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
-
 INSTANTIATE_TEST_CASE_P(Filter2DTestGPU, Filter2DTest,
-                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 4, 5, 7),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(cv::BORDER_DEFAULT),
                                 Values(-1, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(IMGPROC_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_obj()),
+                                Values(3, 4, 5, 7),
+                                Values(cv::BORDER_DEFAULT)));
 
-INSTANTIATE_TEST_CASE_P(BoxFilterTestGPU, BoxFilterTest,
-                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_f()),
-                                Values(/*CV_8UC1,*/ CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3,5),
+INSTANTIATE_TEST_CASE_P(BoxFilterTestCPU, BoxFilterTest,
+                        Combine(Values(/*CV_8UC1,*/ CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::BORDER_DEFAULT),
                                 Values(-1, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));  //TODO: 8UC1 doesn't work
+                                Values(IMGPROC_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_obj()),
+                                Values(3,5),
+                                Values(cv::BORDER_DEFAULT)));  //TODO: 8UC1 doesn't work
+
 
 INSTANTIATE_TEST_CASE_P(SepFilterTestGPU_8U, SepFilterTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3),
-                                Values(3),
+                        Combine(Values(CV_8UC1, CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3)));
 
 INSTANTIATE_TEST_CASE_P(SepFilterTestGPU_other, SepFilterTest,
-                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3),
+                        Combine(Values(CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceFilter(1e-4f, 0.01).to_compare_obj()),
+                                Values(3)));
 
 INSTANTIATE_TEST_CASE_P(BlurTestGPU, BlurTest,
-                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3,5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(cv::BORDER_DEFAULT),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_obj()),
+                                Values(3,5),
+                                Values(cv::BORDER_DEFAULT)));
 
 INSTANTIATE_TEST_CASE_P(gaussBlurTestGPU, GaussianBlurTest,
-                        Combine(Values(ToleranceFilter(1e-5f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3),  // FIXIT 5
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceFilter(1e-5f, 0.01).to_compare_obj()),
+                                Values(3)));  // FIXIT 5
 
 INSTANTIATE_TEST_CASE_P(MedianBlurTestGPU, MedianBlurTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5)));
 
 INSTANTIATE_TEST_CASE_P(ErodeTestGPU, ErodeTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(cv::MorphShapes::MORPH_RECT,
                                        cv::MorphShapes::MORPH_CROSS,
-                                       cv::MorphShapes::MORPH_ELLIPSE),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                       cv::MorphShapes::MORPH_ELLIPSE)));
 
 INSTANTIATE_TEST_CASE_P(Erode3x3TestGPU, Erode3x3Test,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(1,2,4),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(1,2,4)));
 
 INSTANTIATE_TEST_CASE_P(DilateTestGPU, DilateTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(3, 5),
                                 Values(cv::MorphShapes::MORPH_RECT,
                                        cv::MorphShapes::MORPH_CROSS,
-                                       cv::MorphShapes::MORPH_ELLIPSE),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                       cv::MorphShapes::MORPH_ELLIPSE)));
 
 INSTANTIATE_TEST_CASE_P(Dilate3x3TestGPU, Dilate3x3Test,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(1,2,4),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values(1,2,4)));
 
 INSTANTIATE_TEST_CASE_P(SobelTestGPU, SobelTest,
-                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
-                                Values(3, 5),
+                        Combine(Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(-1, CV_16S, CV_32F),
+                                Values(IMGPROC_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_obj()),
+                                Values(3, 5),
                                 Values(0, 1),
-                                Values(1, 2),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(1, 2)));
 
 INSTANTIATE_TEST_CASE_P(SobelTestGPU32F, SobelTest,
-                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_f()),
-                                Values(CV_32FC1),
-                                Values(3, 5),
+                        Combine(Values(CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
                                 Values(CV_32F),
+                                Values(IMGPROC_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_obj()),
+                                Values(3, 5),
                                 Values(0, 1),
-                                Values(1, 2),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(1, 2)));
 
 INSTANTIATE_TEST_CASE_P(EqHistTestGPU, EqHistTest,
-                        Combine(Values(AbsExact().to_compare_f()),  // FIXIT Non reliable check
+                        Combine(Values(CV_8UC1),
                                 Values(cv::Size(1280, 720),
-                                cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                       cv::Size(640, 480)),
+                                Values(-1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsExact().to_compare_obj())));  // FIXIT Non reliable check
 
 INSTANTIATE_TEST_CASE_P(CannyTestGPU, CannyTest,
-                        Combine(Values(AbsSimilarPoints(0, 0.05).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3),
+                        Combine(Values(CV_8UC1, CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
+                                Values(CV_8UC1),
+                                Values(IMGPROC_GPU),
+                                Values(AbsSimilarPoints(0, 0.05).to_compare_obj()),
                                 Values(3.0, 120.0),
                                 Values(125.0, 240.0),
                                 Values(3, 5),
-                                testing::Bool(),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                testing::Bool()));
 
 INSTANTIATE_TEST_CASE_P(RGB2GrayTestGPU, RGB2GrayTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
-                                cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                       cv::Size(640, 480)),
+                                Values(CV_8UC1),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2GrayTestGPU, BGR2GrayTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC1),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(RGB2YUVTestGPU, RGB2YUVTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(YUV2RGBTestGPU, YUV2RGBTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(RGB2LabTestGPU, RGB2LabTest,
-                        Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(AbsSimilarPoints(1, 0.05).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2LUVTestGPU, BGR2LUVTest,
-                        Combine(Values(ToleranceColor(5e-3, 6).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(5e-3, 6).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(LUV2BGRTestGPU, LUV2BGRTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(BGR2YUVTestGPU, BGR2YUVTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 INSTANTIATE_TEST_CASE_P(YUV2BGRTestGPU, YUV2BGRTest,
-                        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+                        Combine(Values(CV_8UC3),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(IMGPROC_GPU))));
-
+                                Values(CV_8UC3),
+                                Values(IMGPROC_GPU),
+                                Values(ToleranceColor(1e-3).to_compare_obj())));
 
 } // opencv_test
index 73b1c78..a939d32 100644 (file)
@@ -8,64 +8,62 @@
 #include "../test_precomp.hpp"
 #include "../common/gapi_operators_tests.hpp"
 
-#define CORE_GPU cv::gapi::core::gpu::kernels()
+namespace
+{
+#define CORE_GPU [] () { return cv::compile_args(cv::gapi::core::gpu::kernels()); }
+}  // anonymous namespace
 
 namespace opencv_test
 {
 
-
 INSTANTIATE_TEST_CASE_P(MathOperatorTestGPU, MathOperatorMatMatTest,
-                    Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_f()),
-                            Values( opPlusM, opMinusM, opDivM,
-                                    opGreater, opLess, opGreaterEq, opLessEq, opEq, opNotEq),
-                            Values(CV_8UC1, CV_16SC1, CV_32FC1),
+                    Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
                             Values(cv::Size(1280, 720),
-                               cv::Size(640, 480),
-                               cv::Size(128, 128)),
+                                   cv::Size(640, 480),
+                                   cv::Size(128, 128)),
                             Values(-1, CV_8U, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                            Values(cv::compile_args(CORE_GPU))));
+                            Values(CORE_GPU),
+                            Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_obj()),
+                            Values( opPlusM, opMinusM, opDivM,
+                                    opGreater, opLess, opGreaterEq, opLessEq, opEq, opNotEq)));
 
 INSTANTIATE_TEST_CASE_P(MathOperatorTestGPU, MathOperatorMatScalarTest,
-                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_f()),
-                                Values( opPlus, opPlusR, opMinus, opMinusR, opMul, opMulR,  // FIXIT avoid division by values near zero: opDiv, opDivR,
-                                        opGT, opLT, opGE, opLE, opEQ, opNE,
-                                        opGTR, opLTR, opGER, opLER, opEQR, opNER),
-                                Values(CV_8UC1, CV_16SC1, CV_32FC1),
+                        Combine(Values(CV_8UC1, CV_16SC1, CV_32FC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU),
+                                Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_obj()),
+                                Values( opPlus, opPlusR, opMinus, opMinusR, opMul, opMulR,  // FIXIT avoid division by values near zero: opDiv, opDivR,
+                                        opGT, opLT, opGE, opLE, opEQ, opNE,
+                                        opGTR, opLTR, opGER, opLER, opEQR, opNER)));
 
 INSTANTIATE_TEST_CASE_P(BitwiseOperatorTestGPU, MathOperatorMatMatTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opAnd, opOr, opXor ),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
-                                   cv::Size(640, 480),
-                                   cv::Size(128, 128)),
+                                       cv::Size(640, 480),
+                                       cv::Size(128, 128)),
                                 Values(-1),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opAnd, opOr, opXor )));
 
 INSTANTIATE_TEST_CASE_P(BitwiseOperatorTestGPU, MathOperatorMatScalarTest,
-                        Combine(Values(AbsExact().to_compare_f()),
-                                Values( opAND, opOR, opXOR, opANDR, opORR, opXORR ),
-                                Values(CV_8UC1, CV_16UC1, CV_16SC1),
+                        Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values(-1),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(CORE_GPU),
+                                Values(AbsExact().to_compare_obj()),
+                                Values( opAND, opOR, opXOR, opANDR, opORR, opXORR )));
 
 INSTANTIATE_TEST_CASE_P(BitwiseNotOperatorTestGPU, NotOperatorTest,
                         Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-/*init output matrices or not*/ testing::Bool(),
-                                Values(cv::compile_args(CORE_GPU))));
+                                Values(-1),
+                                Values(CORE_GPU)));
 }
index f5de114..e166b87 100644 (file)
@@ -9,7 +9,7 @@
 
 #include <ade/util/zip_range.hpp>   // util::indexed
 
-#include "opencv2/gapi/gkernel.hpp"
+#include <opencv2/gapi/gkernel.hpp>
 #include "compiler/gmodelbuilder.hpp"
 #include "compiler/gmodel.hpp" // RcDesc, GModel::init
 
index 833ea17..c433025 100644 (file)
@@ -8,9 +8,9 @@
 #include "../test_precomp.hpp"
 #include "api/gcomputation_priv.hpp"
 
-#include "opencv2/gapi/fluid/gfluidkernel.hpp"
-#include "opencv2/gapi/fluid/core.hpp"
-#include "opencv2/gapi/fluid/imgproc.hpp"
+#include <opencv2/gapi/fluid/gfluidkernel.hpp>
+#include <opencv2/gapi/fluid/core.hpp>
+#include <opencv2/gapi/fluid/imgproc.hpp>
 
 namespace opencv_test
 {
@@ -197,8 +197,7 @@ TEST(GComputationCompile, ReshapeRois)
     cv::randn(first_in_mat, cv::Scalar::all(127), cv::Scalar::all(40.f));
     cv::Mat first_out_mat;
     auto fluidKernels = cv::gapi::combine(gapi::imgproc::fluid::kernels(),
-                                          gapi::core::fluid::kernels(),
-                                          cv::unite_policy::REPLACE);
+                                          gapi::core::fluid::kernels());
     cc.apply(first_in_mat, first_out_mat, cv::compile_args(fluidKernels));
     auto first_comp = cc.priv().m_lastCompiled;
 
index 1164165..e71985f 100644 (file)
@@ -4,9 +4,9 @@
 //
 // Copyright (C) 2018 Intel Corporation
 
-#include "opencv2/core/ocl.hpp"
-#include "opencv2/core/ocl_genbase.hpp"
-#include "opencv2/core/opencl/ocl_defs.hpp"
+#include <opencv2/core/ocl.hpp>
+#include <opencv2/core/ocl_genbase.hpp>
+#include <opencv2/core/opencl/ocl_defs.hpp>
 
 #ifdef HAVE_OPENCL
 const char* opencl_symm7x7_src =
index a6453c6..42245a5 100644 (file)
@@ -6,7 +6,7 @@
 
 
 #include "../test_precomp.hpp"
-#include "opencv2/gapi/own/mat.hpp"
+#include <opencv2/gapi/own/mat.hpp>
 #include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
 
 namespace opencv_test
index 0ee626c..09fec67 100644 (file)
@@ -6,7 +6,7 @@
 
 
 #include "../test_precomp.hpp"
-#include "opencv2/gapi/own/scalar.hpp"
+#include <opencv2/gapi/own/scalar.hpp>
 
 namespace opencv_test
 {
index 9fb0e4d..a2f8f3f 100644 (file)
 #include <cstdint>
 #include <vector>
 
-#include "opencv2/ts.hpp"
-#include "opencv2/gapi.hpp"
-#include "opencv2/gapi/imgproc.hpp"
-#include "opencv2/gapi/core.hpp"
-#include "opencv2/gapi/cpu/gcpukernel.hpp"
-#include "opencv2/gapi/gpu/ggpukernel.hpp"
-#include "opencv2/gapi/gpu/imgproc.hpp"
-#include "opencv2/gapi/gpu/core.hpp"
-#include "opencv2/gapi/gcompoundkernel.hpp"
-#include "opencv2/gapi/operators.hpp"
-#include "opencv2/gapi/fluid/imgproc.hpp"
-#include "opencv2/gapi/fluid/core.hpp"
+#include <opencv2/ts.hpp>
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/imgproc.hpp>
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
+#include <opencv2/gapi/gpu/ggpukernel.hpp>
+#include <opencv2/gapi/gpu/imgproc.hpp>
+#include <opencv2/gapi/gpu/core.hpp>
+#include <opencv2/gapi/gcompoundkernel.hpp>
+#include <opencv2/gapi/operators.hpp>
+#include <opencv2/gapi/fluid/imgproc.hpp>
+#include <opencv2/gapi/fluid/core.hpp>
 
 #endif // __OPENCV_GAPI_TEST_PRECOMP_HPP__
index 1c6c9cc..9d3e9c9 100644 (file)
@@ -6,7 +6,7 @@
 
 
 #include "../test_precomp.hpp"
-#include "opencv2/gapi/util/any.hpp"
+#include <opencv2/gapi/util/any.hpp>
 
 namespace opencv_test
 {
index 7b6cdb1..7dde9fc 100644 (file)
@@ -6,7 +6,7 @@
 
 
 #include "../test_precomp.hpp"
-#include "opencv2/gapi/util/optional.hpp"
+#include <opencv2/gapi/util/optional.hpp>
 #include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
 
 namespace opencv_test
index 328afe7..bdeea94 100644 (file)
@@ -6,7 +6,7 @@
 
 
 #include "../test_precomp.hpp"
-#include "opencv2/gapi/util/variant.hpp"
+#include <opencv2/gapi/util/variant.hpp>
 #include <cstddef> //std::max_align_t
 
 namespace opencv_test
index 03d5840..069981e 100644 (file)
@@ -649,6 +649,9 @@ mkldnn_status_t MKLDNN_API mkldnn_memory_get_data_handle(
 mkldnn_status_t MKLDNN_API mkldnn_memory_set_data_handle(
         mkldnn_primitive_t memory, void *handle);
 
+mkldnn_status_t MKLDNN_API mkldnn_memory_set_data_handle_no_pads_proc(
+        mkldnn_primitive_t memory, void *handle);
+
 /** @} */
 
 /** @addtogroup c_api_reorder Reorder
index 07a4b04..41dcb27 100644 (file)
@@ -965,6 +965,11 @@ struct memory: public primitive  {
                 "could not set native handle");
     }
 
+    inline void set_data_handle_no_pads_proc(void *handle) const {
+        error::wrap_c_api(mkldnn_memory_set_data_handle_no_pads_proc(get(), handle),
+                          "could not set native handle");
+    }
+
     // Must go away or be private:
     static mkldnn_data_type_t convert_to_c(data_type adata_type) {
         return static_cast<mkldnn_data_type_t>(adata_type);
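The two hunks above add mkldnn_memory_set_data_handle_no_pads_proc and its C++ wrapper, which rebind a memory primitive to a user buffer without the zero-padding pass performed by the regular setter (see the cpu_memory change further below). A minimal usage sketch, assuming an existing mkldnn::memory primitive `mem` and a compatible user buffer `buf` (both hypothetical, not part of the patch):

    #include <mkldnn.hpp>

    // Illustrative only: rebind an externally managed buffer, optionally
    // skipping the zero-padding of the padded region.
    void rebind_buffer(mkldnn::memory &mem, void *buf, bool buffer_already_padded) {
        if (buffer_already_padded)
            mem.set_data_handle_no_pads_proc(buf);  // new path: no zero_pad()
        else
            mem.set_data_handle(buf);               // original path: zero-pads
    }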
index 4c6656b..fe6fdc0 100644 (file)
@@ -163,7 +163,13 @@ status_t mkldnn_memory_get_data_handle(const primitive_t *memory,
 status_t mkldnn_memory_set_data_handle(primitive_t *memory, void *handle) {
     if (any_null(memory) || memory->kind() != primitive_kind::memory)
         return invalid_arguments;
-    return memory->set_data_handle(handle);
+    return memory->set_data_handle(handle, true);
+}
+
+status_t mkldnn_memory_set_data_handle_no_pads_proc(primitive_t *memory, void *handle) {
+    if (any_null(memory) || memory->kind() != primitive_kind::memory)
+        return invalid_arguments;
+    return memory->set_data_handle(handle, false);
 }
 
 status_t mkldnn_concat_primitive_desc_create_v2(primitive_desc_t **concat_pd,
index e91a627..1568c6b 100644 (file)
@@ -88,8 +88,9 @@ struct mkldnn_primitive: public mkldnn::impl::c_compatible {
         return mkldnn::impl::status::invalid_arguments;
     }
     /** sets data handle. Applicable for memory primitives only. */
-    virtual mkldnn::impl::status_t set_data_handle(void *handle) {
+    virtual mkldnn::impl::status_t set_data_handle(void *handle, bool pads_zeroing) {
         UNUSED(handle);
+        UNUSED(pads_zeroing);
         assert(this->kind() == mkldnn::impl::primitive_kind::memory);
         return mkldnn::impl::status::invalid_arguments;
     }
index 4ac2f87..7db5ddc 100644 (file)
@@ -49,6 +49,7 @@ typedef enum {
     avx512_mic,
     avx512_mic_4ops,
     avx512_core_bf16,
+    avx512_vpopcnt,
 } cpu_isa_t;
 
 template <cpu_isa_t> struct cpu_isa_traits {}; /* ::vlen -> 32 (for avx2) */
@@ -129,6 +130,9 @@ static inline bool mayiuse(const cpu_isa_t cpu_isa) {
         return true
             && mayiuse(avx512_core_vnni)
             && cpu.has(Cpu::tAVX512_BF);
+    case avx512_vpopcnt:
+        return true
+            && cpu.has(Cpu::tAVX512_VPOPCNTDQ);
     case isa_any:
         return true;
     }
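The avx512_vpopcnt entry added above lets kernels query AVX512_VPOPCNTDQ support at run time; the binary convolution JIT change further below uses it to select a vpopcntd-based path. A hedged sketch of the dispatch pattern, assuming mayiuse() and cpu_isa_t come from this cpu_isa_traits.hpp header:

    #include "cpu_isa_traits.hpp"  // assumed include path for mayiuse()/cpu_isa_t

    // Illustrative only: true only when the CPU reports AVX512_VPOPCNTDQ,
    // per the mayiuse() case added in the hunk above.
    static bool use_vpopcnt_path() {
        using namespace mkldnn::impl::cpu;
        return mayiuse(avx512_vpopcnt);
    }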
index 830adcc..02ba03e 100644 (file)
@@ -61,9 +61,9 @@ struct cpu_memory_t: public cpu_primitive_t {
         *handle = static_cast<void *>(data_);
         return success;
     }
-    virtual mkldnn::impl::status_t set_data_handle(void *handle) {
+    virtual mkldnn::impl::status_t set_data_handle(void *handle, bool pads_zeroing) {
         data_ = static_cast<char *>(handle);
-        return zero_pad();
+        return pads_zeroing ? zero_pad() : success;
     }
 
     virtual char *memory(size_t output_index = 0) const
index 189bd11..04bca4d 100644 (file)
@@ -174,35 +174,41 @@ void jit_uni_bin_conv_fwd_kernel<isa>::apply_filter(int ur_w, int pad_l, int pad
                         if (jcp.ic_padded != jcp.ic && last_icb && ifm2 == (ic_blocks - 1))
                             uni_vandps(vmm_tmp, vmm_tmp, ptr[reg_table + 7 * vlen]);
 
-                        if (isa == sse42) {
-                            movups(vmm_tmp1, vmm_tmp);
-                            pand(vmm_tmp1, vmm_mask);
+                        if (mayiuse(avx512_vpopcnt)) {
+                            vpopcntd(vmm_tmp, vmm_tmp);
+                            uni_vpaddd(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj),
+                                       Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), vmm_tmp);
                         } else {
-                            uni_vandps(vmm_tmp1, vmm_mask, vmm_tmp);
-                        }
+                            if (isa == sse42) {
+                                movups(vmm_tmp1, vmm_tmp);
+                                pand(vmm_tmp1, vmm_mask);
+                            } else {
+                                uni_vandps(vmm_tmp1, vmm_mask, vmm_tmp);
+                            }
 
-                        uni_vpsrld(vmm_tmp, vmm_tmp, 4);
-                        uni_vandps(vmm_tmp, vmm_tmp, vmm_mask);
+                            uni_vpsrld(vmm_tmp, vmm_tmp, 4);
+                            uni_vandps(vmm_tmp, vmm_tmp, vmm_mask);
 
-                        if (isa == sse42) {
-                            movups(vmm_tmp2, vmm_lookup);
-                            pshufb(vmm_tmp2, vmm_tmp);
-                            movups(vmm_tmp, vmm_lookup);
-                            pshufb(vmm_tmp, vmm_tmp1);
-                            paddb(vmm_tmp, vmm_tmp2);
-                        } else {
-                            uni_vpshufb(vmm_tmp, vmm_lookup, vmm_tmp);
-                            uni_vpshufb(vmm_tmp1, vmm_lookup, vmm_tmp1);
-                            uni_vpaddb(vmm_tmp, vmm_tmp, vmm_tmp1);
-                        }
+                            if (isa == sse42) {
+                                movups(vmm_tmp2, vmm_lookup);
+                                pshufb(vmm_tmp2, vmm_tmp);
+                                movups(vmm_tmp, vmm_lookup);
+                                pshufb(vmm_tmp, vmm_tmp1);
+                                paddb(vmm_tmp, vmm_tmp2);
+                            } else {
+                                uni_vpshufb(vmm_tmp, vmm_lookup, vmm_tmp);
+                                uni_vpshufb(vmm_tmp1, vmm_lookup, vmm_tmp1);
+                                uni_vpaddb(vmm_tmp, vmm_tmp, vmm_tmp1);
+                            }
 
-                        if (mayiuse(avx512_core_vnni)) {
-                            vpdpbusd(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), vmm_tmp, vmm_one_u8);
-                        } else {
-                            uni_vpmaddubsw(vmm_tmp, vmm_tmp, vmm_one_u8);
-                            uni_vpmaddwd(vmm_tmp, vmm_tmp, vmm_one_s16);
-                            uni_vpaddd(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj),
-                                       Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), vmm_tmp);
+                            if (mayiuse(avx512_core_vnni)) {
+                                vpdpbusd(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), vmm_tmp, vmm_one_u8);
+                            } else {
+                                uni_vpmaddubsw(vmm_tmp, vmm_tmp, vmm_one_u8);
+                                uni_vpmaddwd(vmm_tmp, vmm_tmp, vmm_one_s16);
+                                uni_vpaddd(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj),
+                                           Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), vmm_tmp);
+                            }
                         }
                     }
                 }
index 4e95474..ec99c62 100644 (file)
@@ -74,7 +74,9 @@ void ref_depthwise_fwd_t<data_type>::execute_forward() const {
 
     parallel_nd(MB, C, D, H, W,
         [&](int n, int c, int d, int h, int w) {
-        size_t data_off = data_d.ndims() == 4
+        size_t data_off = data_d.ndims() == 3
+                        ? data_d.off(n, c, d)
+                        : data_d.ndims() == 4
                         ? data_d.off(n, c, h, w)
                         : data_d.ndims() == 5
                             ? data_d.off(n, c, d, h, w)
index 7c15e3b..6821e5a 100644 (file)
@@ -3,12 +3,11 @@
 #
 
 if (ENABLE_MYRIAD)
-    add_subdirectory(
-        "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/XLink"
-        "${CMAKE_BINARY_DIR}/thirdparty/movidius/XLink")
+    set(XLINK_DIR "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/XLink" CACHE PATH "path to Xlink")
+    add_subdirectory("${XLINK_DIR}" "${CMAKE_BINARY_DIR}/thirdparty/movidius/XLink")
 
     add_subdirectory(
         "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/mvnc"
         "${CMAKE_BINARY_DIR}/thirdparty/movidius/mvnc")
+
 endif()
-    
\ No newline at end of file
diff --git a/inference-engine/thirdparty/movidius/WinPthread/pthread_semaphore.c b/inference-engine/thirdparty/movidius/WinPthread/pthread_semaphore.c
new file mode 100644 (file)
index 0000000..06d6326
--- /dev/null
@@ -0,0 +1,265 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "pthread_semaphore.h"
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <limits.h>
+
+#ifndef SEM_VALUE_MAX
+# define SEM_VALUE_MAX INT_MAX
+#endif
+
+struct pthread_sem_private_t {
+    pthread_mutex_t   access;
+    pthread_cond_t    conditional;
+    volatile int counter; // >= 0 no waiters, == -1 some waiters
+};
+
+int pthread_sem_init(pthread_sem_t *psem, int pshared, unsigned int value) {
+    int result = 0;
+    if (NULL == psem) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (value > SEM_VALUE_MAX){
+        errno = EINVAL;
+        return -1;
+    }
+    if (pshared != 0) {
+        errno = ENOSYS;
+        return -1;
+    }
+    struct pthread_sem_private_t *psem_private = malloc(sizeof(struct pthread_sem_private_t));
+    if (NULL == psem_private) {
+        return -1;
+    }
+
+    result = pthread_mutex_init(&psem_private->access, NULL);
+    if (result) {
+        free(psem_private);
+        errno = result;
+        return -1;
+    }
+
+    result = pthread_cond_init(&psem_private->conditional, NULL);
+    if (result) {
+        pthread_mutex_destroy(&psem_private->access);
+        free(psem_private);
+        errno = result;
+        return -1;
+    }
+
+    psem_private->counter = value;
+
+    *psem = (pthread_sem_t)psem_private;
+    errno = 0;
+    return 0;
+}
+
+int pthread_sem_destroy(pthread_sem_t *psem) {
+    int result = 0;
+
+    if (NULL == psem) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (0 == *psem) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    struct pthread_sem_private_t *psem_private = (struct pthread_sem_private_t *)*psem;
+
+    result = pthread_mutex_lock(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    if (psem_private->counter == -1) {
+        pthread_mutex_unlock(&psem_private->access);
+        errno = EBUSY;
+        return -1;
+    }
+
+    // the condition variable might not be deletable due to its wait queue - let's notify the waiters
+    result = pthread_cond_destroy(&psem_private->conditional);
+    if (result) {
+        pthread_mutex_unlock(&psem_private->access);
+        errno = result;
+        return -1;
+    }
+
+    result = pthread_mutex_unlock(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    // UB - behavior is untested if the mutex object is corrupted
+    result = pthread_mutex_destroy(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    free(psem_private);
+    *psem = 0;
+
+    errno = 0;
+    return 0;
+}
+static int pthread_sem_post_signal_or_broadcast(pthread_sem_t *psem, int broadcast) {
+    int result;
+    if (NULL == psem) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (0 == *psem) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    struct pthread_sem_private_t *psem_private = (struct pthread_sem_private_t *)*psem;
+    result = pthread_mutex_lock(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    // right now value == 0 does not necessarily mean that there is a waiter queue
+    if (broadcast) {
+        result = pthread_cond_broadcast(&psem_private->conditional);
+    } else {
+        result = pthread_cond_signal(&psem_private->conditional);
+    }
+    if (result) {
+        pthread_mutex_unlock(&psem_private->access);
+        errno = result;
+        return -1;
+    }
+
+    // up counter
+    if (psem_private->counter == INT_MAX) {
+        pthread_mutex_unlock(&psem_private->access);
+        errno = EOVERFLOW;
+        return -1;
+    }
+    if (psem_private->counter == -1) {
+        psem_private->counter = 1;
+    } else {
+        psem_private->counter ++;
+    }
+
+    result = pthread_mutex_unlock(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    errno = 0;
+    return 0;
+}
+
+int pthread_sem_post_broadcast(pthread_sem_t *psem) {
+    return pthread_sem_post_signal_or_broadcast(psem, 1);
+}
+
+int pthread_sem_post(pthread_sem_t *psem) {
+    return pthread_sem_post_signal_or_broadcast(psem, 0);
+}
+
+static int pthread_sem_timed_or_blocked_wait(pthread_sem_t *psem, const struct timespec *abstime) {
+    int result = 0;
+    if (NULL == psem) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (0 == *psem) {
+        errno = EINVAL;
+        return -1;
+    }
+    struct pthread_sem_private_t *psem_private = (struct pthread_sem_private_t *)*psem;
+    result = pthread_mutex_lock(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    for (;psem_private->counter < 1;) {
+        // indicate that we will be waiting on this counter
+        psem_private->counter = -1;
+        if (abstime == NULL) {
+            result = pthread_cond_wait(&psem_private->conditional, &psem_private->access);
+        } else {
+            result = pthread_cond_timedwait(&psem_private->conditional, &psem_private->access, abstime);
+        }
+        if (result != 0) {
+            break;
+        }
+    }
+
+    // printf("cond_wait=%d\n", result);
+    if (result) {
+        // semaphore not acquired - reset the counter back
+        if (psem_private->counter == -1) {
+            psem_private->counter = 0;
+        }
+        pthread_mutex_unlock(&psem_private->access);
+        errno = result;
+        return -1;
+    }
+
+    // acquire semaphore
+    psem_private->counter --;
+
+    result = pthread_mutex_unlock(&psem_private->access);
+    if (result) {
+        errno = result;
+        return -1;
+    }
+
+    errno = 0;
+    return 0;
+}
+
+int pthread_sem_wait(pthread_sem_t *psem) {
+    return pthread_sem_timed_or_blocked_wait(psem, NULL);
+}
+
+int pthread_sem_timedwait(pthread_sem_t *psem, const struct timespec *abstime) {
+    if (NULL == abstime) {
+        errno = EINVAL;
+        return -1;
+    }
+    if (abstime->tv_sec < 0 || abstime->tv_nsec < 0) {
+        errno = EINVAL;
+        return -1;
+    }
+    return pthread_sem_timed_or_blocked_wait(psem, abstime);
+}
+
+
+# ifdef __APPLE__
+
+int sem_init(sem_t *psem, int pshared, unsigned int value) {
+    return pthread_sem_init(psem, pshared, value);
+}
+int sem_destroy(sem_t *psem) {
+    return pthread_sem_destroy(psem);
+}
+int sem_post(sem_t *psem) {
+    return pthread_sem_post(psem);
+}
+int sem_wait(sem_t *psem) {
+    return pthread_sem_wait(psem);
+}
+int sem_timedwait(sem_t *psem, const struct timespec *abstime) {
+    return pthread_sem_timedwait(psem, abstime);
+}
+
+#endif
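The new pthread_semaphore.c above implements a counting semaphore on top of a pthread mutex and condition variable, mirroring the POSIX sem_* API (which it aliases on macOS). A minimal usage sketch of the public pthread_sem_* calls; the single-threaded handshake below is hypothetical and only shows the call order and error convention (0 on success, -1 with errno set):

    #include "pthread_semaphore.h"
    #include <cstdio>

    int main() {
        pthread_sem_t sem;
        if (pthread_sem_init(&sem, /*pshared=*/0, /*value=*/0) != 0)
            return 1;                     // pshared != 0 is rejected with ENOSYS

        pthread_sem_post(&sem);           // counter 0 -> 1, signals one waiter
        if (pthread_sem_wait(&sem) == 0)  // counter 1 -> 0, returns without blocking
            std::puts("semaphore acquired");

        return pthread_sem_destroy(&sem); // fails with EBUSY while waiters are queued
    }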
diff --git a/inference-engine/thirdparty/movidius/WinPthread/pthread_semaphore.h b/inference-engine/thirdparty/movidius/WinPthread/pthread_semaphore.h
new file mode 100644 (file)
index 0000000..7cde510
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef PTHREAD_SEMAPHORE_H
+#define PTHREAD_SEMAPHORE_H
+
+# include <time.h>
+# include <stdint.h>
+typedef intptr_t pthread_sem_t;
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+int pthread_sem_init(pthread_sem_t *psem, int pshared, unsigned int value);
+int pthread_sem_destroy(pthread_sem_t *psem);
+int pthread_sem_post(pthread_sem_t *psem);
+int pthread_sem_post_broadcast(pthread_sem_t *psem);
+int pthread_sem_wait(pthread_sem_t *psem);
+int pthread_sem_timedwait(pthread_sem_t *psem, const struct timespec *abstime);
+# ifdef __cplusplus
+}
+# endif
+
+# ifdef __APPLE__
+
+typedef pthread_sem_t sem_t;
+
+#  ifdef __cplusplus
+extern "C" {
+#  endif
+
+int sem_init(sem_t *psem, int pshared, unsigned int value);
+int sem_destroy(sem_t *psem);
+int sem_post(sem_t *psem);
+int sem_wait(sem_t *psem);
+int sem_timedwait(sem_t *psem, const struct timespec *abstime);
+
+#  ifdef __cplusplus
+}
+#  endif
+
+# elif defined(_WIN32)
+#  error "pthread based semaphores not implemented for WIN32"
+# else
+#  include <semaphore.h>
+# endif  // linux case
+#endif  // PTHREAD_SEMAPHORE_H
index a28791d..658081f 100644 (file)
@@ -4,29 +4,22 @@
 
 set(TARGET_NAME "XLink")
 
-if(NOT WIN32)
-    find_package(Threads REQUIRED)
-
-    find_path(LIBUSB_INCLUDE_DIR NAMES libusb.h PATH_SUFFIXES "include" "libusb" "libusb-1.0")
-    find_library(LIBUSB_LIBRARY NAMES usb-1.0 PATH_SUFFIXES "lib")
-
-    if(NOT LIBUSB_INCLUDE_DIR OR NOT LIBUSB_LIBRARY)
-        message(FATAL_ERROR "libusb is required")
-    endif()
-endif()
-
-file(GLOB_RECURSE SOURCES *.c *.h)
-file(GLOB_RECURSE SHARED "../shared/*")
+include(XLink.cmake)
 
-# FIXME: WIN_PTHREAD also should be built as a library
+# Windows threads sources
 if(WIN32)
     file(GLOB USB_WIN_SOURCES "../USB_WIN/*")
-    file(GLOB WIN_PTHREAD_SOURCES "../WinPthread/*")
-    list(APPEND SOURCES ${USB_WIN_SOURCES} ${WIN_PTHREAD_SOURCES})
+    set(WIN_PTHREAD_SOURCES
+            "${CMAKE_CURRENT_SOURCE_DIR}/../WinPthread/win_semaphore.c"
+            "${CMAKE_CURRENT_SOURCE_DIR}/../WinPthread/win_pthread.c")
+    list(APPEND XLINK_SOURCES ${USB_WIN_SOURCES} ${WIN_PTHREAD_SOURCES})
+else()
+    list(APPEND XLINK_SOURCES "../WinPthread/pthread_semaphore.c")
 endif()
 
-add_library(${TARGET_NAME} STATIC ${SOURCES} ${SHARED})
+add_library(${TARGET_NAME} STATIC ${XLINK_SOURCES})
 
+# Threads and USB include directories
 if(WIN32)
     target_include_directories(${TARGET_NAME}
             PRIVATE
@@ -40,36 +33,30 @@ endif()
 
 target_include_directories(${TARGET_NAME}
         PUBLIC
-            "shared"
-            "../shared/include"
-            "pc")
+        ${XLINK_INCLUDE}
+        "../WinPthread")
 
 if(NOT WIN32)
     target_link_libraries(${TARGET_NAME}
             PUBLIC
-                Threads::Threads
-                ${LIBUSB_LIBRARY})
+            Threads::Threads
+            ${LIBUSB_LIBRARY})
 endif()
 
 target_compile_definitions(${TARGET_NAME}
         PRIVATE
-            __PC__
-            HAVE_STRUCT_TIMESPEC
-            _CRT_SECURE_NO_WARNINGS
+        __PC__
+        HAVE_STRUCT_TIMESPEC
+        _CRT_SECURE_NO_WARNINGS
+        USE_USB_VSC
         )
 
 if (ENABLE_MYRIAD_NO_BOOT)
     target_compile_definitions(${TARGET_NAME}
-        PRIVATE
-            NO_BOOT
-            USE_USB_VSC)
-else()
-    target_compile_definitions(${TARGET_NAME}
-        PRIVATE
-            USE_USB_VSC)
+            PRIVATE
+            NO_BOOT)
 endif()
 
+add_dependencies(${TARGET_NAME} vpu_copy_firmware)
 
-if(ENABLE_TESTS)
-    add_subdirectory(tests)
-endif()
\ No newline at end of file
+set_property(TARGET ${TARGET_NAME} PROPERTY C_STANDARD 99)
diff --git a/inference-engine/thirdparty/movidius/XLink/XLink.cmake b/inference-engine/thirdparty/movidius/XLink/XLink.cmake
new file mode 100644 (file)
index 0000000..2cd1380
--- /dev/null
@@ -0,0 +1,36 @@
+if(EXISTS "$ENV{MV_COMMON_BASE}")
+    set(MV_COMMON_BASE $ENV{MV_COMMON_BASE})
+else()
+    set(MV_COMMON_BASE ${CMAKE_CURRENT_LIST_DIR}/..)
+endif(EXISTS "$ENV{MV_COMMON_BASE}")
+
+if(NOT WIN32)
+    find_package(Threads REQUIRED)
+
+    find_path(LIBUSB_INCLUDE_DIR NAMES libusb.h PATH_SUFFIXES "include" "libusb" "libusb-1.0")
+    find_library(LIBUSB_LIBRARY NAMES usb-1.0 PATH_SUFFIXES "lib")
+
+    if(NOT LIBUSB_INCLUDE_DIR OR NOT LIBUSB_LIBRARY)
+        message(FATAL_ERROR "libusb is required")
+    endif()
+endif()
+
+set(XLINK_INCLUDE
+        ${MV_COMMON_BASE}/XLink/pc
+        ${MV_COMMON_BASE}/XLink/shared
+        ${MV_COMMON_BASE}/shared/include
+        )
+
+set(XLINK_INCLUDE_DIRECTORIES
+        ${XLINK_INCLUDE}
+        ${LIBUSB_INCLUDE_DIR}
+        )
+
+set(XLINK_SOURCES
+        ${MV_COMMON_BASE}/XLink/pc/XLinkPlatform.c
+        ${MV_COMMON_BASE}/XLink/pc/usb_boot.c
+        ${MV_COMMON_BASE}/XLink/pc/pcie_host.c
+        ${MV_COMMON_BASE}/XLink/shared/XLink.c
+        ${MV_COMMON_BASE}/XLink/shared/XLinkDispatcher.c
+        ${MV_COMMON_BASE}/shared/src/mvStringUtils.c
+        )
index e3f4961..c15145a 100644 (file)
@@ -10,6 +10,7 @@
 #include <sys/timeb.h>
 #include <errno.h>
 #include <assert.h>
+#include <string.h>
 
 #include "XLinkPlatform.h"
 #include "usb_boot.h"
@@ -89,6 +90,26 @@ extern void initialize_usb_boot();
 #define OPEN_DEV_ERROR_MESSAGE_LENGTH 128
 #endif
 
+static char* XLinkPlatformErrorToStr(const xLinkPlatformErrorCode_t errorCode) {
+    switch (errorCode) {
+        case X_LINK_PLATFORM_SUCCESS: return "X_LINK_PLATFORM_SUCCESS";
+        case X_LINK_PLATFORM_DEVICE_NOT_FOUND: return "X_LINK_PLATFORM_DEVICE_NOT_FOUND";
+        case X_LINK_PLATFORM_ERROR: return "X_LINK_PLATFORM_ERROR";
+        case X_LINK_PLATFORM_TIMEOUT: return "X_LINK_PLATFORM_TIMEOUT";
+        case X_LINK_PLATFORM_DRIVER_NOT_LOADED: return "X_LINK_PLATFORM_DRIVER_NOT_LOADED";
+        default: return "";
+    }
+}
+
+static char* pciePlatformStateToStr(const pciePlatformState_t platformState) {
+    switch (platformState) {
+        case PCIE_PLATFORM_ANY_STATE: return "PCIE_PLATFORM_ANY_STATE";
+        case PCIE_PLATFORM_BOOTED: return "PCIE_PLATFORM_BOOTED";
+        case PCIE_PLATFORM_UNBOOTED: return "PCIE_PLATFORM_UNBOOTED";
+        default: return "";
+    }
+}
+
 static xLinkPlatformErrorCode_t parseUsbBootError(usbBootError_t rc) {
     switch (rc) {
         case USB_BOOT_SUCCESS:
@@ -102,13 +123,31 @@ static xLinkPlatformErrorCode_t parseUsbBootError(usbBootError_t rc) {
     }
 }
 
+static xLinkPlatformErrorCode_t parsePCIeHostError(pcieHostError_t rc) {
+    switch (rc) {
+        case PCIE_HOST_SUCCESS:
+            return X_LINK_PLATFORM_SUCCESS;
+        case PCIE_HOST_DEVICE_NOT_FOUND:
+            return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
+        case PCIE_HOST_ERROR:
+            return X_LINK_PLATFORM_ERROR;
+        case PCIE_HOST_TIMEOUT:
+            return X_LINK_PLATFORM_TIMEOUT;
+        case PCIE_HOST_DRIVER_NOT_LOADED:
+            return X_LINK_PLATFORM_DRIVER_NOT_LOADED;
+        default:
+            return X_LINK_PLATFORM_ERROR;
+    }
+}
+
 static int usb_write(libusb_device_handle *f, const void *data, size_t size, unsigned int timeout)
 {
+    const int chunk_size = DEFAULT_CHUNKSZ;
     while(size > 0)
     {
         int bt, ss = size;
-        if(ss > 1024*1024*5)
-            ss = 1024*1024*5;
+        if(ss > chunk_size)
+            ss = chunk_size;
 #if (defined(_WIN32) || defined(_WIN64) )
         int rc = usb_bulk_write(f, USB_ENDPOINT_OUT, (unsigned char *)data, ss, &bt, timeout);
 #else
@@ -125,11 +164,12 @@ static int usb_write(libusb_device_handle *f, const void *data, size_t size, uns
 
 static int usb_read(libusb_device_handle *f, void *data, size_t size, unsigned int timeout)
 {
+    const int chunk_size = DEFAULT_CHUNKSZ;
     while(size > 0)
     {
         int bt, ss = size;
-        if(ss > 1024*1024*5)
-            ss = 1024*1024*5;
+        if(ss > chunk_size)
+            ss = chunk_size;
 #if (defined(_WIN32) || defined(_WIN64))
         int rc = usb_bulk_read(f, USB_ENDPOINT_IN, (unsigned char *)data, ss, &bt, timeout);
 #else
@@ -170,9 +210,9 @@ libusb_device_handle *usblink_open(const char *path)
 
 #if (!defined(_WIN32) && !defined(_WIN64))
         uint16_t  bcdusb = -1;
-        rc = usb_find_device_with_bcd(0, (char *)path, size, (void **)&dev, DEFAULT_OPENVID, DEFAULT_OPENPID, &bcdusb, 0);
+        rc = usb_find_device_with_bcd(0, (char *)path, size, (void **)&dev, DEFAULT_OPENVID, DEFAULT_OPENPID, &bcdusb);
 #else
-               rc = usb_find_device(0, (char *)path, size, (void **)&dev, DEFAULT_OPENVID, DEFAULT_OPENPID, 0);
+        rc = usb_find_device(0, (char *)path, size, (void **)&dev, DEFAULT_OPENVID, DEFAULT_OPENPID);
 #endif
         if(rc == USB_BOOT_SUCCESS)
             break;
@@ -181,7 +221,7 @@ libusb_device_handle *usblink_open(const char *path)
     if (rc == USB_BOOT_TIMEOUT || rc == USB_BOOT_DEVICE_NOT_FOUND) // Timeout
         return 0;
 #if (defined(_WIN32) || defined(_WIN64) )
-       h = usb_open_device(dev, NULL, 0, stderr, OPEN_DEV_ERROR_MESSAGE_LENGTH);
+    h = usb_open_device(dev, NULL, 0, stderr, OPEN_DEV_ERROR_MESSAGE_LENGTH);
        int libusb_rc = ((h != NULL) ? (0) : (-1));
        if (libusb_rc < 0)
        {
@@ -267,7 +307,7 @@ int USBLinkWrite(void* fd, void* data, int size, unsigned int timeout)
     return rc;
 }
 
- int USBLinkRead(void* fd, void* data, int size, unsigned int timeout)
+int USBLinkRead(void* fd, void* data, int size, unsigned int timeout)
 {
     int rc = 0;
 #ifndef USE_USB_VSC
@@ -290,7 +330,7 @@ int USBLinkWrite(void* fd, void* data, int size, unsigned int timeout)
 
         while(toRead > 0)
         {
-            rc = read(usbFdRead, &((char*)gl_protocoldata)[nread], toRead);
+            rc = read(usbFdRead, &((char*)data)[nread], toRead);
             if ( rc < 0)
             {
                 return -2;
@@ -336,7 +376,7 @@ int USBLinkPlatformResetRemote(void* fd)
 
 int UsbLinkPlatformConnect(const char* devPathRead, const char* devPathWrite, void** fd)
 {
-    #if (!defined(USE_USB_VSC))
+#if (!defined(USE_USB_VSC))
 #ifdef USE_LINK_JTAG
     struct sockaddr_in serv_addr;
     usbFdWrite = socket(AF_INET, SOCK_STREAM, 0);
@@ -361,7 +401,7 @@ int UsbLinkPlatformConnect(const char* devPathRead, const char* devPathWrite, vo
     usbFdRead= open(devPathRead, O_RDWR);
     if(usbFdRead < 0)
     {
-        return -1;
+        return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
     }
     // set tty to raw mode
     struct termios  tty;
@@ -369,8 +409,9 @@ int UsbLinkPlatformConnect(const char* devPathRead, const char* devPathWrite, vo
     int rc;
     rc = tcgetattr(usbFdRead, &tty);
     if (rc < 0) {
+        close(usbFdRead);
         usbFdRead = -1;
-        return -2;
+        return X_LINK_PLATFORM_ERROR;
     }
 
     spd = B115200;
@@ -381,21 +422,25 @@ int UsbLinkPlatformConnect(const char* devPathRead, const char* devPathWrite, vo
 
     rc = tcsetattr(usbFdRead, TCSANOW, &tty);
     if (rc < 0) {
+        close(usbFdRead);
         usbFdRead = -1;
-        return -2;
+        return X_LINK_PLATFORM_ERROR;
     }
 
     usbFdWrite= open(devPathWrite, O_RDWR);
     if(usbFdWrite < 0)
     {
+        close(usbFdRead);
         usbFdWrite = -1;
-        return -2;
+        return X_LINK_PLATFORM_ERROR;
     }
     // set tty to raw mode
     rc = tcgetattr(usbFdWrite, &tty);
     if (rc < 0) {
+        close(usbFdRead);
+        close(usbFdWrite);
         usbFdWrite = -1;
-        return -2;
+        return X_LINK_PLATFORM_ERROR;
     }
 
     spd = B115200;
@@ -406,8 +451,10 @@ int UsbLinkPlatformConnect(const char* devPathRead, const char* devPathWrite, vo
 
     rc = tcsetattr(usbFdWrite, TCSANOW, &tty);
     if (rc < 0) {
+        close(usbFdRead);
+        close(usbFdWrite);
         usbFdWrite = -1;
-        return -2;
+        return X_LINK_PLATFORM_ERROR;
     }
     return 0;
 #endif  /*USE_LINK_JTAG*/
@@ -415,8 +462,8 @@ int UsbLinkPlatformConnect(const char* devPathRead, const char* devPathWrite, vo
     *fd = usblink_open(devPathWrite);
     if (*fd == 0)
     {
-       /* could fail due to port name change */
-       return -1;
+        /* could fail due to port name change */
+        return -1;
     }
 
     if(*fd)
@@ -465,7 +512,7 @@ static int pcie_host_write(void *f,
                            unsigned int timeout)
 {
 #if (defined(_WIN32) || defined(_WIN64))
-#define CHUNK_SIZE_BYTES (5ULL * 1024ULL * 1024ULL)
+    #define CHUNK_SIZE_BYTES (5ULL * 1024ULL * 1024ULL)
 
     while (size)
     {
@@ -573,13 +620,24 @@ static int pcie_host_open(UNUSED const char* devPathRead,
 
 static int pcie_host_close(void *f)
 {
-#if (!defined(_WIN32) && !defined(_WIN64))
+    int rc;
     /**  For PCIe device reset is called on host side  */
-    pcie_reset_device(*(int*)f);
+#if (defined(_WIN32) && defined(_WIN64))
+    rc = pcie_reset_device((HANDLE)f);
+#else
+    rc = pcie_reset_device(*(int*)f);
 #endif
-
-    pcie_close(f);
-    return 0;
+    if (rc) {
+        mvLog(MVLOG_ERROR, "Device resetting failed with error %d", rc);
+        pciePlatformState_t state = PCIE_PLATFORM_ANY_STATE;
+        pcie_get_device_state(f, &state);
+        mvLog(MVLOG_INFO, "Device state is %s", pciePlatformStateToStr(state));
+    }
+    rc = pcie_close(f);
+    if (rc) {
+        mvLog(MVLOG_ERROR, "Device closing failed with error %d", rc);
+    }
+    return rc;
 }
 
 /*############################### FUNCTION ARRAYS #################################*/
@@ -619,7 +677,7 @@ int XLinkRead(xLinkDeviceHandle_t* deviceHandle, void* data, int size, unsigned
 int XLinkPlatformCloseRemote(xLinkDeviceHandle_t* deviceHandle)
 {
     if(deviceHandle->protocol == X_LINK_ANY_PROTOCOL ||
-        deviceHandle->protocol == X_LINK_NMB_OF_PROTOCOLS) {
+       deviceHandle->protocol == X_LINK_NMB_OF_PROTOCOLS) {
         perror("No method for closing handler with protocol value equals to X_LINK_ANY_PROTOCOL and X_LINK_NMB_OF_PROTOCOLS\n");
         return X_LINK_PLATFORM_ERROR;
     }
@@ -634,72 +692,187 @@ void XLinkPlatformInit()
 #endif
 }
 
-static int getDeviceName(int index, XLinkDeviceState_t state, deviceDesc_t* out_deviceDesc,
-    XLinkProtocol_t protocol, XLinkPlatform_t platform, int searchByName)
-{
-    if (index < 0) {
-        perror("Incorrect index value\n");
-        return X_LINK_PLATFORM_ERROR;
-    }
+pciePlatformState_t XLinkStateToPciePlatformState(const XLinkDeviceState_t state);
 
-    if(protocol == X_LINK_ANY_PROTOCOL ||
-       protocol == X_LINK_USB_VSC) {
-        // At the moment there is no situation where you may need a non standard vid
-        int vid = AUTO_VID;
+static xLinkPlatformErrorCode_t getUSBDeviceName(int index,
+                                                 XLinkDeviceState_t state,
+                                                 const deviceDesc_t in_deviceRequirements,
+                                                 deviceDesc_t* out_foundDevice) {
+    ASSERT_X_LINK_PLATFORM(index >= 0);
+    ASSERT_X_LINK_PLATFORM(out_foundDevice);
 
-        int pid = AUTO_PID;
-        if(state == X_LINK_UNBOOTED) {
-            pid = XLinkPlatformToPid(platform);
-            if(searchByName) {
-                pid = get_pid_by_name(out_deviceDesc->name);
-            }
-        } else if(state == X_LINK_BOOTED) {
-            pid = DEFAULT_OPENPID;
+    int vid = AUTO_VID;
+    int pid = AUTO_PID;
+
+    char name[XLINK_MAX_NAME_SIZE] = {};
+
+    int searchByName = 0;
+    if (strlen(in_deviceRequirements.name) > 0) {
+        searchByName = 1;
+        mv_strcpy(name, XLINK_MAX_NAME_SIZE, in_deviceRequirements.name);
+    }
+
+    // Set PID
+    if (state == X_LINK_BOOTED) {
+        if (in_deviceRequirements.platform != X_LINK_ANY_PLATFORM) {
+            mvLog(MVLOG_WARN, "Searching for a specific platform is unavailable for booted devices");
+            return X_LINK_PLATFORM_ERROR;
+        }
+        pid = DEFAULT_OPENPID;
+    } else {
+        if (searchByName) {
+            pid = get_pid_by_name(in_deviceRequirements.name);
+        } else {
+            pid = XLinkPlatformToPid(in_deviceRequirements.platform, state);
         }
+    }
 
 #if (!defined(_WIN32) && !defined(_WIN64))
-        uint16_t  bcdusb = -1;
-        usbBootError_t rc = usb_find_device_with_bcd(index, out_deviceDesc->name, XLINK_MAX_NAME_SIZE, 0, vid, pid, &bcdusb, searchByName);
+    uint16_t  bcdusb = -1;
+    usbBootError_t rc = usb_find_device_with_bcd(
+        index, name, XLINK_MAX_NAME_SIZE, 0, vid, pid, &bcdusb);
 #else
-        usbBootError_t rc = usb_find_device(index, out_deviceDesc->name, XLINK_MAX_NAME_SIZE, 0, vid, pid, searchByName);
+    usbBootError_t rc = usb_find_device(
+                index, name, XLINK_MAX_NAME_SIZE, 0, vid, pid);
 #endif
+    xLinkPlatformErrorCode_t xLinkRc = parseUsbBootError(rc);
+    if(xLinkRc == X_LINK_PLATFORM_SUCCESS)
+    {
+        mv_strcpy(out_foundDevice->name, XLINK_MAX_NAME_SIZE, name);
+        out_foundDevice->protocol = X_LINK_USB_VSC;
+        out_foundDevice->platform = XLinkPlatformPidToPlatform(get_pid_by_name(name));
+    }
+    return xLinkRc;
+}
 
-        xLinkPlatformErrorCode_t xLinkRc = parseUsbBootError(rc);
-        if(xLinkRc == X_LINK_PLATFORM_SUCCESS)
-        {
-            out_deviceDesc->protocol = X_LINK_USB_VSC;
-            out_deviceDesc->platform = XLinkPlatformPidToPlatform(
-                                               get_pid_by_name(out_deviceDesc->name));
-            return xLinkRc;
-        }
+static xLinkPlatformErrorCode_t getPCIeDeviceName(int index,
+                                                  XLinkDeviceState_t state,
+                                                  const deviceDesc_t in_deviceRequirements,
+                                                  deviceDesc_t* out_foundDevice) {
+    ASSERT_X_LINK_PLATFORM(index >= 0);
+    ASSERT_X_LINK_PLATFORM(out_foundDevice);
+    ASSERT_X_LINK_PLATFORM(in_deviceRequirements.platform != X_LINK_MYRIAD_2);
+
+    char name[XLINK_MAX_NAME_SIZE] = {};
+
+    if (strlen(in_deviceRequirements.name) > 0) {
+        mv_strcpy(name, XLINK_MAX_NAME_SIZE, in_deviceRequirements.name);
     }
 
-    if((protocol == X_LINK_ANY_PROTOCOL ||
-        protocol == X_LINK_PCIE) && !searchByName) {
-        out_deviceDesc->protocol = X_LINK_PCIE;
-        out_deviceDesc->platform = platform;
+    pcieHostError_t rc = pcie_find_device_port(
+        index, name, XLINK_MAX_NAME_SIZE, XLinkStateToPciePlatformState(state));
 
-        // #-18686
-        return pcie_find_device_port(0, out_deviceDesc->name, XLINK_MAX_NAME_SIZE);
+    xLinkPlatformErrorCode_t xLinkRc = parsePCIeHostError(rc);
+    if(xLinkRc == X_LINK_PLATFORM_SUCCESS)
+    {
+        mv_strcpy(out_foundDevice->name, XLINK_MAX_NAME_SIZE, name);
+        out_foundDevice->protocol = X_LINK_PCIE;
+        out_foundDevice->platform = X_LINK_MYRIAD_X;
     }
+    return xLinkRc;
+}
+
+xLinkPlatformErrorCode_t XLinkPlatformFindDeviceName(XLinkDeviceState_t state,
+                                                     const deviceDesc_t in_deviceRequirements,
+                                                     deviceDesc_t* out_foundDevice) {
+    memset(out_foundDevice, 0, sizeof(deviceDesc_t));
+    xLinkPlatformErrorCode_t USB_rc;
+    xLinkPlatformErrorCode_t PCIe_rc;
+
+    switch (in_deviceRequirements.protocol){
+        case X_LINK_USB_CDC:
+        case X_LINK_USB_VSC:
+            return getUSBDeviceName(0, state, in_deviceRequirements, out_foundDevice);
+
+        case X_LINK_PCIE:
+            return getPCIeDeviceName(0, state, in_deviceRequirements, out_foundDevice);
+
+        case X_LINK_ANY_PROTOCOL:
+            USB_rc = getUSBDeviceName(0, state, in_deviceRequirements, out_foundDevice);
+            if (USB_rc == X_LINK_PLATFORM_SUCCESS) {      // Found USB device, return it
+                return X_LINK_PLATFORM_SUCCESS;
+            }
+            if (USB_rc != X_LINK_PLATFORM_DEVICE_NOT_FOUND) {   // An issue occurred, log it
+                mvLog(MVLOG_DEBUG, "USB find device failed with rc: %s",
+                      XLinkPlatformErrorToStr(USB_rc));
+            }
+
+            // Try to find PCIe device
+            memset(out_foundDevice, 0, sizeof(deviceDesc_t));
+            PCIe_rc = getPCIeDeviceName(0, state, in_deviceRequirements, out_foundDevice);
+            if (PCIe_rc == X_LINK_PLATFORM_SUCCESS) {     // Found PCIe device, return it
+                return X_LINK_PLATFORM_SUCCESS;
+            }
+            if (PCIe_rc != X_LINK_PLATFORM_DEVICE_NOT_FOUND) {   // An issue occurred, log it
+                mvLog(MVLOG_DEBUG, "PCIe find device failed with rc: %s",
+                      XLinkPlatformErrorToStr(PCIe_rc));
+            }
+            return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
 
-    memset(out_deviceDesc, 0, sizeof(deviceDesc_t));
-    return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
+        default:
+            mvLog(MVLOG_WARN, "Unknown protocol");
+            return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
+    }
 }
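
For reference, a minimal caller-side sketch of the new single-device lookup introduced above (assumes the XLink platform headers are available; the requirement values are illustrative only):

    /* Illustrative sketch: find the first unbooted Myriad device on any protocol. */
    deviceDesc_t req = {0};
    req.protocol = X_LINK_ANY_PROTOCOL;
    req.platform = X_LINK_ANY_PLATFORM;

    deviceDesc_t found = {0};
    if (XLinkPlatformFindDeviceName(X_LINK_UNBOOTED, req, &found) == X_LINK_PLATFORM_SUCCESS) {
        /* found.name, found.protocol and found.platform were filled in by
         * getUSBDeviceName()/getPCIeDeviceName() above. */
    }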
 
-int XLinkPlatformFindDeviceName(int index,
+xLinkPlatformErrorCode_t XLinkPlatformFindArrayOfDevicesNames(
     XLinkDeviceState_t state,
-    deviceDesc_t* in_deviceRequirements,
-    deviceDesc_t* out_foundDevice)
-{
-    if(strnlen(in_deviceRequirements->name, XLINK_MAX_NAME_SIZE)) {
-        mv_strcpy(out_foundDevice->name, XLINK_MAX_NAME_SIZE, in_deviceRequirements->name);
+    const deviceDesc_t in_deviceRequirements,
+    deviceDesc_t* out_foundDevice,
+    const unsigned int devicesArraySize,
+    unsigned int *out_amountOfFoundDevices) {
+
+    memset(out_foundDevice, 0, sizeof(deviceDesc_t) * devicesArraySize);
+
+    unsigned int usb_index = 0;
+    unsigned int pcie_index = 0;
+    unsigned int both_protocol_index = 0;
+
+    // TODO Handle possible errors
+    switch (in_deviceRequirements.protocol){
+        case X_LINK_USB_CDC:
+        case X_LINK_USB_VSC:
+            while(getUSBDeviceName(
+                usb_index, state, in_deviceRequirements, &out_foundDevice[usb_index]) ==
+                  X_LINK_PLATFORM_SUCCESS) {
+                ++usb_index;
+            }
 
-        return getDeviceName(index, state, out_foundDevice, in_deviceRequirements->protocol, in_deviceRequirements->platform, 1);
-    }
+            *out_amountOfFoundDevices = usb_index;
+            return X_LINK_PLATFORM_SUCCESS;
+
+        case X_LINK_PCIE:
+            while(getPCIeDeviceName(
+                pcie_index, state, in_deviceRequirements, &out_foundDevice[pcie_index]) ==
+                  X_LINK_PLATFORM_SUCCESS) {
+                ++pcie_index;
+            }
+
+            *out_amountOfFoundDevices = pcie_index;
+            return X_LINK_PLATFORM_SUCCESS;
+
+        case X_LINK_ANY_PROTOCOL:
+            while(getUSBDeviceName(
+                usb_index, state, in_deviceRequirements,
+                &out_foundDevice[both_protocol_index]) ==
+                  X_LINK_PLATFORM_SUCCESS) {
+                ++usb_index;
+                ++both_protocol_index;
+            }
+            while(getPCIeDeviceName(
+                pcie_index, state, in_deviceRequirements,
+                &out_foundDevice[both_protocol_index]) ==
+                  X_LINK_PLATFORM_SUCCESS) {
+                ++pcie_index;
+                ++both_protocol_index;
+            }
+            *out_amountOfFoundDevices = both_protocol_index;
+            return X_LINK_PLATFORM_SUCCESS;
 
-    memset(out_foundDevice->name, 0, XLINK_MAX_NAME_SIZE);
-    return getDeviceName(index, state, out_foundDevice, in_deviceRequirements->protocol, in_deviceRequirements->platform, 0);
+        default:
+            mvLog(MVLOG_WARN, "Unknown protocol");
+            return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
+    }
 }
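
A similar sketch for the array variant (the array size of 8 is an arbitrary illustration; note that the enumeration loops above stop only when no further device is found, so the caller should size the array for the maximum expected device count):

    deviceDesc_t req = {0};
    req.protocol = X_LINK_ANY_PROTOCOL;
    req.platform = X_LINK_ANY_PLATFORM;

    deviceDesc_t devices[8];
    unsigned int numFound = 0;
    if (XLinkPlatformFindArrayOfDevicesNames(X_LINK_ANY_STATE, req, devices,
                                             8, &numFound) == X_LINK_PLATFORM_SUCCESS) {
        /* devices[0..numFound-1] hold the discovered descriptors; with
         * X_LINK_ANY_PROTOCOL, USB devices are listed before PCIe ones. */
    }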
 
 int XLinkPlatformIsDescriptionValid(deviceDesc_t *in_deviceDesc) {
@@ -713,7 +886,7 @@ int XLinkPlatformIsDescriptionValid(deviceDesc_t *in_deviceDesc) {
 
     if(in_deviceDesc->platform != X_LINK_ANY_PLATFORM) {
         int namePid = get_pid_by_name(in_deviceDesc->name);
-        int platformPid = XLinkPlatformToPid(in_deviceDesc->platform);
+        int platformPid = XLinkPlatformToPid(in_deviceDesc->platform, X_LINK_UNBOOTED);
 
         return namePid == platformPid;
     }
@@ -721,12 +894,24 @@ int XLinkPlatformIsDescriptionValid(deviceDesc_t *in_deviceDesc) {
     return 1;
 }
 
-int XLinkPlatformToPid(const XLinkPlatform_t platform) {
-    switch (platform) {
-        case X_LINK_MYRIAD_2: return DEFAULT_UNBOOTPID_2150;
-        case X_LINK_MYRIAD_X: return DEFAULT_UNBOOTPID_2485;
-        default:       return AUTO_UNBOOTED_PID;
+int XLinkPlatformToPid(const XLinkPlatform_t platform, const XLinkDeviceState_t state) {
+    if (state == X_LINK_UNBOOTED) {
+        switch (platform) {
+            case X_LINK_MYRIAD_2:  return DEFAULT_UNBOOTPID_2150;
+            case X_LINK_MYRIAD_X:  return DEFAULT_UNBOOTPID_2485;
+            default:               return AUTO_UNBOOTED_PID;
+        }
+    } else if (state == X_LINK_BOOTED) {
+        return DEFAULT_OPENPID;
+    } else if (state == X_LINK_ANY_STATE) {
+        switch (platform) {
+            case X_LINK_MYRIAD_2:  return DEFAULT_UNBOOTPID_2150;
+            case X_LINK_MYRIAD_X:  return DEFAULT_UNBOOTPID_2485;
+            default:               return AUTO_PID;
+        }
     }
+
+    return AUTO_PID;
 }
 
 XLinkPlatform_t XLinkPlatformPidToPlatform(const int pid) {
@@ -737,6 +922,25 @@ XLinkPlatform_t XLinkPlatformPidToPlatform(const int pid) {
     }
 }
 
+XLinkDeviceState_t XLinkPlatformPidToState(const int pid) {
+    switch (pid) {
+        case DEFAULT_OPENPID: return X_LINK_BOOTED;
+        case AUTO_PID: return X_LINK_ANY_STATE;
+        default:       return X_LINK_UNBOOTED;
+    }
+}
+
+pciePlatformState_t XLinkStateToPciePlatformState(const XLinkDeviceState_t state) {
+    switch (state) {
+        case X_LINK_ANY_STATE:  return PCIE_PLATFORM_ANY_STATE;
+        case X_LINK_BOOTED:     return PCIE_PLATFORM_BOOTED;
+        case X_LINK_UNBOOTED:   return PCIE_PLATFORM_UNBOOTED;
+        default:
+            return PCIE_PLATFORM_ANY_STATE;
+    }
+}
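+
A short round-trip through the new PID helpers (the unbooted PID value comes from usb_boot.h; the reverse 0x2485 -> X_LINK_MYRIAD_X mapping is assumed from the existing XLinkPlatformPidToPlatform table, which is not shown in this hunk):

    int pid = XLinkPlatformToPid(X_LINK_MYRIAD_X, X_LINK_UNBOOTED); /* DEFAULT_UNBOOTPID_2485 (0x2485) */
    XLinkPlatform_t platform = XLinkPlatformPidToPlatform(pid);     /* X_LINK_MYRIAD_X (assumed mapping) */
    XLinkDeviceState_t state = XLinkPlatformPidToState(pid);        /* X_LINK_UNBOOTED: pid is neither
                                                                     * DEFAULT_OPENPID nor AUTO_PID */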
+
+#if (!defined(_WIN32) && !defined(_WIN64))
 int XLinkPlatformBootRemote(deviceDesc_t* deviceDesc, const char* binaryPath)
 {
     int rc = 0;
@@ -775,7 +979,7 @@ int XLinkPlatformBootRemote(deviceDesc_t* deviceDesc, const char* binaryPath)
     fclose(file);
 
     if (deviceDesc->protocol == X_LINK_PCIE) {
-        // FIXME Temporary open fd to boot device and then close it. But it can cause some problem
+        // Temporarily open an fd to boot the device, then close it
         int* pcieFd = NULL;
         rc = pcie_init(deviceDesc->name, (void**)&pcieFd);
         if (rc) {
@@ -809,3 +1013,72 @@ int XLinkPlatformBootRemote(deviceDesc_t* deviceDesc, const char* binaryPath)
         return -1;
     }
 }
+#else
+int XLinkPlatformBootRemote(deviceDesc_t* deviceDesc, const char* binaryPath)
+{
+    int rc = 0;
+    FILE *file;
+    long file_size;
+
+    void *image_buffer;
+
+    if (deviceDesc->protocol == X_LINK_PCIE) {
+        // FIXME Temporarily open an fd to boot the device, then close it. This may cause problems
+        int* pcieFd = NULL;
+
+        rc = pcie_init(deviceDesc->name, (void**)&pcieFd);
+        if (rc) {
+            printf("pcie_init failed with %s\n", deviceDesc->name);
+            return rc;
+        }
+        rc = pcie_boot_device(pcieFd);
+        pcie_close(pcieFd); // Will not check result for now
+        return rc;
+    } else if (deviceDesc->protocol == X_LINK_USB_VSC) {
+        /* Open the mvcmd file */
+        file = fopen(binaryPath, "rb");
+        if(file == NULL) {
+            printf("fw file open failed with %s\n", binaryPath);
+            if(usb_loglevel)
+                perror(binaryPath);
+            return -7;
+        }
+        fseek(file, 0, SEEK_END);
+        file_size = ftell(file);
+        rewind(file);
+        if(file_size <= 0 || !(image_buffer = (char*)malloc(file_size)))
+        {
+            if(usb_loglevel)
+                perror("buffer");
+            fclose(file);
+            return -3;
+        }
+        if(fread(image_buffer, 1, file_size, file) != file_size)
+        {
+            if(usb_loglevel)
+                perror(binaryPath);
+            fclose(file);
+            free(image_buffer);
+            return -7;
+        }
+        fclose(file);
+
+        char subaddr[28+2];
+        // This will be the string to search for in /sys/dev/char links
+        int chars_to_write = snprintf(subaddr, 28, "-%s:", deviceDesc->name);
+        if(chars_to_write >= 28) {
+            printf("Path to your boot util is too long for the char array here!\n");
+        }
+        // Boot it
+        rc = usb_boot(deviceDesc->name, image_buffer, file_size);
+
+        if(!rc && usb_loglevel > 1) {
+            fprintf(stderr, "Boot successful, device address %s\n", deviceDesc->name);
+        }
+        return rc;
+    } else {
+        printf("Selected protocol not supported\n");
+        return -1;
+    }
+}
+#endif
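
An illustrative sketch of booting a device found through the new search API (the firmware path is purely a placeholder; a real caller passes the mvcmd image shipped with the plugin):

    deviceDesc_t req = {0};
    req.protocol = X_LINK_ANY_PROTOCOL;
    req.platform = X_LINK_ANY_PLATFORM;

    deviceDesc_t dev = {0};
    if (XLinkPlatformFindDeviceName(X_LINK_UNBOOTED, req, &dev) == X_LINK_PLATFORM_SUCCESS) {
        /* "/path/to/firmware.mvcmd" is a placeholder, not a real path */
        if (XLinkPlatformBootRemote(&dev, "/path/to/firmware.mvcmd") != 0) {
            /* boot failed; both implementations above return non-zero on error */
        }
    }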
index e2af8dd..e8f8a53 100644 (file)
@@ -33,6 +33,8 @@
 #define MVLOG_UNIT_NAME PCIe
 #include "mvLog.h"
 #include "mvStringUtils.h"
+#include "pcie_host.h"
+
 
 #define PCIE_DEVICE_ID 0x6200
 #define PCIE_VENDOR_ID 0x8086
@@ -42,14 +44,23 @@ static HANDLE global_pcie_lock_fd = NULL;
 static OVERLAPPED global_pcie_lock_overlap = { 0 };
 #define GLOBAL_PCIE_LOCK() LockFileEx(global_pcie_lock_fd, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, 0, MAXDWORD, MAXDWORD, &global_pcie_lock_overlap)
 #define GLOBAL_PCIE_UNLOCK() UnlockFileEx(global_pcie_lock_fd, 0, MAXDWORD, MAXDWORD, &global_pcie_lock_overlap)
+/* IOCTL command IDs for Windows */
+#define MXLK_DEVICE_TYPE 40001
+
+#define MXLK_STATUS_DEV   CTL_CODE(MXLK_DEVICE_TYPE, 0xA08, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define MXLK_RESET_DEV    CTL_CODE(MXLK_DEVICE_TYPE, 0xA09, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define MXLK_BOOT_DEV     CTL_CODE(MXLK_DEVICE_TYPE, 0xA0A, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+
 #endif
 
-#if (!defined(_WIN32) || !defined(_WIN64))
+#if (!defined(_WIN32) && !defined(_WIN64))
 /**         MXLK data           */
 /* IOCTL commands IDs. */
 #define IOC_MAGIC 'Z'
-#define MXLK_RESET_DEV _IO(IOC_MAGIC, 0x80)
-#define MXLK_BOOT_DEV  _IOW(IOC_MAGIC, 0x81, struct mxlk_boot_param)
+#define MXLK_RESET_DEV    _IO(IOC_MAGIC,  0x80)
+#define MXLK_BOOT_DEV     _IOW(IOC_MAGIC, 0x81, struct mxlk_boot_param)
+#define MXLK_STATUS_DEV   _IOR(IOC_MAGIC, 0x82,  enum mx_fw_status)
+#endif
 
 struct mxlk_boot_param {
     /* Buffer containing the MX application image (MVCMD format) */
@@ -57,8 +68,17 @@ struct mxlk_boot_param {
     /* Size of the image in bytes. */
     size_t length;
 };
+
+/* State of Myriad X device. */
+enum mx_fw_status {
+    /* MX waiting for FW to be loaded from host */
+    MX_FW_STATE_BOOTLOADER,
+    /* MX running FW loaded from host. */
+    MX_FW_STATUS_USER_APP,
+    /* MX context is not restored or device is lost*/
+    MX_FW_STATUS_UNKNOWN_STATE,
+};
 /**         MXLK data end       */
-#endif
 
 #if !(defined(_WIN32) || defined(_WIN64))
 static inline void timeout_to_timeval(unsigned int timeout_ms,
@@ -77,14 +97,14 @@ int pcie_write(HANDLE fd, void * buf, size_t bufSize, unsigned int timeout)
     HANDLE dev = fd;
 
     BOOL ret = WriteFile(dev, buf, bufSize, &bytesWritten, 0);
-
+    mvLog(MVLOG_DEBUG, "pcie_write windows return  fd %d buff %p bytesWritten %d  errno %d", dev,buf, bytesWritten, errno);
     if (ret == FALSE)
         return -errno;
 
     return bytesWritten;
 }
 #else
-int pcie_write(void *fd, void * buf, size_t bufSize, unsigned int timeout_ms)
+pcieHostError_t pcie_write(void *fd, void * buf, size_t bufSize, unsigned int timeout_ms)
 {
     fd_set wrfds;
     struct timeval timeval;
@@ -107,17 +127,17 @@ int pcie_write(void *fd, void * buf, size_t bufSize, unsigned int timeout_ms)
     ret = select(*((int*)fd) + 1, NULL, &wrfds, NULL, select_timeout);
     if (ret < 0)
     {
-        return X_LINK_PLATFORM_ERROR;
+        return PCIE_HOST_ERROR;
     }
     if (!FD_ISSET(*((int*)fd), &wrfds))
     {
-        return X_LINK_PLATFORM_TIMEOUT;
+        return PCIE_HOST_TIMEOUT;
     }
 
     ret = write(*((int*)fd), buf, bufSize);
     if (ret < 0)
     {
-        return X_LINK_PLATFORM_ERROR;
+        return PCIE_HOST_ERROR;
     }
 
     return ret;
@@ -125,7 +145,7 @@ int pcie_write(void *fd, void * buf, size_t bufSize, unsigned int timeout_ms)
 #endif  // (defined(_WIN32) || defined(_WIN64))
 
 #if (defined(_WIN32) || defined(_WIN64))
-int pcie_read(HANDLE fd, void * buf, size_t bufSize, int timeout)
+int pcie_read(HANDLE fd, void * buf, size_t bufSize, unsigned int timeout)
 {
     int bytesRead;
     HANDLE dev = fd;
@@ -138,7 +158,7 @@ int pcie_read(HANDLE fd, void * buf, size_t bufSize, int timeout)
    return bytesRead;
 }
 #else
-int pcie_read(void *fd, void *buf, size_t bufSize, int timeout_ms)
+pcieHostError_t pcie_read(void *fd, void *buf, size_t bufSize, unsigned int timeout_ms)
 {
     fd_set rdfds;
     struct timeval timeval;
@@ -157,15 +177,15 @@ int pcie_read(void *fd, void *buf, size_t bufSize, int timeout_ms)
 
     ret = select(*((int*)fd) + 1, &rdfds, NULL, NULL, select_timeout);
     if (ret < 0) {
-        return X_LINK_PLATFORM_ERROR;
+        return PCIE_HOST_ERROR;
     }
     if (!FD_ISSET(*((int*)fd), &rdfds)) {
-        return X_LINK_PLATFORM_TIMEOUT;
+        return PCIE_HOST_TIMEOUT;
     }
 
     ret = read(*((int*)fd), buf, bufSize);
     if (ret < 0) {
-        return X_LINK_PLATFORM_ERROR;
+        return PCIE_HOST_ERROR;
     }
 
     return ret;
@@ -175,6 +195,9 @@ int pcie_read(void *fd, void *buf, size_t bufSize, int timeout_ms)
 #if (defined(_WIN32) || defined(_WIN64))
 int pcie_init(const char *slot, HANDLE *fd)
 {
+
+// Commented out so that an aborted run does not block re-running
+/*
     const char* tempPath = getenv("TEMP");
     const char pcieMutexName[] = "\\pcie.mutex";
     if (tempPath) {
@@ -198,7 +221,7 @@ int pcie_init(const char *slot, HANDLE *fd)
         mvLog(MVLOG_ERROR, "Only one device supported.");
         return -1;
     }
-
+*/
     HANDLE hDevice = CreateFile(slot,
         GENERIC_READ | GENERIC_WRITE,
         FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -214,13 +237,14 @@ int pcie_init(const char *slot, HANDLE *fd)
 
     *fd = hDevice;
 
+    mvLog(MVLOG_DEBUG, "pcie_init windows new fd %d", *fd);
     return 0;
 }
 #else
 int pcie_init(const char *slot, void **fd)
 {
     if (!fd)
-     return -1;
+        return -1;
 
     int mx_fd = open(slot, O_RDWR);
 
@@ -246,7 +270,8 @@ int pcie_init(const char *slot, void **fd)
 int pcie_close(void *fd)
 {
 #if (defined(_WIN32) || defined(_WIN64))
-    GLOBAL_PCIE_UNLOCK();
+    // Commented out so that an aborted run does not block re-running
+    //GLOBAL_PCIE_UNLOCK();
 
     HANDLE hDevice = (HANDLE)fd;
     if (hDevice == INVALID_HANDLE_VALUE) {
@@ -309,32 +334,61 @@ int pci_count_devices(uint16_t vid, uint16_t pid)
 }
 #endif  // (defined(_WIN32) || defined(_WIN64))
 
-xLinkPlatformErrorCode_t pcie_find_device_port(int index, char* port_name, int size) {
+pcieHostError_t pcie_find_device_port(
+    int index, char* port_name, int name_length, const pciePlatformState_t requiredState) {
+    ASSERT_X_LINK_PLATFORM(port_name);
+    ASSERT_X_LINK_PLATFORM(index >= 0);
+    ASSERT_X_LINK_PLATFORM(name_length > 0);
+
+    pcieHostError_t rc = PCIE_HOST_DEVICE_NOT_FOUND;
+
+    char found_device[XLINK_MAX_NAME_SIZE] = { 0 };
+    pciePlatformState_t platformState;
+
 #if (defined(_WIN32) || defined(_WIN64))
-    snprintf(port_name, size, "%s%d", "\\\\.\\mxlink", index);
+    int amountOfMyriadPCIeDevices = pci_count_devices(PCIE_VENDOR_ID, PCIE_DEVICE_ID);
+    if (amountOfMyriadPCIeDevices == 0)
+        return PCIE_HOST_DEVICE_NOT_FOUND;
 
-    if (pci_count_devices(PCIE_VENDOR_ID, PCIE_DEVICE_ID) == 0) {
-        mvLog(MVLOG_DEBUG, "No PCIe device(s) with Vendor ID: 0x%hX and Device ID: 0x%hX found",
-                PCIE_VENDOR_ID, PCIE_DEVICE_ID);
-        return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
-    }
+    int amountOfSuitableDevices = 0;
+    int deviceCount = 0;
 
-    if (index > pci_count_devices(PCIE_VENDOR_ID, PCIE_DEVICE_ID)) {
-        return X_LINK_PLATFORM_DEVICE_NOT_FOUND;
-    }
+    while (deviceCount < amountOfMyriadPCIeDevices) {
+        snprintf(found_device, XLINK_MAX_NAME_SIZE, "%s%d", "\\\\.\\mxlink", deviceCount);
 
-    return X_LINK_PLATFORM_SUCCESS;
+        // Get state of device
+        if (pcie_get_device_state(found_device, &platformState) != 0) {
+            return PCIE_HOST_ERROR;   // Get device state step failed
+        }
+
+        // Found device suits requested state
+        if (platformState == requiredState || requiredState == PCIE_PLATFORM_ANY_STATE) {
+            // If port_name is specified, we search for specific device
+            if (strnlen(port_name, name_length) > 1 &&
+                strncmp(port_name, found_device, name_length) == 0) {
+                rc = PCIE_HOST_SUCCESS;
+                break;
+                // Trying to find device which suits requirements and index
+            }
+            else if (amountOfSuitableDevices == index) {
+                mv_strncpy(port_name, name_length,
+                    found_device, XLINK_MAX_NAME_SIZE - 1);
+                rc = PCIE_HOST_SUCCESS;
+                break;
+            }
+            ++amountOfSuitableDevices;
+        }
+        ++deviceCount;
+    }
 
+    return rc;
 #else
-    xLinkPlatformErrorCode_t rc = X_LINK_PLATFORM_DEVICE_NOT_FOUND;
     struct dirent *entry;
     DIR *dp;
-    if (port_name == NULL)
-        return X_LINK_PLATFORM_ERROR;
 
     dp = opendir("/sys/class/mxlk/");
     if (dp == NULL) {
-        return X_LINK_PLATFORM_DRIVER_NOT_LOADED;
+        return PCIE_HOST_DRIVER_NOT_LOADED;
     }
 
     // All entries in this (virtual) directory are generated when the driver
@@ -344,13 +398,30 @@ xLinkPlatformErrorCode_t pcie_find_device_port(int index, char* port_name, int s
         // Compare the beginning of the name to make sure it is a device name
         if (strncmp(entry->d_name, "mxlk", 4) == 0)
         {
-            if (device_cnt == index)
-            {
-                snprintf(port_name, size, "/dev/%s", entry->d_name);
-                rc = X_LINK_PLATFORM_SUCCESS;
-                break;
+            // Save name
+            snprintf(found_device, name_length, "/dev/%s", entry->d_name);
+            // Get state of device
+            if (pcie_get_device_state(found_device, &platformState) != 0) {
+                closedir(dp);
+                return PCIE_HOST_ERROR;   // Get device state step failed
+            }
+
+            // Found device suits requested state
+            if (platformState == requiredState || requiredState == PCIE_PLATFORM_ANY_STATE) {
+                // If port_name is specified, we search for specific device
+                if (strnlen(port_name, name_length) > 1 &&
+                    strncmp(port_name, found_device, name_length) == 0) {
+                    rc = PCIE_HOST_SUCCESS;
+                    break;
+                    // Trying to find device which suits requirements and index
+                } else if (device_cnt == index){
+                    mv_strncpy(port_name, name_length,
+                               found_device, XLINK_MAX_NAME_SIZE - 1);
+                    rc = PCIE_HOST_SUCCESS;
+                    break;
+                }
+                ++device_cnt;
             }
-            device_cnt++;
         }
     }
     closedir(dp);
@@ -359,24 +430,154 @@ xLinkPlatformErrorCode_t pcie_find_device_port(int index, char* port_name, int s
 #endif  // (!defined(_WIN32) && !defined(_WIN64))
 }
 
+#if (!defined(_WIN32) && !defined(_WIN64))
 int pcie_reset_device(int fd)
 {
-#if (!defined(_WIN32) || !defined(_WIN64))
     return ioctl(fd, MXLK_RESET_DEV);
+}
 #else
-    return -1;
-#endif
+int pcie_reset_device(HANDLE fd)
+{
+    BOOL bResult   = FALSE;
+    DWORD junk     = 0;                     // discard results
+    int output_buffer;
+
+    mvLog(MVLOG_DEBUG, "calling Windows RESET DeviceIoControl fd %d", fd);
+    if (fd == 0) {
+        return PCIE_HOST_ERROR;
+    }
+
+    bResult = DeviceIoControl(fd,                    // device to be queried
+                              MXLK_RESET_DEV,                // operation to perform
+                              NULL, 0,                       // no input buffer
+                              &output_buffer, sizeof(output_buffer), // output buffer
+                              &junk,                         // # bytes returned
+                              (LPOVERLAPPED) NULL);          // synchronous I/O
+
+    if (!bResult) {
+        mvLog(MVLOG_ERROR, "RESET failed(status = %d).", GetLastError());
+        return PCIE_HOST_ERROR;
+    } else {
+        return PCIE_HOST_SUCCESS;
+    }
 }
+#endif
 
+#if (!defined(_WIN32) && !defined(_WIN64))
 int pcie_boot_device(int fd, void *buffer, size_t length)
 {
-#if (!defined(_WIN32) || !defined(_WIN64))
+    int rc = pcie_reset_device(fd);
+    if (rc) {
+        mvLog(MVLOG_INFO, "Device resetting failed with error: %d\n", rc);
+        return rc;
+    }
     struct mxlk_boot_param boot_param;
 
     boot_param.buffer = buffer;
     boot_param.length = length;
     return ioctl(fd, MXLK_BOOT_DEV, &boot_param);
+}
 #else
-    return -1;
+ int pcie_boot_device(HANDLE fd)
+ {
+    int rc = pcie_reset_device(fd);
+    if (rc) {
+        mvLog(MVLOG_INFO, "Device resetting failed with error: %d\n", rc);
+        return rc;
+    }
+
+    BOOL bResult   = FALSE;
+    DWORD junk     = 0;                     // discard results
+    int output_buffer;
+    struct mxlk_boot_param boot_param;
+
+    mvLog(MVLOG_DEBUG, "calling Windows BOOT DeviceIoControl %d",fd);
+    if (fd == 0) {
+        return PCIE_HOST_ERROR;
+    }
+    bResult = DeviceIoControl(fd,                    // device to be queried
+                              MXLK_BOOT_DEV,                 // operation to perform
+                              NULL, 0,                      // no input buffer
+                              &output_buffer, sizeof(output_buffer), // output buffer
+                              &junk,                         // # bytes returned
+                              (LPOVERLAPPED) NULL);          // synchronous I/O
+    if (!bResult) {
+        mvLog(MVLOG_ERROR, "BOOT failed(status = %d)", GetLastError());
+        return PCIE_HOST_ERROR;
+    } else {
+        return PCIE_HOST_SUCCESS;
+    }
+}
+#endif
+
+
+pcieHostError_t pcie_get_device_state(const char *port_name, pciePlatformState_t *platformState) {
+    ASSERT_X_LINK_PLATFORM(port_name);
+    ASSERT_X_LINK_PLATFORM(platformState);
+    pcieHostError_t retCode = PCIE_HOST_SUCCESS;
+
+#if (!defined(_WIN32) && !defined(_WIN64))       // Linux implementation
+    int mx_fd = open(port_name, O_RDONLY);
+
+    if (mx_fd == -1) {
+        // the driver returns EACCES if this instance is already in use.
+        *platformState = PCIE_PLATFORM_BOOTED;
+    } else {
+        enum mx_fw_status fw_status= MX_FW_STATUS_UNKNOWN_STATE;
+        int ret = ioctl(mx_fd, MXLK_STATUS_DEV, &fw_status);
+        if(ret){
+            *platformState = PCIE_PLATFORM_ANY_STATE;
+            mvLog(MVLOG_WARN, "Failed to get device status: %d. Errno %d", ret, errno);
+            retCode = PCIE_HOST_DEVICE_NOT_FOUND;
+        } else if(fw_status == MX_FW_STATUS_USER_APP) {
+            *platformState = PCIE_PLATFORM_BOOTED;
+        } else {
+            *platformState = PCIE_PLATFORM_UNBOOTED;
+        }
+        close(mx_fd);
+    }
+#else                                           // Windows implementation
+    HANDLE hDevice = INVALID_HANDLE_VALUE;  // handle to the drive to be examined
+    BOOL bResult   = FALSE;                 // results flag
+    DWORD junk     = 0;                     // discard results
+
+    hDevice = CreateFile(port_name,         // drive to open
+                         0,                 // no access to the drive
+                         FILE_SHARE_READ |  // share mode
+                         FILE_SHARE_WRITE,
+                         NULL,              // default security attributes
+                         OPEN_EXISTING,     // disposition
+                         0,                 // file attributes
+                         NULL);             // do not copy file attributes
+
+    if (hDevice == INVALID_HANDLE_VALUE){   // cannot open the drive
+        mvLog(MVLOG_ERROR, "Failed to open device: %s. Error %d", port_name, GetLastError());
+        *platformState = PCIE_PLATFORM_ANY_STATE;
+        return PCIE_HOST_DEVICE_NOT_FOUND;
+    }
+    enum mx_fw_status fw_status = MX_FW_STATUS_USER_APP;
+
+    bResult = DeviceIoControl(hDevice,                       // device to be queried
+                              MXLK_STATUS_DEV, // operation to perform
+                              NULL, 0,                       // no input buffer
+                              &fw_status, sizeof(fw_status), // output buffer
+                              &junk,                         // # bytes returned
+                              (LPOVERLAPPED) NULL);          // synchronous I/O
+
+    if (!bResult) {
+        mvLog(MVLOG_ERROR, "Failed to get device status. Error %d", GetLastError());
+        *platformState = PCIE_PLATFORM_ANY_STATE;
+        retCode = PCIE_HOST_DEVICE_NOT_FOUND;
+        mvLog(MVLOG_DEBUG, "PCIE_PLATFORM_ANY_STATE");
+    } else if (fw_status == MX_FW_STATUS_USER_APP) {
+        *platformState = PCIE_PLATFORM_BOOTED;
+        mvLog(MVLOG_DEBUG, "PCIE_PLATFORM_BOOTED");
+    } else {
+        *platformState = PCIE_PLATFORM_UNBOOTED;
+        mvLog(MVLOG_DEBUG, "PCIE_PLATFORM_UNBOOTED");
+    }
+
+    CloseHandle(hDevice);
 #endif
+    return retCode;
 }
index 9ce2273..4a54b1d 100644 (file)
@@ -6,13 +6,49 @@
 #define PCIE_HOST_H
 
 #include "XLinkPlatform.h"
+#include "XLinkPlatform_tool.h"
+
+typedef enum {
+    /* PCIE_PLATFORM_ANY_STATE is intended for use in the device requirements,
+     * but also means an unknown state if we cannot get the device status */
+    PCIE_PLATFORM_ANY_STATE = 0,
+    PCIE_PLATFORM_BOOTED = 1,
+    PCIE_PLATFORM_UNBOOTED = 2,
+} pciePlatformState_t;
+
+typedef enum {
+    PCIE_HOST_SUCCESS = 0,
+    PCIE_HOST_DEVICE_NOT_FOUND,
+    PCIE_HOST_ERROR,
+    PCIE_HOST_TIMEOUT,
+    PCIE_HOST_DRIVER_NOT_LOADED
+} pcieHostError_t;
 
 int pcie_init(const char *slot, void **fd);
-int pcie_write(void *fd, void * buf, size_t bufSize, unsigned int timeout_ms);
-int pcie_read(void *fd, void *buf, size_t bufSize, unsigned int timeout_ms);
+pcieHostError_t pcie_write(void *fd, void * buf, size_t bufSize, unsigned int timeout_ms);
+pcieHostError_t pcie_read(void *fd, void *buf, size_t bufSize, unsigned int timeout_ms);
 int pcie_close(void *fd);
-xLinkPlatformErrorCode_t pcie_find_device_port(int index, char* port_name, int size);
+
+/**
+ *  @brief Get device name by index
+ *  @param port_name      Port on which the device is located.
+ *                        If not empty, the function searches for a device with this name
+ *  @param name_length    Size of the port_name buffer
+ *  @param requiredState  Only devices in this state are returned
+ *                        (PCIE_PLATFORM_ANY_STATE matches any state)
+ */
+pcieHostError_t pcie_find_device_port(
+    int index, char* port_name, int name_length, pciePlatformState_t requiredState);
+
+/**
+ * @brief Get the state of the PCIe device on the specified port
+ */
+pcieHostError_t pcie_get_device_state(
+    const char * port_name, pciePlatformState_t* platformState);
+
+
+#if (!defined(_WIN32) && !defined(_WIN64))
 int pcie_reset_device(int fd);
 int pcie_boot_device(int fd, void *buffer, size_t length);
-
+#else
+int pcie_reset_device(HANDLE fd);
+int pcie_boot_device(HANDLE fd);
+#endif
 #endif  // PCIE_HOST_H
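
An illustrative sketch of the extended PCIe host API declared above (the /dev/mxlk0 style naming applies to the Linux path; the buffer is left empty so the search is done by index rather than by name):

    char port[XLINK_MAX_NAME_SIZE] = {0};   /* empty buffer: search by index, not by name */
    if (pcie_find_device_port(0, port, XLINK_MAX_NAME_SIZE,
                              PCIE_PLATFORM_UNBOOTED) == PCIE_HOST_SUCCESS) {
        pciePlatformState_t st;
        if (pcie_get_device_state(port, &st) == PCIE_HOST_SUCCESS &&
            st == PCIE_PLATFORM_UNBOOTED) {
            /* the device at port (e.g. /dev/mxlk0 on Linux) is ready to be
             * booted with pcie_boot_device() */
        }
    }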
index 7e770c8..95dcdfe 100644 (file)
@@ -2,11 +2,6 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-// USB utility for use with Myriad2v2 ROM
-// Very heavily modified from Sabre version of usb_boot
-// Copyright(C) 2015 Movidius Ltd.
-
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -35,7 +30,6 @@
 #define DEFAULT_WRITE_TIMEOUT       2000
 #define DEFAULT_CONNECT_TIMEOUT     20000
 #define DEFAULT_SEND_FILE_TIMEOUT   10000
-#define DEFAULT_CHUNKSZ             1024*1024
 #define USB1_CHUNKSZ                64
 
 /*
@@ -192,7 +186,7 @@ static int isBootedMyriadDevice(const int idVendor, const int idProduct) {
 static int isNotBootedMyriadDevice(const int idVendor, const int idProduct) {
     // Device is Myriad, pid is supported and it is not a booted device
     if (idVendor == DEFAULT_VID && is_pid_supported(idProduct) == 1
-            && idProduct != DEFAULT_OPENPID) {
+        && idProduct != DEFAULT_OPENPID) {
         return 1;
     }
     return 0;
@@ -214,8 +208,10 @@ static const char *gen_addr(libusb_device *dev, int pid)
     }
     p = buff;
 
+#ifdef XLINK_USE_BUS
     uint8_t bus = libusb_get_bus_number(dev);
     p += snprintf(p, sizeof(buff), "%u.", bus);
+#endif
 
     for (i = 0; i < pnum_cnt - 1; i++)
         p += snprintf(p, sizeof(buff),"%u.", pnums[i]);
@@ -237,8 +233,8 @@ static pthread_mutex_t globalMutex = PTHREAD_MUTEX_INITIALIZER;
 
 /**
  * @brief Find usb device address
- * @param addr         Device name (address) which would be returned
- * @param searchByName Means that need to find device with name which contains in addr parameter
+ * @param input_addr  Device name (address) to be returned. If not empty, the function will try
+ *                  to find a device with this name
  *
  * @details
  * Find any device (device = 0):
@@ -253,13 +249,13 @@ static pthread_mutex_t globalMutex = PTHREAD_MUTEX_INITIALIZER;
  * Index can be used to iterate through all connected myriad devices and save their names.
  * It will loop only over suitable devices specified by vid and pid
  */
-usbBootError_t usb_find_device_with_bcd(unsigned idx, char *addr,
-        unsigned addrsize, void **device, int vid, int pid, uint16_t* bcdusb, int searchByName) {
+usbBootError_t usb_find_device_with_bcd(unsigned idx, char *input_addr,
+                                        unsigned addrsize, void **device, int vid, int pid, uint16_t* bcdusb) {
     if (pthread_mutex_lock(&globalMutex)) {
         fprintf(stderr, "Mutex lock failed\n");
         return USB_BOOT_ERROR;
     }
-
+    int searchByName = 0;
     static libusb_device **devs = NULL;
     libusb_device *dev = NULL;
     struct libusb_device_descriptor desc;
@@ -276,6 +272,10 @@ usbBootError_t usb_find_device_with_bcd(unsigned idx, char *addr,
         return USB_BOOT_ERROR;
     }
 
+    if (strlen(input_addr) > 1) {
+        searchByName = 1;
+    }
+
     // Update device list if empty or if index is 0
     if (!devs || idx == 0) {
         if (devs) {
@@ -303,45 +303,46 @@ usbBootError_t usb_find_device_with_bcd(unsigned idx, char *addr,
 
         // If found device have the same id and vid as input
         if ( (desc.idVendor == vid && desc.idProduct == pid)
-                // Any myriad device
-                || (vid == AUTO_VID && pid == AUTO_PID
-                        && isMyriadDevice(desc.idVendor, desc.idProduct))
-                // Any not booted myriad device
-                || (vid == AUTO_VID && (pid == AUTO_UNBOOTED_PID)
-                        && isNotBootedMyriadDevice(desc.idVendor, desc.idProduct))
-                // Any not booted with specific pid
-                || (vid == AUTO_VID && pid == desc.idProduct
-                        && isNotBootedMyriadDevice(desc.idVendor, desc.idProduct))
-                // Any booted device
-                || (vid == AUTO_VID && pid == DEFAULT_OPENPID
-                        && isBootedMyriadDevice(desc.idVendor, desc.idProduct)) )
+             // Any myriad device
+             || (vid == AUTO_VID && pid == AUTO_PID
+                 && isMyriadDevice(desc.idVendor, desc.idProduct))
+             // Any not booted myriad device
+             || (vid == AUTO_VID && (pid == AUTO_UNBOOTED_PID)
+                 && isNotBootedMyriadDevice(desc.idVendor, desc.idProduct))
+             // Any not booted with specific pid
+             || (vid == AUTO_VID && pid == desc.idProduct
+                 && isNotBootedMyriadDevice(desc.idVendor, desc.idProduct))
+             // Any booted device
+             || (vid == AUTO_VID && pid == DEFAULT_OPENPID
+                 && isBootedMyriadDevice(desc.idVendor, desc.idProduct)) )
         {
             if (device) {
-                const char *caddr = gen_addr(dev, get_pid_by_name(addr));
-                // If the same add as input
-                if (!strcmp(caddr, addr)) {
+                const char *dev_addr = gen_addr(dev, get_pid_by_name(input_addr));
+                if (!strcmp(dev_addr, input_addr)) {
                     if (usb_loglevel > 1) {
                         fprintf(stderr, "Found Address: %s - VID/PID %04x:%04x\n",
-                         addr, desc.idVendor, desc.idProduct);
+                                input_addr, desc.idVendor, desc.idProduct);
                     }
+
                     libusb_ref_device(dev);
                     libusb_free_device_list(devs, 1);
                     if (bcdusb)
                         *bcdusb = desc.bcdUSB;
                     *device = dev;
                     devs = 0;
+
                     if (pthread_mutex_unlock(&globalMutex)) {
                         fprintf(stderr, "Mutex unlock failed\n");
                     }
                     return USB_BOOT_SUCCESS;
                 }
             } else if (searchByName) {
-                const char *caddr = gen_addr(dev, get_pid_by_name(addr));
+                const char *dev_addr = gen_addr(dev, desc.idProduct);
                 // If the same address as input
-                if (!strcmp(caddr, addr)) {
+                if (!strcmp(dev_addr, input_addr)) {
                     if (usb_loglevel > 1) {
                         fprintf(stderr, "Found Address: %s - VID/PID %04x:%04x\n",
-                                addr, desc.idVendor, desc.idProduct);
+                                input_addr, desc.idVendor, desc.idProduct);
                     }
 
                     if (pthread_mutex_unlock(&globalMutex)) {
@@ -354,7 +355,7 @@ usbBootError_t usb_find_device_with_bcd(unsigned idx, char *addr,
                 if (usb_loglevel > 1)
                     fprintf(stderr, "Device %d Address: %s - VID/PID %04x:%04x\n",
                             idx, caddr, desc.idVendor, desc.idProduct);
-                mv_strncpy(addr, addrsize, caddr, addrsize - 1);
+                mv_strncpy(input_addr, addrsize, caddr, addrsize - 1);
                 if (pthread_mutex_unlock(&globalMutex)) {
                     fprintf(stderr, "Mutex unlock failed\n");
                 }
@@ -373,8 +374,14 @@ usbBootError_t usb_find_device_with_bcd(unsigned idx, char *addr,
 #endif
 
 #if (defined(_WIN32) || defined(_WIN64) )
-usbBootError_t usb_find_device(unsigned idx, char *addr, unsigned addrsize, void **device, int vid, int pid, int specificDevice)
+usbBootError_t usb_find_device(unsigned idx, char *addr, unsigned addrsize, void **device, int vid, int pid)
 {
+    if (!addr)
+        return USB_BOOT_ERROR;
+    int specificDevice = 0;
+    if (strlen(addr) > 1)
+        specificDevice = 1;
+
     // TODO There is no global mutex as in linux version
     int res;
     // 2 => vid
@@ -497,7 +504,7 @@ static libusb_device_handle *usb_open_device(libusb_device *dev, uint8_t *endpoi
     {
         if(usb_loglevel > 1)
             fprintf(stderr, "Found EP 0x%02x : max packet size is %u bytes\n",
-                ifdesc->endpoint[i].bEndpointAddress, ifdesc->endpoint[i].wMaxPacketSize);
+                    ifdesc->endpoint[i].bEndpointAddress, ifdesc->endpoint[i].wMaxPacketSize);
         if((ifdesc->endpoint[i].bmAttributes & LIBUSB_TRANSFER_TYPE_MASK) != LIBUSB_TRANSFER_TYPE_BULK)
             continue;
         if( !(ifdesc->endpoint[i].bEndpointAddress & LIBUSB_ENDPOINT_DIR_MASK) )
@@ -510,7 +517,7 @@ static libusb_device_handle *usb_open_device(libusb_device *dev, uint8_t *endpoi
     }
     libusb_free_config_descriptor(cdesc);
     mv_strcpy(err_string_buff, OPEN_DEV_ERROR_MESSAGE_LENGTH,
-        "Unable to find BULK OUT endpoint\n");
+              "Unable to find BULK OUT endpoint\n");
     libusb_close(h);
     return 0;
 }
@@ -533,66 +540,66 @@ static int wait_findopen(const char *device_address, int timeout, libusb_device
         return USB_BOOT_ERROR;
     }
 
-       usleep(100000);
-       if(usb_loglevel > 1)
-       {
-               if(timeout == -1)
-                       fprintf(stderr, "Starting wait for connect, no timeout\n");
-               else if(timeout == 0)
-                       fprintf(stderr, "Trying to connect\n");
-               else fprintf(stderr, "Starting wait for connect with %ums timeout\n", timeout);
-       }
-       last_open_dev_err[0] = 0;
-       i = 0;
-       for(;;)
-       {
-    highres_gettime(&t1);
-    int addr_size = strlen(device_address);
+    usleep(100000);
+    if(usb_loglevel > 1)
+    {
+        if(timeout == -1)
+            fprintf(stderr, "Starting wait for connect, no timeout\n");
+        else if(timeout == 0)
+            fprintf(stderr, "Trying to connect\n");
+        else fprintf(stderr, "Starting wait for connect with %ums timeout\n", timeout);
+    }
+    last_open_dev_err[0] = 0;
+    i = 0;
+    for(;;)
+    {
+        highres_gettime(&t1);
+        int addr_size = strlen(device_address);
 #if (!defined(_WIN32) && !defined(_WIN64) )
         rc = usb_find_device_with_bcd(0, (char*)device_address, addr_size, (void**)dev,
-            DEFAULT_VID, get_pid_by_name(device_address), bcdusb, 0);
+                                      DEFAULT_VID, get_pid_by_name(device_address), bcdusb);
 #else
         rc = usb_find_device(0, (char *)device_address, addr_size, (void **)dev,
-            DEFAULT_VID, get_pid_by_name(device_address), 0);
+            DEFAULT_VID, get_pid_by_name(device_address));
 #endif
-               if(rc < 0)
-                       return USB_BOOT_ERROR;
-               if(!rc)
-               {
+        if(rc < 0)
+            return USB_BOOT_ERROR;
+        if(!rc)
+        {
 #if (!defined(_WIN32) && !defined(_WIN64) )
             *devh = usb_open_device(*dev, endpoint, last_open_dev_err, OPEN_DEV_ERROR_MESSAGE_LENGTH);
 #else
             *devh = usb_open_device(*dev, endpoint, 0, last_open_dev_err, OPEN_DEV_ERROR_MESSAGE_LENGTH);
 #endif
             if(*devh != NULL)
-                       {
-                               if(usb_loglevel > 1)
-                                       fprintf(stderr, "Found and opened device\n");
-                               return 0;
-                       }
+            {
+                if(usb_loglevel > 1)
+                    fprintf(stderr, "Found and opened device\n");
+                return 0;
+            }
 #if (!defined(_WIN32) && !defined(_WIN64) )
-                       libusb_unref_device(*dev);
+            libusb_unref_device(*dev);
 #endif
-               }
+        }
         highres_gettime(&t2);
         elapsedTime += highres_elapsed_ms(&t1, &t2);
 
-               if(timeout != -1)
-               {
-                       if(usb_loglevel)
-                       {
-                               if(last_open_dev_err[0])
-                                       fprintf(stderr, "%s", last_open_dev_err);
-                               fprintf(stderr, "error: device not found!\n");
-                       }
-                       return rc ? USB_BOOT_DEVICE_NOT_FOUND : USB_BOOT_TIMEOUT;
+        if(timeout != -1)
+        {
+            if(usb_loglevel)
+            {
+                if(last_open_dev_err[0])
+                    fprintf(stderr, "%s", last_open_dev_err);
+                fprintf(stderr, "error: device not found!\n");
+            }
+            return rc ? USB_BOOT_DEVICE_NOT_FOUND : USB_BOOT_TIMEOUT;
         } else if (elapsedTime > (double)timeout) {
             return rc ? USB_BOOT_DEVICE_NOT_FOUND : USB_BOOT_TIMEOUT;
-               }
-               i++;
-               usleep(100000);
-       }
-       return 0;
+        }
+        i++;
+        usleep(100000);
+    }
+    return 0;
 }
 
 #if (!defined(_WIN32) && !defined(_WIN64) )
@@ -601,11 +608,11 @@ static int send_file(libusb_device_handle* h, uint8_t endpoint, const uint8_t* t
 static int send_file(libusb_device_handle *h, uint8_t endpoint, const uint8_t *tx_buf, unsigned filesize)
 #endif
 {
-       const uint8_t *p;
-       int rc;
-       int wb, twb, wbr;
-       double elapsedTime;
-       highres_time_t t1, t2;
+    const uint8_t *p;
+    int rc;
+    int wb, twb, wbr;
+    double elapsedTime;
+    highres_time_t t1, t2;
     unsigned int bulk_chunklen=DEFAULT_CHUNKSZ;
     elapsedTime = 0;
     twb = 0;
@@ -616,44 +623,44 @@ static int send_file(libusb_device_handle *h, uint8_t endpoint, const uint8_t *t
         bulk_chunklen = USB1_CHUNKSZ;
     }
 #endif
-       if(usb_loglevel > 1)
-               fprintf(stderr, "Performing bulk write of %u bytes...\n", filesize);
-       while(twb < filesize)
-       {
-               highres_gettime(&t1);
-               wb = filesize - twb;
-               if(wb > bulk_chunklen)
-                       wb = bulk_chunklen;
-               wbr = 0;
+    if(usb_loglevel > 1)
+        fprintf(stderr, "Performing bulk write of %u bytes...\n", filesize);
+    while(twb < filesize)
+    {
+        highres_gettime(&t1);
+        wb = filesize - twb;
+        if(wb > bulk_chunklen)
+            wb = bulk_chunklen;
+        wbr = 0;
 #if (!defined(_WIN32) && !defined(_WIN64) )
         rc = libusb_bulk_transfer(h, endpoint, (void *)p, wb, &wbr, write_timeout);
 #else
         rc = usb_bulk_write(h, endpoint, (void *)p, wb, &wbr, write_timeout);
 #endif
-               if(rc || (wb != wbr))
-               {
-                       if(rc == LIBUSB_ERROR_NO_DEVICE)
-                               break;
-                       if(usb_loglevel)
-                               fprintf(stderr, "bulk write: %s (%d bytes written, %d bytes to write)\n", libusb_strerror(rc), wbr, wb);
-                       if(rc == LIBUSB_ERROR_TIMEOUT)
-                               return USB_BOOT_TIMEOUT;
-                       else return USB_BOOT_ERROR;
-               }
-               highres_gettime(&t2);
-               elapsedTime += highres_elapsed_ms(&t1, &t2);
-               if (elapsedTime > DEFAULT_SEND_FILE_TIMEOUT) {
-                   return USB_BOOT_TIMEOUT;
-               }
-               twb += wbr;
-               p += wbr;
-       }
-       if(usb_loglevel > 1)
-       {
-               double MBpS = ((double)filesize / 1048576.) / (elapsedTime * 0.001);
-               fprintf(stderr, "Successfully sent %u bytes of data in %lf ms (%lf MB/s)\n", filesize, elapsedTime, MBpS);
-       }
-       return 0;
+        if(rc || (wb != wbr))
+        {
+            if(rc == LIBUSB_ERROR_NO_DEVICE)
+                break;
+            if(usb_loglevel)
+                fprintf(stderr, "bulk write: %s (%d bytes written, %d bytes to write)\n", libusb_strerror(rc), wbr, wb);
+            if(rc == LIBUSB_ERROR_TIMEOUT)
+                return USB_BOOT_TIMEOUT;
+            else return USB_BOOT_ERROR;
+        }
+        highres_gettime(&t2);
+        elapsedTime += highres_elapsed_ms(&t1, &t2);
+        if (elapsedTime > DEFAULT_SEND_FILE_TIMEOUT) {
+            return USB_BOOT_TIMEOUT;
+        }
+        twb += wbr;
+        p += wbr;
+    }
+    if(usb_loglevel > 1)
+    {
+        double MBpS = ((double)filesize / 1048576.) / (elapsedTime * 0.001);
+        fprintf(stderr, "Successfully sent %u bytes of data in %lf ms (%lf MB/s)\n", filesize, elapsedTime, MBpS);
+    }
+    return 0;
 }
 
 int usb_boot(const char *addr, const void *mvcmd, unsigned size)
@@ -685,9 +692,9 @@ int usb_boot(const char *addr, const void *mvcmd, unsigned size)
         return rc;
     }
     rc = send_file(h, endpoint, mvcmd, size,bcdusb);
-       libusb_release_interface(h, 0);
-       libusb_close(h);
-       libusb_unref_device(dev);
+    libusb_release_interface(h, 0);
+    libusb_close(h);
+    libusb_unref_device(dev);
 #endif
     return rc;
 }
index 91fe89f..7deef99 100644 (file)
@@ -18,6 +18,7 @@ extern int usb_loglevel;
 #define DEFAULT_UNBOOTVID           0x03E7
 #define DEFAULT_UNBOOTPID_2485      0x2485
 #define DEFAULT_UNBOOTPID_2150      0x2150
+#define DEFAULT_CHUNKSZ             1024*1024
 
 
 typedef enum usbBootError {
@@ -28,9 +29,11 @@ typedef enum usbBootError {
 } usbBootError_t;
 
 #if (!defined(_WIN32) && !defined(_WIN64))
-usbBootError_t usb_find_device_with_bcd(unsigned idx, char *addr, unsigned addrsize, void **device, int vid, int pid,unsigned short* bcdusb, int searchByName);
+usbBootError_t usb_find_device_with_bcd(unsigned idx, char *input_addr,
+                                        unsigned addrsize, void **device, int vid, int pid,unsigned short* bcdusb);
 #else
-usbBootError_t usb_find_device(unsigned idx, char *addr, unsigned addrsize, void **device, int vid, int pid, int specificDevice);
+usbBootError_t usb_find_device(unsigned idx, char *addr, unsigned addrsize,
+   void **device, int vid, int pid);
 #endif
 int usb_boot(const char *addr, const void *mvcmd, unsigned size);
 int get_pid_by_name(const char* name);
index 470b11d..5da79ad 100644 (file)
@@ -9,6 +9,7 @@
 ///
 
 #include "XLink.h"
+#include "XLink_tool.h"
 
 #include "stdio.h"
 #include "stdint.h"
 
 #include <assert.h>
 #include <stdlib.h>
+
+
 #if (defined(_WIN32) || defined(_WIN64))
+#include "gettime.h"
 #include "win_pthread.h"
 #include "win_semaphore.h"
-#include "gettime.h"
 #else
-#include <pthread.h>
-#include <semaphore.h>
-#endif
-#if (defined(_WIN32) || defined(_WIN64))
-#include "gettime.h"
+# ifdef __APPLE__
+#  include "pthread_semaphore.h"
+# else
+#  include <semaphore.h>
+# endif
 #endif
+
 #include "mvMacros.h"
 #include "XLinkPlatform.h"
 #include "XLinkDispatcher.h"
+#include "XLinkPublicDefines.h"
+
 #define _XLINK_ENABLE_PRIVATE_INCLUDE_
 #include "XLinkPrivateDefines.h"
 
+#ifdef MVLOG_UNIT_NAME
+#undef MVLOG_UNIT_NAME
 #define MVLOG_UNIT_NAME xLink
+#endif
 #include "mvLog.h"
 #include "mvStringUtils.h"
 
-#define USB_DATA_TIMEOUT 10000
-#define CIRCULAR_INCREMENT(x,maxVal) \
-    { \
-         x++; \
-         if (x == maxVal) \
-             x = 0; \
-    }
-//avoid problems with unsigned. first compare and then give the nuw value
-#define CIRCULAR_DECREMENT(x,maxVal) \
-{ \
-    if (x == 0) \
-        x = maxVal; \
-    else \
-        x--; \
-}
-#define EXTRACT_IDS(streamId, linkId) \
-{ \
-    linkId = (streamId >> 24) & 0XFF; \
-    streamId = streamId & 0xFFFFFF; \
-}
+#ifndef XLINK_USB_DATA_TIMEOUT
+#define XLINK_USB_DATA_TIMEOUT 0
+#endif
 
-#define COMBIN_IDS(streamId, linkid) \
-     streamId = streamId | ((linkid & 0xFF) << 24);
+#ifndef XLINK_COMMON_TIMEOUT_MSEC
+#define XLINK_COMMON_TIMEOUT_MSEC (1*60*1000)
+#endif
 
 #define DEFAULT_TIMEOUT ((unsigned int)-1)
 #define MAX_PATH_LENGTH (255)
 
-static unsigned int glCommonTimeOutMsec = 1000;
+static unsigned int glCommonTimeOutMsec = XLINK_COMMON_TIMEOUT_MSEC;
 static unsigned int glDeviceOpenTimeOutMsec = 5000;
 static unsigned int glAllocateGraphTimeOutMsec = 12000;
 
-
 XLinkError_t XLinkSetCommonTimeOutMsec(unsigned int msec) {
     glCommonTimeOutMsec = msec;
     return X_LINK_SUCCESS;
@@ -87,6 +79,7 @@ XLinkError_t XLinkSetAllocateGraphTimeOutMsec(unsigned int msec) {
 
 int XLinkWaitSem(sem_t* sem)
 {
+#ifdef __PC__
     ASSERT_X_LINK_R(sem != NULL, -1);
 
     if (glCommonTimeOutMsec == 0)
@@ -109,6 +102,9 @@ int XLinkWaitSem(sem_t* sem)
 
         return sem_timedwait(sem, &ts);
     }
+#else
+    return sem_wait(sem);
+#endif
 }
 
 int XLinkWaitSemUserMode(sem_t* sem, unsigned int timeout)
@@ -141,6 +137,10 @@ int XLinkWaitSemUserMode(sem_t* sem, unsigned int timeout)
     }
 }
 
+static int is_semaphore_initialized(const streamDesc_t *stream) {
+    return stream && strnlen(stream->name, MAX_STREAM_NAME_LENGTH) != 0;
+}
+
 int dispatcherLocalEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response);
 int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response);
 //adds a new event with parameters and returns event id
@@ -148,18 +148,24 @@ int dispatcherEventSend(xLinkEvent_t* event);
 streamDesc_t* getStreamById(void* fd, streamId_t id);
 void releaseStream(streamDesc_t*);
 int addNewPacketToStream(streamDesc_t* stream, void* buffer, uint32_t size);
+xLinkDesc_t* getLink(void* fd);
 
+#ifdef __PC__
 static XLinkError_t checkEventHeader(xLinkEventHeader_t header);
+#endif
 
 struct dispatcherControlFunctions controlFunctionTbl;
 XLinkGlobalHandler_t* glHandler; //TODO need to either protect this with semaphore
-                                 // or make profiling data per device
+                                 //or make profiling data per device
 linkId_t nextUniqueLinkId = 0; //incremental number, doesn't get decremented.
 
 xLinkDesc_t availableXLinks[MAX_LINKS];
 
+xLinkDesc_t* getLinkById(linkId_t id);
+xLinkDesc_t* getLink(void* fd);
 sem_t  pingSem; //to b used by myriad
 
+
 char* TypeToStr(int type)
 {
     switch(type)
@@ -181,7 +187,7 @@ char* TypeToStr(int type)
         case XLINK_RESET_RESP: return "XLINK_RESET_RESP";
         case XLINK_RESP_LAST:  return "XLINK_RESP_LAST";
         default:
-        break;
+            break;
     }
     return "";
 }
@@ -246,67 +252,69 @@ int handleIncomingEvent(xLinkEvent_t* event){
     mvLog(MVLOG_DEBUG, "%s, size %u, streamId %u.\n", TypeToStr(event->header.type), event->header.size, event->header.streamId);
     void* buffer ;
     streamDesc_t* stream ;
+    int sc = 0 ;
     switch (event->header.type){
-    case XLINK_WRITE_REQ:
-        /*If we got here, we will read the data no matter what happens.
-          If we encounter any problems we will still read the data to keep
-          the communication working but send a NACK.*/
-        stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
-        ASSERT_X_LINK(stream);
-
-        stream->localFillLevel += event->header.size;
-        mvLog(MVLOG_DEBUG,"Got write, current local fill level is %u out of %u %u\n", stream->localFillLevel, stream->readSize, stream->writeSize);
-
-        buffer = allocateData(ALIGN_UP(event->header.size, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
-        if (buffer == NULL){
-            mvLog(MVLOG_FATAL,"out of memory\n");
-            ASSERT_X_LINK(0);
-        }
-        int sc = XLinkRead(&event->deviceHandle, buffer, event->header.size, USB_DATA_TIMEOUT);
-        if(sc < 0){
-            mvLog(MVLOG_ERROR,"%s() Read failed (err %d)\n", __func__, (int)sc);
-            deallocateData(buffer, ALIGN_UP(event->header.size, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
-            ASSERT_X_LINK(0);
-        }
+        case XLINK_WRITE_REQ:
+            /*If we got here, we will read the data no matter what happens.
+              If we encounter any problems we will still read the data to keep
+              the communication working but send a NACK.*/
+            stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
+            ASSERT_X_LINK(stream);
 
-        event->data = buffer;
-        if (addNewPacketToStream(stream, buffer, event->header.size)){
-            mvLog(MVLOG_WARN,"No more place in stream. release packet\n");
-            deallocateData(buffer, ALIGN_UP(event->header.size, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
-            event->header.flags.bitField.ack = 0;
-            event->header.flags.bitField.nack = 1;
-            assert(0);
-        }
-        releaseStream(stream);
-        break;
-    case XLINK_READ_REQ:
-        break;
-    case XLINK_READ_REL_REQ:
-        break;
-    case XLINK_CREATE_STREAM_REQ:
-        break;
-    case XLINK_CLOSE_STREAM_REQ:
-        break;
-    case XLINK_PING_REQ:
-        break;
-    case XLINK_RESET_REQ:
-        break;
-    case XLINK_WRITE_RESP:
-        break;
-    case XLINK_READ_RESP:
-        break;
-    case XLINK_READ_REL_RESP:
-        break;
-    case XLINK_CREATE_STREAM_RESP:
-        break;
-    case XLINK_CLOSE_STREAM_RESP:
-        break;
-    case XLINK_PING_RESP:
-        break;
-    case XLINK_RESET_RESP:
-        break;
-    default:
-        ASSERT_X_LINK(0);
+            stream->localFillLevel += event->header.size;
+            mvLog(MVLOG_DEBUG,"S%d: Got write of %ld, current local fill level is %ld out of %ld %ld\n",
+                  event->header.streamId, event->header.size, stream->localFillLevel, stream->readSize, stream->writeSize);
+
+            buffer = allocateData(ALIGN_UP(event->header.size, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
+            if (buffer == NULL){
+                mvLog(MVLOG_FATAL,"out of memory\n");
+                ASSERT_X_LINK(0);
+            }
+            sc = XLinkRead(&event->deviceHandle, buffer, event->header.size, XLINK_USB_DATA_TIMEOUT);
+            if(sc < 0){
+                mvLog(MVLOG_ERROR,"%s() Read failed %d\n", __func__, (int)sc);
+                deallocateData(buffer, ALIGN_UP(event->header.size, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
+                ASSERT_X_LINK(0);
+            }
+
+            event->data = buffer;
+            if (addNewPacketToStream(stream, buffer, event->header.size)){
+                mvLog(MVLOG_WARN,"No more place in stream. release packet\n");
+                deallocateData(buffer, ALIGN_UP(event->header.size, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
+                event->header.flags.bitField.ack = 0;
+                event->header.flags.bitField.nack = 1;
+                assert(0);
+            }
+            releaseStream(stream);
+            break;
+        case XLINK_READ_REQ:
+            break;
+        case XLINK_READ_REL_REQ:
+            break;
+        case XLINK_CREATE_STREAM_REQ:
+            break;
+        case XLINK_CLOSE_STREAM_REQ:
+            break;
+        case XLINK_PING_REQ:
+            break;
+        case XLINK_RESET_REQ:
+            break;
+        case XLINK_WRITE_RESP:
+            break;
+        case XLINK_READ_RESP:
+            break;
+        case XLINK_READ_REL_RESP:
+            break;
+        case XLINK_CREATE_STREAM_RESP:
+            break;
+        case XLINK_CLOSE_STREAM_RESP:
+            break;
+        case XLINK_PING_RESP:
+            break;
+        case XLINK_RESET_RESP:
+            break;
+        default:
+            ASSERT_X_LINK(0);
     }
     //adding event for the scheduler. We let it know that this is a remote event
     dispatcherAddEvent(EVENT_REMOTE, event);
@@ -315,31 +323,41 @@ int handleIncomingEvent(xLinkEvent_t* event){
 
 int dispatcherEventReceive(xLinkEvent_t* event){
     static xLinkEvent_t prevEvent = {0};
-
-    const unsigned int unlimitedUsbTimeout = 0;
-    int sc = XLinkRead(&event->deviceHandle, &event->header, sizeof(event->header), unlimitedUsbTimeout);
+#ifdef __PC__
+    int sc = XLinkRead(&event->deviceHandle, &event->header, sizeof(event->header), 0);
+#else
+    int sc = XLinkRead(&event->deviceHandle, &event->header, sizeof(event->header), XLINK_USB_DATA_TIMEOUT);
+#endif
 
     mvLog(MVLOG_DEBUG,"Incoming event %p: %s %d %p prevEvent: %s %d %p\n",
-                                event,
-                                TypeToStr(event->header.type),
-                                (int)event->header.id,
-                                event->deviceHandle.xLinkFD,
-                                TypeToStr(prevEvent.header.type),
-                                (int)prevEvent.header.id,
-                                prevEvent.deviceHandle.xLinkFD);
-
-    if(sc < 0 && event->header.type == XLINK_RESET_RESP) {
-        return sc;
+          event,
+          TypeToStr(event->header.type),
+          (int)event->header.id,
+          event->deviceHandle.xLinkFD,
+          TypeToStr(prevEvent.header.type),
+          (int)prevEvent.header.id,
+          prevEvent.deviceHandle.xLinkFD);
+
+
+    if(sc < 0) {
+        xLinkDesc_t* link = getLink(&event->deviceHandle.xLinkFD);
+        if (event->header.type == XLINK_RESET_RESP || link == NULL) {
+            return sc;
+        } else if (link->hostClosedFD) {
+            //host intentionally closed usb, finish normally
+            event->header.type = XLINK_RESET_RESP;
+            return 0;
+        }
     }
 
-    if(sc < 0){
-        mvLog(MVLOG_ERROR,"%s() Read failed (err %d) | event %p %s\n", __func__, (int)sc, event, TypeToStr(event->header.type));
+    if(sc < 0) {
+        mvLog(MVLOG_ERROR,"%s() Read failed %d\n", __func__, (int)sc);
         return sc;
     }
 
     if (prevEvent.header.id == event->header.id &&
-            prevEvent.header.type == event->header.type &&
-            prevEvent.deviceHandle.xLinkFD == event->deviceHandle.xLinkFD)
+        prevEvent.header.type == event->header.type &&
+        prevEvent.deviceHandle.xLinkFD == event->deviceHandle.xLinkFD)
     {
         mvLog(MVLOG_FATAL,"Duplicate id detected. \n");
     }
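The new error path above distinguishes three cases when the header read fails: a reset response (or an unknown link) is returned as-is, a link whose USB FD the host closed on purpose (link->hostClosedFD, set by the deprecated XLinkDisconnect further down in this file) is converted into a clean XLINK_RESET_RESP, and anything else is a genuine failure. A self-contained sketch of that decision, with the XLink structures reduced to the one field involved:

#include <stdio.h>

/* Stand-ins for the real types, reduced to what the decision needs. */
typedef enum { EV_OTHER, EV_RESET_RESP } ev_type_sketch_t;
typedef struct { int hostClosedFD; } link_sketch_t;   /* set by XLinkDisconnect() */

/* Returns -1 to propagate the read error, 0 to finish as a clean reset. */
static int classify_read_failure(ev_type_sketch_t type, const link_sketch_t *link)
{
    if (type == EV_RESET_RESP || link == NULL)
        return -1;                 /* pass the error code through */
    if (link->hostClosedFD)
        return 0;                  /* host closed USB on purpose: fake XLINK_RESET_RESP */
    return -1;                     /* genuine failure */
}

int main(void)
{
    link_sketch_t closedByHost = { 1 };
    link_sketch_t stillOpen    = { 0 };
    printf("%d %d %d\n",
           classify_read_failure(EV_OTHER, &closedByHost),  /*  0 */
           classify_read_failure(EV_OTHER, &stillOpen),     /* -1 */
           classify_read_failure(EV_RESET_RESP, NULL));     /* -1 */
    return 0;
}
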
@@ -349,7 +367,8 @@ int dispatcherEventReceive(xLinkEvent_t* event){
         mvLog(MVLOG_WARN,"Failed to handle incoming event");
     }
 
-    if(event->header.type == XLINK_RESET_REQ ) {
+    if(event->header.type == XLINK_RESET_REQ)
+    {
         if(event->deviceHandle.protocol == X_LINK_PCIE) {
             mvLog(MVLOG_DEBUG,"XLINK_RESET_REQ received - doing nothing, we dont want to reset device");
         }
@@ -393,11 +412,6 @@ static linkId_t getNextAvailableLinkUniqueId()
     do
     {
         int i;
-        nextUniqueLinkId++;
-        if (nextUniqueLinkId == INVALID_LINK_ID)
-        {
-            nextUniqueLinkId = 0;
-        }
         for (i = 0; i < MAX_LINKS; i++)
         {
             if (availableXLinks[i].id != INVALID_LINK_ID &&
@@ -408,12 +422,17 @@ static linkId_t getNextAvailableLinkUniqueId()
         {
             return nextUniqueLinkId;
         }
+        nextUniqueLinkId++;
+        if (nextUniqueLinkId == INVALID_LINK_ID)
+        {
+            nextUniqueLinkId = 0;
+        }
     } while (start != nextUniqueLinkId);
     mvLog(MVLOG_ERROR, "%s():- no next available link!\n", __func__);
     return INVALID_LINK_ID;
 }
 
-static int getNextAvailableLinkIndex()
+int getNextAvailableLinkIndex()
 {
     int i;
     for (i = 0; i < MAX_LINKS; i++)
@@ -446,8 +465,11 @@ streamDesc_t* getStreamById(void* fd, streamId_t id)
     int stream;
     for (stream = 0; stream < XLINK_MAX_STREAMS; stream++) {
         if (link->availableStreams[stream].id == id) {
-            if (XLinkWaitSem(&link->availableStreams[stream].sem))
+            if (XLinkWaitSem(&link->availableStreams[stream].sem)) {
+#ifdef __PC__
                 return NULL;
+#endif
+            }
             return &link->availableStreams[stream];
         }
     }
@@ -461,9 +483,12 @@ streamDesc_t* getStreamByName(xLinkDesc_t* link, const char* name)
     for (stream = 0; stream < XLINK_MAX_STREAMS; stream++) {
         if (link->availableStreams[stream].id != INVALID_STREAM_ID &&
             strcmp(link->availableStreams[stream].name, name) == 0) {
-                if (XLinkWaitSem(&link->availableStreams[stream].sem))
-                    return NULL;
-                return &link->availableStreams[stream];
+            if (XLinkWaitSem(&link->availableStreams[stream].sem)) {
+#ifdef __PC__
+                return NULL;
+#endif
+            }
+            return &link->availableStreams[stream];
         }
     }
     return NULL;
@@ -500,7 +525,7 @@ streamPacketDesc_t* getPacketFromStream(streamDesc_t* stream)
         ret = &stream->packets[stream->firstPacketUnused];
         stream->availablePackets--;
         CIRCULAR_INCREMENT(stream->firstPacketUnused,
-                            XLINK_MAX_PACKETS_PER_STREAM);
+                           XLINK_MAX_PACKETS_PER_STREAM);
         stream->blockedPackets++;
     }
     return ret;
@@ -515,10 +540,16 @@ void deallocateStream(streamDesc_t* stream)
             stream->readSize = 0;
             stream->closeStreamInitiated = 0;
         }
+
+#ifndef __PC__
+        if (is_semaphore_initialized(stream)) {
+            if(sem_destroy(&stream->sem))
+                perror("Can't destroy semaphore");
+        }
+#endif
     }
 }
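deallocateStream above, and allocateNewStream and dispatcherCloseLink below, all guard sem_init/sem_destroy with is_semaphore_initialized(stream), whose definition is not part of this diff. Judging by the check it replaces (the old strlen(stream->name) != 0 test in allocateNewStream) and by the close paths now clearing stream->name[0] together with destroying the semaphore, a plausible sketch, assuming the stream name doubles as the initialization marker, is:

#include <stddef.h>

/* Minimal stand-in for streamDesc_t; the real struct in
 * XLinkPrivateDefines.h has many more members. */
typedef struct {
    char name[64];   /* MAX_STREAM_NAME_LENGTH in the real header */
    /* sem_t sem; ... */
} stream_sketch_t;

/* Sketch only: assumes an empty name means "semaphore never created",
 * mirroring the removed strlen(stream->name) != 0 test. The real helper
 * may be implemented differently. */
static int is_semaphore_initialized_sketch(const stream_sketch_t *stream)
{
    return stream != NULL && stream->name[0] != '\0';
}

int main(void)
{
    stream_sketch_t s = { .name = "" };
    return is_semaphore_initialized_sketch(&s);   /* 0: nothing to destroy */
}
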
 
-
 int releasePacketFromStream(streamDesc_t* stream, uint32_t* releasedSize)
 {
     streamPacketDesc_t* currPack = &stream->packets[stream->firstPacket];
@@ -526,22 +557,28 @@ int releasePacketFromStream(streamDesc_t* stream, uint32_t* releasedSize)
         mvLog(MVLOG_ERROR,"There is no packet to release\n");
         return 0; // ignore this, although this is a big problem on application side
     }
+
     stream->localFillLevel -= currPack->length;
-    mvLog(MVLOG_DEBUG,"Got release, current local fill level is %u out of %u %u\n", stream->localFillLevel, stream->readSize, stream->writeSize);
+    mvLog(MVLOG_DEBUG, "S%d: Got release of %ld , current local fill level is %ld out of %ld %ld\n",
+          stream->id, currPack->length, stream->localFillLevel, stream->readSize, stream->writeSize);
+
+    deallocateData(currPack->data,
+                   ALIGN_UP_INT32((int32_t)currPack->length, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
 
-    deallocateData(currPack->data, ALIGN_UP_INT32((int32_t)currPack->length, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
     CIRCULAR_INCREMENT(stream->firstPacket, XLINK_MAX_PACKETS_PER_STREAM);
     stream->blockedPackets--;
-    *releasedSize = currPack->length;
+    if (releasedSize) {
+        *releasedSize = currPack->length;
+    }
     return 0;
 }
 
 int isStreamSpaceEnoughFor(streamDesc_t* stream, uint32_t size)
 {
     if(stream->remoteFillPacketLevel >= XLINK_MAX_PACKETS_PER_STREAM ||
-        stream->remoteFillLevel + size > stream->writeSize){
-        mvLog(MVLOG_DEBUG, "S%d: Not enough space in stream for %u: PKT %u, FILL %u SIZE %u\n",
-            stream->id, size, stream->remoteFillPacketLevel, stream->remoteFillLevel, stream->writeSize);
+       stream->remoteFillLevel + size > stream->writeSize){
+        mvLog(MVLOG_DEBUG, "S%d: Not enough space in stream '%s' for %ld: PKT %ld, FILL %ld SIZE %ld\n",
+              stream->id, stream->name, size, stream->remoteFillPacketLevel, stream->remoteFillLevel, stream->writeSize);
         return 0;
     }
     else
@@ -561,10 +598,10 @@ int addNewPacketToStream(streamDesc_t* stream, void* buffer, uint32_t size){
 }
 
 streamId_t allocateNewStream(void* fd,
-                            const char* name,
-                            uint32_t writeSize,
-                            uint32_t readSize,
-                            streamId_t forcedId)
+                             const char* name,
+                             uint32_t writeSize,
+                             uint32_t readSize,
+                             streamId_t forcedId)
 {
     streamId_t streamId;
     streamDesc_t* stream;
@@ -597,9 +634,18 @@ streamId_t allocateNewStream(void* fd,
         else
             stream->id = forcedId;
         link->nextUniqueStreamId++; //even if we didn't use a new one, we need to align with the total number of unique streams
-        int sem_initiated = strlen(stream->name) != 0;
+        if (!is_semaphore_initialized(stream)) //if sem_init is called for already initiated sem, behavior is undefined
+        {
+            if(sem_init(&stream->sem, 0, 0))
+                perror("Can't create semaphore\n");
+        }
+        else
+        {
+            mvLog(MVLOG_INFO, "is_semaphore_initialized\n");
+        }
+
         mv_strncpy(stream->name, MAX_STREAM_NAME_LENGTH,
-            name, MAX_STREAM_NAME_LENGTH - 1);
+                   name, MAX_STREAM_NAME_LENGTH - 1);
         stream->readSize = 0;
         stream->writeSize = 0;
         stream->remoteFillLevel = 0;
@@ -607,28 +653,44 @@ streamId_t allocateNewStream(void* fd,
 
         stream->localFillLevel = 0;
         stream->closeStreamInitiated = 0;
-        if (!sem_initiated) //if sem_init is called for already initiated sem, behavior is undefined
-            sem_init(&stream->sem, 0, 0);
     }
     if (readSize && !stream->readSize)
     {
         stream->readSize = readSize;
+
+#ifndef __PC__
+        // FIXME: not the best solution but the simplest for now:
+        // it is just a check; the real allocation will be done when receiving a USB package
+        void *buffer = allocateData(ALIGN_UP(readSize, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
+        if (buffer == NULL) {
+            mvLog(MVLOG_ERROR,"Cannot create stream. Requested memory = %u", stream->readSize);
+            return INVALID_STREAM_ID;
+        } else {
+            deallocateData(buffer, ALIGN_UP(readSize, __CACHE_LINE_SIZE), __CACHE_LINE_SIZE);
+        }
+#endif
     }
     if (writeSize && !stream->writeSize)
     {
         stream->writeSize = writeSize;
     }
+
+    mvLog(MVLOG_DEBUG, "The stream \"%s\"  created, id = %u, readSize = %d, writeSize = %d\n",
+          stream->name, stream->id, stream->readSize, stream->writeSize);
+
     streamId = stream->id;
     releaseStream(stream);
     return streamId;
 }
 
+#ifdef __PC__
 static void setEventFailed(xLinkEvent_t * event )
 {
     event->header.flags.bitField.localServe = 1;
     event->header.flags.bitField.ack = 0;
     event->header.flags.bitField.nack = 1;
 }
+#endif
 
 //this function should be called only for remote requests
 int dispatcherLocalEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response)
@@ -637,101 +699,114 @@ int dispatcherLocalEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response)
     response->header.id = event->header.id;
     mvLog(MVLOG_DEBUG, "%s\n",TypeToStr(event->header.type));
     switch (event->header.type){
-    case XLINK_WRITE_REQ:
-        //in case local tries to write after it issues close (writeSize is zero)
-        stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
-        if(!stream){
-            mvLog(MVLOG_DEBUG, "stream %d has been closed!\n", event->header.streamId);
-            setEventFailed(event);
-            break;
-        }
-        if (stream->writeSize == 0)
-        {
-            event->header.flags.bitField.nack = 1;
-            event->header.flags.bitField.ack = 0;
-            // return -1 so it is not even sent to the remote
-            releaseStream(stream);
-            return -1;
-        }
-        event->header.flags.bitField.ack = 1;
-        event->header.flags.bitField.nack = 0;
-        event->header.flags.bitField.localServe = 0;
+        case XLINK_WRITE_REQ:
+            //in case local tries to write after it issues close (writeSize is zero)
+            stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
 
-        if(!isStreamSpaceEnoughFor(stream, event->header.size)){
-            mvLog(MVLOG_DEBUG,"local NACK RTS. stream is full\n");
-            event->header.flags.bitField.block = 1;
-            event->header.flags.bitField.localServe = 1;
-        }else{
-            event->header.flags.bitField.block = 0;
-            stream->remoteFillLevel += event->header.size;
-            stream->remoteFillPacketLevel++;
+#ifdef __PC__
+            if(!stream){
+                mvLog(MVLOG_DEBUG, "stream %d has been closed!\n", event->header.streamId);
+                setEventFailed(event);
+                break;
+            }
+#else
+            ASSERT_X_LINK(stream);
+#endif
 
-            mvLog(MVLOG_DEBUG,"Got local write remote fill level %u out of %u %u\n", stream->remoteFillLevel, stream->writeSize, stream->readSize);
-        }
-        releaseStream(stream);
-        break;
-    case XLINK_READ_REQ:
-        stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
-        if(!stream){
-            mvLog(MVLOG_DEBUG, "stream %d has been closed!\n", event->header.streamId);
-            setEventFailed(event);
-            break;
-        }
-        streamPacketDesc_t* packet = getPacketFromStream(stream);
-        if (packet){
-            //the read can be served with this packet
-            event->data = packet;
+            if (stream->writeSize == 0)
+            {
+                event->header.flags.bitField.nack = 1;
+                event->header.flags.bitField.ack = 0;
+                // return -1 so it is not even sent to the remote
+                releaseStream(stream);
+                return -1;
+            }
             event->header.flags.bitField.ack = 1;
             event->header.flags.bitField.nack = 0;
-            event->header.flags.bitField.block = 0;
-        }
-        else{
-            event->header.flags.bitField.block = 1;
-        }
-        releaseStream(stream);
-        event->header.flags.bitField.localServe = 1;
-        break;
-    case XLINK_READ_REL_REQ:
-        stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
-        ASSERT_X_LINK(stream);
-        uint32_t releasedSize = 0;
-        releasePacketFromStream(stream, &releasedSize);
-        event->header.size = releasedSize;
-        releaseStream(stream);
-        break;
-    case XLINK_CREATE_STREAM_REQ:
-        break;
-    case XLINK_CLOSE_STREAM_REQ:
-        stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
-
-        ASSERT_X_LINK(stream);
-        if (stream->remoteFillLevel != 0){
-            stream->closeStreamInitiated = 1;
-            event->header.flags.bitField.block = 1;
-            event->header.flags.bitField.localServe = 1;
-        }else{
-            event->header.flags.bitField.block = 0;
             event->header.flags.bitField.localServe = 0;
-        }
-        releaseStream(stream);
-        break;
-    case XLINK_RESET_REQ:
-        mvLog(MVLOG_DEBUG,"XLINK_RESET_REQ - do nothing\n");
-        break;
-    case XLINK_PING_REQ:
-    case XLINK_WRITE_RESP:
-    case XLINK_READ_RESP:
-    case XLINK_READ_REL_RESP:
-    case XLINK_CREATE_STREAM_RESP:
-    case XLINK_CLOSE_STREAM_RESP:
-    case XLINK_PING_RESP:
-        break;
-    case XLINK_RESET_RESP:
-        //should not happen
-        event->header.flags.bitField.localServe = 1;
-        break;
-    default:
-        ASSERT_X_LINK(0);
+
+            if(!isStreamSpaceEnoughFor(stream, event->header.size)){
+                mvLog(MVLOG_FATAL,"local NACK RTS. stream '%s' is full (event %d)\n", stream->name, event->header.id);
+                event->header.flags.bitField.block = 1;
+                event->header.flags.bitField.localServe = 1;
+                // TODO: easy to implement non-blocking read here, just return nack
+                mvLog(MVLOG_WARN, "Blocked event would cause dispatching thread to wait on semaphore infinitely\n");
+            }else{
+                event->header.flags.bitField.block = 0;
+                stream->remoteFillLevel += event->header.size;
+                stream->remoteFillPacketLevel++;
+                mvLog(MVLOG_DEBUG,"S%d: Got local write of %ld , remote fill level %ld out of %ld %ld\n",
+                      event->header.streamId, event->header.size, stream->remoteFillLevel, stream->writeSize, stream->readSize);
+            }
+            releaseStream(stream);
+            break;
+        case XLINK_READ_REQ:
+            stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
+#ifdef __PC__
+            if(!stream){
+                mvLog(MVLOG_DEBUG, "stream %d has been closed!\n", event->header.streamId);
+                setEventFailed(event);
+                break;
+            }
+#else
+            ASSERT_X_LINK(stream);
+#endif
+            streamPacketDesc_t* packet = getPacketFromStream(stream);
+            if (packet){
+                //the read can be served with this packet
+                event->data = packet;
+                event->header.flags.bitField.ack = 1;
+                event->header.flags.bitField.nack = 0;
+                event->header.flags.bitField.block = 0;
+            }
+            else{
+                event->header.flags.bitField.block = 1;
+                // TODO: easy to implement non-blocking read here, just return nack
+            }
+            event->header.flags.bitField.localServe = 1;
+            releaseStream(stream);
+            break;
+        case XLINK_READ_REL_REQ:
+            stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
+            ASSERT_X_LINK(stream);
+            uint32_t releasedSize = 0;
+            releasePacketFromStream(stream, &releasedSize);
+            event->header.size = releasedSize;
+            releaseStream(stream);
+            break;
+        case XLINK_CREATE_STREAM_REQ:
+            break;
+        case XLINK_CLOSE_STREAM_REQ:
+            stream = getStreamById(event->deviceHandle.xLinkFD, event->header.streamId);
+
+            ASSERT_X_LINK(stream);
+            if (stream->remoteFillLevel != 0){
+                stream->closeStreamInitiated = 1;
+                event->header.flags.bitField.block = 1;
+                event->header.flags.bitField.localServe = 1;
+            }else{
+                event->header.flags.bitField.block = 0;
+                event->header.flags.bitField.localServe = 0;
+            }
+            releaseStream(stream);
+            break;
+        case XLINK_RESET_REQ:
+            mvLog(MVLOG_DEBUG,"XLINK_RESET_REQ - do nothing\n");
+            break;
+        case XLINK_PING_REQ:
+        case XLINK_WRITE_RESP:
+        case XLINK_READ_RESP:
+        case XLINK_READ_REL_RESP:
+        case XLINK_CREATE_STREAM_RESP:
+        case XLINK_CLOSE_STREAM_RESP:
+        case XLINK_PING_RESP:
+            break;
+        case XLINK_RESET_RESP:
+            //should not happen
+            event->header.flags.bitField.localServe = 1;
+            break;
+        default:
+            ASSERT_X_LINK(0);
     }
     return 0;
 }
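Both switch statements above answer a request purely through the header flag bits: ack/nack carry the verdict, block parks the request until the peer frees space or delivers data, and localServe keeps the event on this side instead of sending it over the link. A condensed model of the combinations produced in this file (field names follow the code above; bits a branch does not touch arrive unchanged):

/* Reduced model of xLinkEventHeader_t.flags.bitField, limited to the four
 * bits dispatcherLocalEventGetResponse() manipulates. */
struct flag_sketch {
    unsigned ack        : 1;   /* request satisfied                        */
    unsigned nack       : 1;   /* request refused                          */
    unsigned block      : 1;   /* park the event until space/data is freed */
    unsigned localServe : 1;   /* do not forward the event to the peer     */
};

/* Mirrors setEventFailed() above: answered locally with a NACK, never sent. */
static void set_event_failed_sketch(struct flag_sketch *f)
{
    f->localServe = 1;
    f->ack = 0;
    f->nack = 1;
}

/* Combinations seen in the local-response handler:
 *   XLINK_WRITE_REQ, space available : ack=1 nack=0 block=0 localServe=0
 *   XLINK_WRITE_REQ, stream full     : ack=1        block=1 localServe=1
 *   XLINK_READ_REQ,  packet queued   : ack=1 nack=0 block=0 localServe=1
 *   XLINK_READ_REQ,  nothing queued  :               block=1 localServe=1
 */

int main(void)
{
    struct flag_sketch f = {0, 0, 0, 0};
    set_event_failed_sketch(&f);
    return f.nack;   /* 1 */
}
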
@@ -776,12 +851,12 @@ int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response
             stream->remoteFillLevel -= event->header.size;
             stream->remoteFillPacketLevel--;
 
-            mvLog(MVLOG_DEBUG,"Got remote release %u, remote fill level %u out of %u %u\n",
-                  event->header.size, stream->remoteFillLevel, stream->writeSize, stream->readSize);
+            mvLog(MVLOG_DEBUG,"S%d: Got remote release of %ld, remote fill level %ld out of %ld %ld\n",
+                  event->header.streamId, event->header.size, stream->remoteFillLevel, stream->writeSize, stream->readSize);
             releaseStream(stream);
 
             dispatcherUnblockEvent(-1, XLINK_WRITE_REQ, event->header.streamId,
-                                    event->deviceHandle.xLinkFD);
+                                   event->deviceHandle.xLinkFD);
             //with every released packet check if the stream is already marked for close
             if (stream->closeStreamInitiated && stream->localFillLevel == 0)
             {
@@ -801,48 +876,56 @@ int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response
                                                           event->header.streamName,
                                                           0, event->header.size,
                                                           INVALID_STREAM_ID);
+
+            if (response->header.streamId == INVALID_STREAM_ID) {
+                response->header.flags.bitField.ack = 0;
+                response->header.flags.bitField.sizeTooBig = 1;
+                break;
+            }
+
             response->deviceHandle = event->deviceHandle;
             mv_strncpy(response->header.streamName, MAX_STREAM_NAME_LENGTH,
-                event->header.streamName, MAX_STREAM_NAME_LENGTH - 1);
+                       event->header.streamName, MAX_STREAM_NAME_LENGTH - 1);
             response->header.size = event->header.size;
             mvLog(MVLOG_DEBUG,"creating stream %x\n", (int)response->header.streamId);
             break;
         case XLINK_CLOSE_STREAM_REQ:
-            {
-                response->header.type = XLINK_CLOSE_STREAM_RESP;
-                response->header.streamId = event->header.streamId;
-                response->deviceHandle = event->deviceHandle;
-
-                streamDesc_t* stream = getStreamById(event->deviceHandle.xLinkFD,
-                                                     event->header.streamId);
-                if (!stream) {
-                    //if we have sent a NACK before, when the event gets unblocked
-                    //the stream might already be unavailable
-                    response->header.flags.bitField.ack = 1; //All is good, we are done
+        {
+            response->header.type = XLINK_CLOSE_STREAM_RESP;
+            response->header.streamId = event->header.streamId;
+            response->deviceHandle = event->deviceHandle;
+
+            streamDesc_t* stream = getStreamById(event->deviceHandle.xLinkFD,
+                                                 event->header.streamId);
+            if (!stream) {
+                //if we have sent a NACK before, when the event gets unblocked
+                //the stream might already be unavailable
+                response->header.flags.bitField.ack = 1; //All is good, we are done
+                response->header.flags.bitField.nack = 0;
+                mvLog(MVLOG_DEBUG,"%s() got a close stream on aready closed stream\n", __func__);
+            } else {
+                if (stream->localFillLevel == 0)
+                {
+                    response->header.flags.bitField.ack = 1;
                     response->header.flags.bitField.nack = 0;
-                    mvLog(MVLOG_DEBUG,"%s() got a close stream on aready closed stream\n", __func__);
-                } else {
-                    if (stream->localFillLevel == 0)
-                    {
-                        response->header.flags.bitField.ack = 1;
-                        response->header.flags.bitField.nack = 0;
-
-                        deallocateStream(stream);
-                        if (!stream->writeSize) {
-                            stream->id = INVALID_STREAM_ID;
-                        }
-                    }
-                    else
-                    {
-                        mvLog(MVLOG_DEBUG,"%s():fifo is NOT empty returning NACK \n", __func__);
-                        response->header.flags.bitField.nack = 1;
-                        stream->closeStreamInitiated = 1;
-                    }
 
-                    releaseStream(stream);
+                    deallocateStream(stream);
+                    if (!stream->writeSize) {
+                        stream->id = INVALID_STREAM_ID;
+                        stream->name[0] = '\0';
+                    }
                 }
-                break;
+                else
+                {
+                    mvLog(MVLOG_DEBUG,"%s():fifo is NOT empty returning NACK \n", __func__);
+                    response->header.flags.bitField.nack = 1;
+                    stream->closeStreamInitiated = 1;
+                }
+
+                releaseStream(stream);
             }
+            break;
+        }
         case XLINK_PING_REQ:
             response->header.type = XLINK_PING_RESP;
             response->header.flags.bitField.ack = 1;
@@ -850,7 +933,7 @@ int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response
             sem_post(&pingSem);
             break;
         case XLINK_RESET_REQ:
-            mvLog(MVLOG_DEBUG,"reset request\n");
+            mvLog(MVLOG_DEBUG,"reset request - received! Sending ACK *****\n");
             response->header.flags.bitField.ack = 1;
             response->header.flags.bitField.nack = 0;
             response->header.type = XLINK_RESET_RESP;
@@ -870,6 +953,9 @@ int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response
                                                           event->header.streamName,
                                                           event->header.size,0,
                                                           event->header.streamId);
+#ifndef __PC__
+            ASSERT_X_LINK_R(response->header.streamId != INVALID_STREAM_ID, X_LINK_ERROR);
+#endif
             response->deviceHandle = event->deviceHandle;
             break;
         }
@@ -888,6 +974,7 @@ int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response
                 response->header.flags.bitField.nack = 1;
                 response->header.flags.bitField.ack = 0;
                 stream->id = INVALID_STREAM_ID;
+                stream->name[0] = '\0';
                 break;
             }
             releaseStream(stream);
@@ -902,12 +989,16 @@ int dispatcherRemoteEventGetResponse(xLinkEvent_t* event, xLinkEvent_t* response
     }
     return 0;
 }
-
 //adds a new event with parameters and returns event id
 int dispatcherEventSend(xLinkEvent_t *event)
 {
     mvLog(MVLOG_DEBUG, "%s, size %d, streamId %d.\n", TypeToStr(event->header.type), event->header.size, event->header.streamId);
-    int rc = XLinkWrite(&event->deviceHandle, &event->header, sizeof(event->header), USB_DATA_TIMEOUT);
+#ifdef __PC__
+    int rc = XLinkWrite(&event->deviceHandle, &event->header, sizeof(event->header), XLINK_USB_DATA_TIMEOUT);
+#else
+    int rc = XLinkWrite(&event->deviceHandle, &event->header, sizeof(event->header), 0);
+#endif
+
     if(rc < 0)
     {
         mvLog(MVLOG_ERROR,"Write failed (header) (err %d) | event %s\n", rc, TypeToStr(event->header.type));
@@ -917,10 +1008,12 @@ int dispatcherEventSend(xLinkEvent_t *event)
     {
         //write requested data
         rc = XLinkWrite(&event->deviceHandle, event->data,
-                          event->header.size, USB_DATA_TIMEOUT);
-
+                        event->header.size, XLINK_USB_DATA_TIMEOUT);
         if(rc < 0) {
-            mvLog(MVLOG_ERROR,"Write failed (event) (err %d)\n", rc);
+            mvLog(MVLOG_ERROR,"Write failed %d\n", rc);
+#ifndef __PC__
+            return rc;
+#endif
         }
     }
     // this function will send events to the remote node
@@ -934,7 +1027,7 @@ static xLinkState_t getXLinkState(xLinkDesc_t* link)
     return link->peerState;
 }
 
-void dispatcherCloseLink(void*fd)
+void dispatcherCloseLink(void* fd, int fullClose)
 {
     xLinkDesc_t* link = getLink(fd);
 
@@ -943,30 +1036,36 @@ void dispatcherCloseLink(void*fd)
         return;
     }
 
+    if (!fullClose) {
+        link->peerState = XLINK_DOWN;
+        return;
+    }
+
+#ifndef __PC__
+    link->peerState = X_LINK_COMMUNICATION_NOT_OPEN;
+#else
     link->peerState = XLINK_NOT_INIT;
+#endif
+
     link->id = INVALID_LINK_ID;
     link->deviceHandle.xLinkFD = NULL;
     link->nextUniqueStreamId = 0;
 
-    int index;
-    uint32_t release_size = 0;
-    streamDesc_t* stream;
-    for (index = 0; index < XLINK_MAX_STREAMS; index++)
-    {
-        stream = &link->availableStreams[index];
-        while (NULL != getPacketFromStream(stream))
-        {
-            releasePacketFromStream(stream, &release_size);
+    for (int index = 0; index < XLINK_MAX_STREAMS; index++) {
+        streamDesc_t* stream = &link->availableStreams[index];
+        if (!stream) {
+            continue;
         }
-        while (stream->blockedPackets != 0)
-        {
-            releasePacketFromStream(stream, &release_size);
+
+        while (getPacketFromStream(stream) || stream->blockedPackets) {
+            releasePacketFromStream(stream, NULL);
         }
-        if (stream->name[0] != '\0')
-        {
-            sem_destroy(&stream->sem); // ignore the error for some unused semaphore
+
+        if (is_semaphore_initialized(stream)) {
+            sem_destroy(&stream->sem);
             stream->name[0] = '\0';
         }
+
         stream->id = INVALID_STREAM_ID;
     }
 }
@@ -976,10 +1075,10 @@ void dispatcherCloseDeviceFd(xLinkDeviceHandle_t* deviceHandle)
     XLinkPlatformCloseRemote(deviceHandle);
 }
 
-
 /*#################################################################################
 ###################################### EXTERNAL ###################################
 ##################################################################################*/
+
 //Called only from app - per device
 XLinkError_t XLinkConnect(XLinkHandler_t* handler)
 {
@@ -997,7 +1096,7 @@ XLinkError_t XLinkConnect(XLinkHandler_t* handler)
 
     link->deviceHandle.protocol = handler->protocol;
     if (XLinkPlatformConnect(handler->devicePath2, handler->devicePath,
-        link->deviceHandle.protocol, &link->deviceHandle.xLinkFD) < 0) {
+                             link->deviceHandle.protocol, &link->deviceHandle.xLinkFD) < 0) {
         return X_LINK_ERROR;
     }
 
@@ -1005,6 +1104,7 @@ XLinkError_t XLinkConnect(XLinkHandler_t* handler)
         return X_LINK_TIMEOUT;
 
     xLinkEvent_t event = {0};
+
     event.header.type = XLINK_PING_REQ;
     event.deviceHandle = link->deviceHandle;
     dispatcherAddEvent(EVENT_LOCAL, &event);
@@ -1016,21 +1116,40 @@ XLinkError_t XLinkConnect(XLinkHandler_t* handler)
 
     link->id = getNextAvailableLinkUniqueId();
     link->peerState = XLINK_UP;
+    link->hostClosedFD = 0;
     handler->linkId = link->id;
-
     return X_LINK_SUCCESS;
 }
 
 XLinkError_t XLinkInitialize(XLinkGlobalHandler_t* handler)
 {
+#ifndef __PC__
+    mvLogLevelSet(MVLOG_FATAL);
+    mvLogDefaultLevelSet(MVLOG_FATAL);
+#endif
+
+    ASSERT_X_LINK(handler);
     ASSERT_X_LINK(XLINK_MAX_STREAMS <= MAX_POOLS_ALLOC);
     glHandler = handler;
-    sem_init(&pingSem,0,0);
+    if (sem_init(&pingSem,0,0)) {
+        mvLog(MVLOG_ERROR, "Can't create semaphore\n");
+    }
     int i;
 
     XLinkPlatformInit();
+
+    //Using deprecated fields. Begin.
+    int loglevel = handler->loglevel;
+    int protocol = handler->protocol;
+    //Using deprecated fields. End.
+
     memset((void*)handler, 0, sizeof(XLinkGlobalHandler_t));
 
+    //Using deprecated fields. Begin.
+    handler->loglevel = loglevel;
+    handler->protocol = protocol;
+    //Using deprecated fields. End.
+
     //initialize availableStreams
     xLinkDesc_t* link;
     for (i = 0; i < MAX_LINKS; i++) {
@@ -1051,12 +1170,27 @@ XLinkError_t XLinkInitialize(XLinkGlobalHandler_t* handler)
     controlFunctionTbl.closeDeviceFd = &dispatcherCloseDeviceFd;
 
     if (dispatcherInitialize(&controlFunctionTbl))
+    {
+#ifdef __PC__
         return X_LINK_TIMEOUT;
+#endif
+    }
+
+#ifndef __PC__
+    int index = getNextAvailableLinkIndex();
+    if (index == -1)
+        return X_LINK_COMMUNICATION_NOT_OPEN;
 
+    link = &availableXLinks[index];
+    link->deviceHandle.xLinkFD = NULL;
+    link->id = nextUniqueLinkId++;
+    link->peerState = XLINK_UP;
+
+    sem_wait(&pingSem);
+#endif
     return X_LINK_SUCCESS;
 }
 
-
 XLinkError_t XLinkGetFillLevel(streamId_t streamId, int isRemote, int* fillLevel)
 {
     linkId_t id;
@@ -1082,6 +1216,11 @@ XLinkError_t XLinkGetFillLevel(streamId_t streamId, int isRemote, int* fillLevel
 
 streamId_t XLinkOpenStream(linkId_t id, const char* name, int stream_write_size)
 {
+    ASSERT_X_LINK(name);
+    if (stream_write_size < 0) {
+        return X_LINK_ERROR;
+    }
+
     xLinkEvent_t event = {0};
     xLinkDesc_t* link = getLinkById(id);
     mvLog(MVLOG_DEBUG,"%s() id %d link %p\n", __func__, id, link);
@@ -1092,8 +1231,7 @@ streamId_t XLinkOpenStream(linkId_t id, const char* name, int stream_write_size)
         return INVALID_STREAM_ID;
     }
 
-    if(strlen(name) > MAX_STREAM_NAME_LENGTH)
-    {
+    if(strlen(name) > MAX_STREAM_NAME_LENGTH) {
         mvLog(MVLOG_WARN,"name too long\n");
         return INVALID_STREAM_ID;
     }
@@ -1103,7 +1241,7 @@ streamId_t XLinkOpenStream(linkId_t id, const char* name, int stream_write_size)
         stream_write_size = ALIGN_UP(stream_write_size, __CACHE_LINE_SIZE);
         event.header.type = XLINK_CREATE_STREAM_REQ;
         mv_strncpy(event.header.streamName, MAX_STREAM_NAME_LENGTH,
-            name, MAX_STREAM_NAME_LENGTH - 1);
+                   name, MAX_STREAM_NAME_LENGTH - 1);
         event.header.size = stream_write_size;
         event.header.streamId = INVALID_STREAM_ID;
         event.deviceHandle = link->deviceHandle;
@@ -1112,6 +1250,7 @@ streamId_t XLinkOpenStream(linkId_t id, const char* name, int stream_write_size)
         if (dispatcherWaitEventComplete(&link->deviceHandle, DEFAULT_TIMEOUT))
             return INVALID_STREAM_ID;
 
+#ifdef __PC__
         XLinkError_t eventStatus = checkEventHeader(event.header);
         if (eventStatus != X_LINK_SUCCESS) {
             mvLog(MVLOG_ERROR, "Got wrong package from device, error code = %s", XLinkErrorToStr(eventStatus));
@@ -1122,14 +1261,23 @@ streamId_t XLinkOpenStream(linkId_t id, const char* name, int stream_write_size)
                 return INVALID_STREAM_ID;
             }
         }
+#endif
     }
     streamId_t streamId = getStreamIdByName(link, name);
 
+#ifdef __PC__
     if (streamId > 0x0FFFFFFF) {
         mvLog(MVLOG_ERROR, "Cannot find stream id by the \"%s\" name", name);
         mvLog(MVLOG_ERROR,"Max streamId reached!");
         return INVALID_STREAM_ID;
     }
+#else
+    if (streamId == INVALID_STREAM_ID) {
+        mvLog(MVLOG_ERROR,"Max streamId reached %x!", streamId);
+        return INVALID_STREAM_ID;
+    }
+#endif
+
     COMBIN_IDS(streamId, id);
     return streamId;
 }
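XLinkOpenStream hands back a stream id with the link id folded into it via COMBIN_IDS, and the per-stream calls such as writeData peel it apart again with EXTRACT_IDS. Those macros live in the private headers; the sketch below shows the packing convention the 0x0FFFFFFF guard above suggests (link id in the top bits), but the exact bit split is an assumption, not the verbatim macros:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t streamId_sketch_t;
typedef uint8_t  linkId_sketch_t;

/* Assumed convention: link id in the top byte, per-link stream number below. */
static streamId_sketch_t combine_ids_sketch(streamId_sketch_t streamId, linkId_sketch_t linkId)
{
    return streamId | ((streamId_sketch_t)linkId << 24);
}

static void extract_ids_sketch(streamId_sketch_t *streamId, linkId_sketch_t *linkId)
{
    *linkId    = (linkId_sketch_t)(*streamId >> 24);
    *streamId &= 0x00FFFFFFu;
}

int main(void)
{
    streamId_sketch_t sid = combine_ids_sketch(5, 3);   /* stream 5 on link 3 */
    linkId_sketch_t   lid = 0;
    extract_ids_sketch(&sid, &lid);
    printf("link %u stream %u\n", lid, sid);            /* link 3 stream 5 */
    return 0;
}
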
@@ -1157,7 +1305,6 @@ XLinkError_t checkEventHeader(xLinkEventHeader_t header) {
     }
 }
 
-
 // Just like open stream, when closeStream is called
 // on the local side we are resetting the writeSize
 // and on the remote side we are freeing the read buffer
@@ -1176,7 +1323,8 @@ XLinkError_t XLinkCloseStream(streamId_t streamId)
     event.header.type = XLINK_CLOSE_STREAM_REQ;
     event.header.streamId = streamId;
     event.deviceHandle = link->deviceHandle;
-    if (dispatcherAddEvent(EVENT_LOCAL, &event) == NULL) {
+    xLinkEvent_t* ev = dispatcherAddEvent(EVENT_LOCAL, &event);
+    if (ev == NULL) {
         mvLog(MVLOG_ERROR, "Dispatcher failed on adding event");
         return X_LINK_ERROR;
     }
@@ -1184,9 +1332,7 @@ XLinkError_t XLinkCloseStream(streamId_t streamId)
     if (dispatcherWaitEventComplete(&link->deviceHandle, DEFAULT_TIMEOUT))
         return X_LINK_TIMEOUT;
 
-    if (event.header.flags.bitField.ack == 1)
-        return X_LINK_SUCCESS;
-    else
+    if (event.header.flags.bitField.ack != 1)
         return X_LINK_COMMUNICATION_FAIL;
 
     return X_LINK_SUCCESS;
@@ -1201,22 +1347,43 @@ XLinkError_t XLinkGetAvailableStreams(linkId_t id)
     {
         return X_LINK_COMMUNICATION_NOT_OPEN;
     }
+    /*...get other statuses*/
     return X_LINK_SUCCESS;
 }
 
-XLinkError_t XLinkFindDevice(int index, XLinkDeviceState_t state,
-    deviceDesc_t* in_deviceRequirements, deviceDesc_t* out_foundDevice)
+XLinkError_t XLinkFindFirstSuitableDevice(XLinkDeviceState_t state,
+                                          const deviceDesc_t in_deviceRequirements,
+                                          deviceDesc_t *out_foundDevice)
 {
-    memset(out_foundDevice, 0, sizeof(struct deviceDesc_t));
+    ASSERT_X_LINK(out_foundDevice);
 
     xLinkPlatformErrorCode_t rc;
-    rc = XLinkPlatformFindDeviceName(index, state, in_deviceRequirements, out_foundDevice);
+    rc = XLinkPlatformFindDeviceName(state, in_deviceRequirements, out_foundDevice);
+    return parseUsbLinkPlatformError(rc);
+}
+
+XLinkError_t XLinkFindAllSuitableDevices(XLinkDeviceState_t state,
+                                         deviceDesc_t in_deviceRequirements,
+                                         deviceDesc_t *out_foundDevicesPtr,
+                                         const unsigned int devicesArraySize,
+                                         unsigned int* out_amountOfFoundDevices) {
+    ASSERT_X_LINK(out_foundDevicesPtr);
+    ASSERT_X_LINK(devicesArraySize > 0);
+    ASSERT_X_LINK(out_amountOfFoundDevices);
+
+    xLinkPlatformErrorCode_t rc;
+    rc = XLinkPlatformFindArrayOfDevicesNames(
+        state, in_deviceRequirements,
+        out_foundDevicesPtr, devicesArraySize, out_amountOfFoundDevices);
+
     return parseUsbLinkPlatformError(rc);
 }
 
 static XLinkError_t writeData(streamId_t streamId, const uint8_t* buffer,
-                            int size, unsigned int timeout)
+                              int size, unsigned int timeout)
 {
+    ASSERT_X_LINK(buffer);
+
     linkId_t id;
     EXTRACT_IDS(streamId,id);
     xLinkDesc_t* link = getLinkById(id);
@@ -1225,8 +1392,6 @@ static XLinkError_t writeData(streamId_t streamId, const uint8_t* buffer,
     {
         return X_LINK_COMMUNICATION_NOT_OPEN;
     }
-    struct timespec start, end;
-    clock_gettime(CLOCK_REALTIME, &start);
 
     xLinkEvent_t event = {0};
     event.header.type = XLINK_WRITE_REQ;
@@ -1235,10 +1400,15 @@ static XLinkError_t writeData(streamId_t streamId, const uint8_t* buffer,
     event.deviceHandle = link->deviceHandle;
     event.data = (void*)buffer;
 
-    if (dispatcherAddEvent(EVENT_LOCAL, &event) == NULL) {
+    struct timespec start, end;
+    clock_gettime(CLOCK_REALTIME, &start);
+
+    xLinkEvent_t* ev = dispatcherAddEvent(EVENT_LOCAL, &event);
+    if (ev == NULL) {
         mvLog(MVLOG_ERROR, "Dispatcher failed on adding event");
         return X_LINK_ERROR;
     }
+
     if (dispatcherWaitEventComplete(&link->deviceHandle, timeout))
         return X_LINK_TIMEOUT;
 
@@ -1246,7 +1416,7 @@ static XLinkError_t writeData(streamId_t streamId, const uint8_t* buffer,
 
     if (event.header.flags.bitField.ack == 1)
     {
-         //profile only on success
+        //profile only on success
         if( glHandler->profEnable)
         {
             glHandler->profilingData.totalWriteBytes += size;
@@ -1265,17 +1435,17 @@ XLinkError_t XLinkWriteData(streamId_t streamId, const uint8_t* buffer,
 }
 
 XLinkError_t XLinkWriteDataWithTimeout(streamId_t streamId, const uint8_t* buffer,
-                            int size, unsigned int timeout)
+                                       int size, unsigned int timeout)
 {
     return writeData(streamId, buffer, size, timeout);
 }
 
-
 XLinkError_t XLinkWriteGraphData(streamId_t streamId, const uint8_t* buffer, int size)
 {
     return writeData(streamId, buffer, size, glAllocateGraphTimeOutMsec);
 }
 
+
 XLinkError_t XLinkAsyncWriteData()
 {
     if (getXLinkState(NULL) != XLINK_UP)
@@ -1302,20 +1472,21 @@ XLinkError_t XLinkReadDataWithTimeOut(streamId_t streamId, streamPacketDesc_t**
     }
 
     xLinkEvent_t event = {0};
-    struct timespec start, end;
-
     event.header.type = XLINK_READ_REQ;
     event.header.size = 0;
     event.header.streamId = streamId;
     event.deviceHandle = link->deviceHandle;
     event.data = NULL;
 
+    struct timespec start, end;
     clock_gettime(CLOCK_REALTIME, &start);
 
-    if (dispatcherAddEvent(EVENT_LOCAL, &event) == NULL) {
+    xLinkEvent_t* ev = dispatcherAddEvent(EVENT_LOCAL, &event);
+    if (ev == NULL) {
         mvLog(MVLOG_ERROR, "Dispatcher failed on adding event");
         return X_LINK_ERROR;
     }
+
     if (dispatcherWaitEventComplete(&link->deviceHandle, timeout))
         return X_LINK_TIMEOUT;
 
@@ -1327,17 +1498,16 @@ XLinkError_t XLinkReadDataWithTimeOut(streamId_t streamId, streamPacketDesc_t**
     *packet = (streamPacketDesc_t *)event.data;
     clock_gettime(CLOCK_REALTIME, &end);
 
-    if (event.header.flags.bitField.ack == 1)
+    if (event.header.flags.bitField.ack != 1)
+        return X_LINK_COMMUNICATION_FAIL;
+
+    if( glHandler->profEnable)
     {
-        if( glHandler->profEnable)
-        {
-            glHandler->profilingData.totalReadBytes += (*packet)->length;
-            glHandler->profilingData.totalReadTime += timespec_diff(&start, &end);
-        }
-        return X_LINK_SUCCESS;
+        glHandler->profilingData.totalReadBytes += (*packet)->length;
+        glHandler->profilingData.totalReadTime += timespec_diff(&start, &end);
     }
-    else
-        return X_LINK_COMMUNICATION_FAIL;
+
+    return X_LINK_SUCCESS;
 }
 
 XLinkError_t XLinkReleaseData(streamId_t streamId)
@@ -1356,10 +1526,12 @@ XLinkError_t XLinkReleaseData(streamId_t streamId)
     event.header.streamId = streamId;
     event.deviceHandle = link->deviceHandle;
 
-    if (dispatcherAddEvent(EVENT_LOCAL, &event) == NULL) {
+    xLinkEvent_t* ev = dispatcherAddEvent(EVENT_LOCAL, &event);
+    if (ev == NULL) {
         mvLog(MVLOG_ERROR, "Dispatcher failed on adding event");
         return X_LINK_ERROR;
     }
+
     if (dispatcherWaitEventComplete(&link->deviceHandle, DEFAULT_TIMEOUT))
         return X_LINK_TIMEOUT;
 
@@ -1369,7 +1541,7 @@ XLinkError_t XLinkReleaseData(streamId_t streamId)
         return X_LINK_COMMUNICATION_FAIL;
 }
 
-XLinkError_t XLinkBootRemote(deviceDesc_t* deviceDesc, const char* binaryPath)
+XLinkError_t XLinkBoot(deviceDesc_t* deviceDesc, const char* binaryPath)
 {
     if (XLinkPlatformBootRemote(deviceDesc, binaryPath) == 0)
         return X_LINK_SUCCESS;
@@ -1392,9 +1564,9 @@ XLinkError_t XLinkResetRemote(linkId_t id)
     xLinkEvent_t event = {0};
     event.header.type = XLINK_RESET_REQ;
     event.deviceHandle = link->deviceHandle;
-    mvLog(MVLOG_DEBUG,"sending reset remote event\n");
+    mvLog(MVLOG_DEBUG, "sending reset remote event\n");
     dispatcherAddEvent(EVENT_LOCAL, &event);
-    if (dispatcherWaitEventComplete(&link->deviceHandle, DEFAULT_TIMEOUT))
+    if (dispatcherWaitEventComplete(&link->deviceHandle, glDeviceOpenTimeOutMsec))
         return X_LINK_TIMEOUT;
 
     return X_LINK_SUCCESS;
@@ -1407,8 +1579,7 @@ XLinkError_t XLinkResetAll()
 #else
     int i;
     for (i = 0; i < MAX_LINKS; i++) {
-        if (availableXLinks[i].id != INVALID_LINK_ID &&
-            availableXLinks[i].deviceHandle.protocol != X_LINK_PCIE) {
+        if (availableXLinks[i].id != INVALID_LINK_ID) {
             xLinkDesc_t* link = &availableXLinks[i];
             int stream;
             for (stream = 0; stream < XLINK_MAX_STREAMS; stream++) {
@@ -1477,4 +1648,90 @@ XLinkError_t XLinkProfPrint()
     }
     return X_LINK_SUCCESS;
 }
+
+// ------------------------------------
+// Deprecated API. Begin.
+// ------------------------------------
+
+XLinkError_t getDeviceName(int index, char* name, int nameSize, XLinkPlatform_t platform, XLinkDeviceState_t state)
+{
+    ASSERT_X_LINK(name != NULL);
+    ASSERT_X_LINK(index >= 0);
+    ASSERT_X_LINK(nameSize >= 0 && nameSize <= XLINK_MAX_NAME_SIZE);
+
+    deviceDesc_t in_deviceRequirements = {};
+    in_deviceRequirements.protocol = glHandler != NULL ? glHandler->protocol : USB_VSC;
+    in_deviceRequirements.platform = platform;
+    memset(name, 0, nameSize);
+
+    if(index == 0)
+    {
+        deviceDesc_t deviceToBoot = {};
+        XLinkError_t rc =
+            XLinkFindFirstSuitableDevice(state, in_deviceRequirements, &deviceToBoot);
+        if(rc != X_LINK_SUCCESS)
+        {
+            return rc;
+        }
+
+        return mv_strcpy(name, nameSize, deviceToBoot.name) == EOK ? X_LINK_SUCCESS : X_LINK_ERROR;
+    }
+    else
+    {
+        deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {};
+        unsigned int numberOfDevices = 0;
+        XLinkError_t rc =
+            XLinkFindAllSuitableDevices(state, in_deviceRequirements,
+                                        deviceDescArray, XLINK_MAX_DEVICES, &numberOfDevices);
+        if(rc != X_LINK_SUCCESS)
+        {
+            return rc;
+        }
+
+        if((unsigned int)index >= numberOfDevices)
+        {
+            return X_LINK_DEVICE_NOT_FOUND;
+        }
+
+        return mv_strcpy(name, nameSize, deviceDescArray[index].name) == EOK ? X_LINK_SUCCESS : X_LINK_ERROR;
+    }
+}
+
+XLinkError_t XLinkGetDeviceName(int index, char* name, int nameSize)
+{
+    return getDeviceName(index, name, nameSize, X_LINK_ANY_PLATFORM, X_LINK_ANY_STATE);
+}
+XLinkError_t XLinkGetDeviceNameExtended(int index, char* name, int nameSize, int pid)
+{
+    XLinkDeviceState_t state = XLinkPlatformPidToState(pid);
+    XLinkPlatform_t platform = XLinkPlatformPidToPlatform(pid);
+
+    return getDeviceName(index, name, nameSize, platform, state);
+}
+
+XLinkError_t XLinkBootRemote(const char* deviceName, const char* binaryPath)
+{
+    ASSERT_X_LINK(deviceName != NULL);
+    ASSERT_X_LINK(binaryPath != NULL);
+
+    deviceDesc_t deviceDesc = {};
+    deviceDesc.protocol = glHandler != NULL ? glHandler->protocol : USB_VSC;
+    mv_strcpy(deviceDesc.name, XLINK_MAX_NAME_SIZE, deviceName);
+
+    return XLinkBoot(&deviceDesc, binaryPath);
+}
+
+XLinkError_t XLinkDisconnect(linkId_t id)
+{
+    xLinkDesc_t* link = getLinkById(id);
+    ASSERT_X_LINK(link != NULL);
+
+    link->hostClosedFD = 1;
+    return XLinkPlatformCloseRemote(&link->deviceHandle);
+}
+
+// ------------------------------------
+// Deprecated API. End.
+// ------------------------------------
+
 /* end of file */
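Taken together, the entry points touched above make up the host-side life cycle of a device: initialize XLink, find and boot a device, connect, exchange data over a named stream, then close and reset. The hedged sketch below strings them together (error handling is abbreviated; struct field names match the code above, while "myriad.mvcmd", the stream name, the 2000 timeout value and the buffer size are placeholders):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "XLink.h"   /* public declarations, see the header diff below */

static uint8_t payload[1024];

int host_round_trip_sketch(void)
{
    XLinkGlobalHandler_t ghandler;
    memset(&ghandler, 0, sizeof(ghandler));
    if (XLinkInitialize(&ghandler) != X_LINK_SUCCESS)
        return -1;

    /* Find any device the platform layer can see. */
    deviceDesc_t requirements, found;
    memset(&requirements, 0, sizeof(requirements));
    memset(&found, 0, sizeof(found));
    if (XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, requirements, &found) != X_LINK_SUCCESS)
        return -1;

    /* Boot the firmware image (path is a placeholder), then connect. */
    if (XLinkBoot(&found, "myriad.mvcmd") != X_LINK_SUCCESS)
        return -1;

    XLinkHandler_t handler;
    memset(&handler, 0, sizeof(handler));
    handler.devicePath = found.name;
    handler.protocol   = found.protocol;
    if (XLinkConnect(&handler) != X_LINK_SUCCESS)
        return -1;

    /* Open a stream, write a buffer, read the reply, release it, close. */
    streamId_t stream = XLinkOpenStream(handler.linkId, "exampleStream", sizeof(payload));
    if (stream == INVALID_STREAM_ID)
        return -1;
    if (XLinkWriteData(stream, payload, sizeof(payload)) != X_LINK_SUCCESS)
        return -1;

    streamPacketDesc_t* packet = NULL;
    if (XLinkReadDataWithTimeOut(stream, &packet, 2000) == X_LINK_SUCCESS) {
        printf("received %u bytes\n", packet->length);
        XLinkReleaseData(stream);
    }

    XLinkCloseStream(stream);
    XLinkResetRemote(handler.linkId);
    return 0;
}
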
index f996cac..42f99a9 100644 (file)
@@ -6,6 +6,7 @@
 /// @file
 /// @brief     Application configuration Leon header
 ///
+
 #ifndef _XLINK_H
 #define _XLINK_H
 #include "XLinkPublicDefines.h"
@@ -43,11 +44,19 @@ XLinkError_t XLinkGetAvailableStreams(linkId_t id);
 
 /**
  * @brief Return Myriad device description which meets the requirements
- * @param index a set of parameters that the device must comply with
- * @param index Return device on index from suitable devices list
  */
-XLinkError_t XLinkFindDevice(int index, XLinkDeviceState_t state,
-    deviceDesc_t* in_deviceRequirements, deviceDesc_t* out_foundDevice);
+XLinkError_t XLinkFindFirstSuitableDevice(XLinkDeviceState_t state,
+                                          const deviceDesc_t in_deviceRequirements,
+                                          deviceDesc_t *out_foundDevice);
+
+/**
+ * @brief Return all Myriad device descriptions which meet the requirements
+ */
+XLinkError_t XLinkFindAllSuitableDevices(XLinkDeviceState_t state,
+                                         const deviceDesc_t in_deviceRequirements,
+                                         deviceDesc_t *out_foundDevicesPtr,
+                                         const unsigned int devicesArraySize,
+                                         unsigned int *out_amountOfFoundDevices);
 
 // Send a package to initiate the writing of data to a remote stream
 // Note that the actual size of the written data is ALIGN_UP(size, 64)
@@ -72,7 +81,7 @@ XLinkError_t XLinkReleaseData(streamId_t streamId);
 XLinkError_t XLinkGetFillLevel(streamId_t streamId, int isRemote, int* fillLevel);
 
 // Boot the remote (This is intended as an interface to boot the Myriad from PC)
-XLinkError_t XLinkBootRemote(deviceDesc_t* deviceDesc, const char* binaryPath);
+XLinkError_t XLinkBoot(deviceDesc_t* deviceDesc, const char* binaryPath);
 
 // Reset the remote
 XLinkError_t XLinkResetRemote(linkId_t id);
@@ -87,6 +96,21 @@ XLinkError_t XLinkProfPrint();
 
 XLinkError_t XLinkWriteGraphData(streamId_t streamId, const uint8_t* buffer, int size);
 
+// ------------------------------------
+// Deprecated API. Begin.
+// ------------------------------------
+
+XLinkError_t XLinkGetDeviceName(int index, char* name, int nameSize);
+XLinkError_t XLinkGetDeviceNameExtended(int index, char* name, int nameSize, int pid);
+
+XLinkError_t XLinkBootRemote(const char* deviceName, const char* binaryPath);
+
+XLinkError_t XLinkDisconnect(linkId_t id);
+
+// ------------------------------------
+// Deprecated API. End.
+// ------------------------------------
+
 #ifdef __cplusplus
 }
 #endif
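The index-based XLinkFindDevice above has been replaced by the two finder declarations, so listing everything the platform layer can currently see now takes a single call with a caller-provided array. A minimal sketch (XLINK_MAX_DEVICES is assumed to be the array bound from the public defines, as used by the deprecated getDeviceName wrapper in XLink.c):

#include <stdio.h>
#include <string.h>
#include "XLink.h"

/* Print every device the platform layer can currently see. */
static void print_all_devices_sketch(void)
{
    deviceDesc_t requirements;
    memset(&requirements, 0, sizeof(requirements));      /* no constraints */

    deviceDesc_t found[XLINK_MAX_DEVICES];
    unsigned int amount = 0;
    memset(found, 0, sizeof(found));

    if (XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, requirements,
                                    found, XLINK_MAX_DEVICES, &amount) != X_LINK_SUCCESS) {
        printf("device enumeration failed\n");
        return;
    }
    for (unsigned int i = 0; i < amount; ++i)
        printf("device %u: %s\n", i, found[i].name);
}
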
index 252755e..9927ee4 100644 (file)
@@ -7,27 +7,31 @@
 ///
 /// @brief     Application configuration Leon header
 ///
-
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE // fix for warning: implicit declaration of function ‘pthread_setname_np’
 #endif
+
 #include "stdio.h"
 #include "stdint.h"
 #include "stdlib.h"
 #include "string.h"
-
 #include <assert.h>
 #include <stdlib.h>
+
 #if (defined(_WIN32) || defined(_WIN64))
-#include "win_pthread.h"
-#include "win_semaphore.h"
+# include "win_pthread.h"
+# include "win_semaphore.h"
 #else
-#include <pthread.h>
-#include <semaphore.h>
+# include <pthread.h>
+# ifndef __APPLE__
+#  include <semaphore.h>
+# endif
 #endif
+
 #include "XLinkDispatcher.h"
 #include "XLinkPrivateDefines.h"
 #include "XLink.h"
+#include "XLink_tool.h"
 
 #define MVLOG_UNIT_NAME xLink
 #include "mvLog.h"
@@ -42,11 +46,11 @@ typedef enum {
 
 typedef struct xLinkEventPriv_t {
     xLinkEvent_t packet;
+    xLinkEvent_t *retEv;
     xLinkEventState_t isServed;
     xLinkEventOrigin_t origin;
     sem_t* sem;
     void* data;
-    xLinkEvent_t * retEv;
     uint32_t pad;
 } xLinkEventPriv_t;
 
@@ -72,6 +76,8 @@ typedef struct {
     xLinkDeviceHandle_t deviceHandle; //will be device handler
     int schedulerId;
 
+    int queueProcPriority;
+
     sem_t addEventSem;
     sem_t notifyDispatcherSem;
     volatile uint32_t resetXLink;
@@ -83,22 +89,6 @@ typedef struct {
     localSem_t eventSemaphores[MAXIMUM_SEMAPHORES];
 } xLinkSchedulerState_t;
 
-
-#define CIRCULAR_INCREMENT(x, maxVal, base) \
-    { \
-        x++; \
-        if (x == maxVal) \
-            x = base; \
-    }
-//avoid problems with unsigned. first compare and then give the nuw value
-#define CIRCULAR_DECREMENT(x, maxVal, base) \
-{ \
-    if (x == base) \
-        x = maxVal - 1; \
-    else \
-        x--; \
-}
-
 extern char* TypeToStr(int type);
 
 #if (defined(_WIN32) || defined(_WIN64))
@@ -112,10 +102,11 @@ int numSchedulers;
 xLinkSchedulerState_t schedulerState[MAX_SCHEDULERS];
 sem_t addSchedulerSem;
 
+// Below is a workaround for the MSVC error "C2088 '==': illegal for struct"
 int pthread_t_compare(pthread_t a, pthread_t b)
 {
 #if (defined(_WIN32) || defined(_WIN64) )
-       return ((a.tid == b.tid));
+    return ((a.tid == b.tid));
 #else
     return  (a == b);
 #endif
@@ -186,7 +177,7 @@ static sem_t* createSem(xLinkSchedulerState_t* curr)
         }
         else
             return NULL;
-       return sem;
+        return sem;
     }
 }
 
@@ -199,7 +190,7 @@ static void* eventReader(void* ctx)
     xLinkSchedulerState_t *curr = (xLinkSchedulerState_t*)ctx;
     ASSERT_X_LINK_R(curr, NULL);
 
-    xLinkEvent_t event = { 0 };
+    xLinkEvent_t event = { 0 };// to fix error C4700 in win
     event.header.id = -1;
     event.deviceHandle = curr->deviceHandle;
 
@@ -208,28 +199,30 @@ static void* eventReader(void* ctx)
     while (!curr->resetXLink) {
         int sc = glControlFunc->eventReceive(&event);
         mvLog(MVLOG_DEBUG,"Reading %s (scheduler %d, fd %p, event id %d, event stream_id %u, event size %u)\n",
-            TypeToStr(event.header.type), curr->schedulerId, event.deviceHandle.xLinkFD, event.header.id, event.header.streamId, event.header.size);
+              TypeToStr(event.header.type), curr->schedulerId, event.deviceHandle.xLinkFD, event.header.id, event.header.streamId, event.header.size);
 
+#ifdef __PC__
         if (event.header.type == XLINK_RESET_RESP) {
             curr->resetXLink = 1;
             mvLog(MVLOG_INFO,"eventReader thread stopped: reset");
             break;
         }
+#endif
 
         if (sc) {
+            // Only run this logic on the host side, the FW does not need this logic
+#ifdef __PC__
             if (sem_post(&curr->notifyDispatcherSem)) {
                 mvLog(MVLOG_ERROR,"can't post semaphore\n"); // stop eventSchedulerRun thread
             }
             mvLog(MVLOG_ERROR,"eventReader thread stopped (err %d)", sc);
+#endif
             break;
         }
     }
-
     return 0;
 }
 
-
-
 static int isEventTypeRequest(xLinkEventPriv_t* event)
 {
     if (event->packet.header.type < XLINK_REQUEST_LAST)
@@ -248,9 +241,9 @@ static void markEventReady(xLinkEventPriv_t* event)
     event->isServed = EVENT_READY;
 }
 
-static void markEventServed(xLinkEventPriv_t* event)
+static void eventPost(xLinkEventPriv_t* event)
 {
-    if(event->retEv){
+    if (event->retEv){
         // the xLinkEventPriv_t slot pointed to by "event" will be
         // recycled as soon as we mark it as EVENT_SERVED,
         // so before that, we copy the result event into the XLink API layer
@@ -261,9 +254,13 @@ static void markEventServed(xLinkEventPriv_t* event)
             mvLog(MVLOG_ERROR,"can't post semaphore\n");
         }
     }
-    event->isServed = EVENT_SERVED;
 }
 
+static void markEventServed(xLinkEventPriv_t* event)
+{
+    eventPost(event);
+    event->isServed = EVENT_SERVED;
+}
 
 static int dispatcherRequestServe(xLinkEventPriv_t * event, xLinkSchedulerState_t* curr){
     ASSERT_X_LINK(curr != NULL);
@@ -271,10 +268,15 @@ static int dispatcherRequestServe(xLinkEventPriv_t * event, xLinkSchedulerState_
     xLinkEventHeader_t *header = &event->packet.header;
     if (header->flags.bitField.block){ //block is requested
         markEventBlocked(event);
-    }else if(header->flags.bitField.localServe == 1 ||
-             (header->flags.bitField.ack == 0
-             && header->flags.bitField.nack == 1)){ //this event is served locally, or it is failed
+    } else if(header->flags.bitField.localServe == 1 ||
+              (header->flags.bitField.ack == 0
+               && header->flags.bitField.nack == 1)){ //this event is served locally, or it is failed
+#ifdef __PC__
         markEventServed(event);
+#else
+        eventPost(event);
+        return 1;
+#endif
     }else if (header->flags.bitField.ack == 1
               && header->flags.bitField.nack == 0){
         event->isServed = EVENT_PENDING;
@@ -298,11 +300,11 @@ static int dispatcherResponseServe(xLinkEventPriv_t * event, xLinkSchedulerState
         xLinkEventHeader_t *evHeader = &event->packet.header;
 
         if (curr->lQueue.q[i].isServed == EVENT_PENDING &&
-                        header->id == evHeader->id &&
-                        header->type == evHeader->type - XLINK_REQUEST_LAST -1)
+            header->id == evHeader->id &&
+            header->type == evHeader->type - XLINK_REQUEST_LAST -1)
         {
             mvLog(MVLOG_DEBUG,"----------------------ISserved %s\n",
-                    TypeToStr(header->type));
+                  TypeToStr(header->type));
             //propagate back flags
             header->flags = evHeader->flags;
             markEventServed(&curr->lQueue.q[i]);
@@ -310,14 +312,14 @@ static int dispatcherResponseServe(xLinkEventPriv_t * event, xLinkSchedulerState
         }
     }
     if (i == MAX_EVENTS) {
-        mvLog(MVLOG_FATAL,"no request for this response: %s %d %d\n", TypeToStr(event->packet.header.type), event->origin, event->packet.header.id);
+        mvLog(MVLOG_FATAL,"no request for this response: %s %d\n", TypeToStr(event->packet.header.type), event->origin);
         printf("#### (i == MAX_EVENTS) %s %d %d\n", TypeToStr(event->packet.header.type), event->origin, (int)event->packet.header.id);
         for (i = 0; i < MAX_EVENTS; i++)
         {
             xLinkEventHeader_t *header = &curr->lQueue.q[i].packet.header;
 
             printf("%d) header->id %i, header->type %s(%i), curr->lQueue.q[i].isServed %i, EVENT_PENDING %i\n", i, (int)header->id
-                     , TypeToStr(header->type), header->type, curr->lQueue.q[i].isServed, EVENT_PENDING);
+                , TypeToStr(header->type), header->type, curr->lQueue.q[i].isServed, EVENT_PENDING);
 
         }
         ASSERT_X_LINK(0);
@@ -329,7 +331,7 @@ static inline xLinkEventPriv_t* getNextElementWithState(xLinkEventPriv_t* base,
                                                         xLinkEventPriv_t* start, xLinkEventState_t state){
     xLinkEventPriv_t* tmp = start;
     while (start->isServed != state){
-        CIRCULAR_INCREMENT(start, end, base);
+        CIRCULAR_INCREMENT_BASE(start, end, base);
         if(tmp == start){
             break;
         }
@@ -357,10 +359,10 @@ static xLinkEventPriv_t* searchForReadyEvent(xLinkSchedulerState_t* curr)
 
 static xLinkEventPriv_t* getNextQueueElemToProc(eventQueueHandler_t *q ){
     xLinkEventPriv_t* event = NULL;
-    event = getNextElementWithState(q->base, q->end, q->curProc, EVENT_ALLOCATED);
-    if(event != NULL) {
+    if (q->cur != q->curProc) {
+        event = getNextElementWithState(q->base, q->end, q->curProc, EVENT_ALLOCATED);
         q->curProc = event;
-        CIRCULAR_INCREMENT(q->curProc, q->end, q->base);
+        CIRCULAR_INCREMENT_BASE(q->curProc, q->end, q->base);
     }
     return event;
 }
@@ -375,7 +377,7 @@ static xLinkEvent_t* addNextQueueElemToProc(xLinkSchedulerState_t* curr,
     xLinkEvent_t* ev;
     xLinkEventPriv_t* eventP = getNextElementWithState(q->base, q->end, q->cur, EVENT_SERVED);
     if (eventP == NULL) {
-        mvLog(MVLOG_ERROR, "Can not get next element");
+        mvLog(MVLOG_ERROR, "getNextElementWithState returned NULL");
         return NULL;
     }
     mvLog(MVLOG_DEBUG, "Received event %s %d", TypeToStr(event->header.type), o);
@@ -385,7 +387,6 @@ static xLinkEvent_t* addNextQueueElemToProc(xLinkSchedulerState_t* curr,
             mvLog(MVLOG_WARN, "Failed to unref sem");
         }
     }
-
     eventP->sem = sem;
     eventP->packet = *event;
     eventP->origin = o;
@@ -395,10 +396,9 @@ static xLinkEvent_t* addNextQueueElemToProc(xLinkSchedulerState_t* curr,
     }else{
         eventP->retEv = NULL;
     }
-    // Mark eventP as ALLOCATED to prevent it from being allocated again
-    eventP->isServed = EVENT_ALLOCATED;
     q->cur = eventP;
-    CIRCULAR_INCREMENT(q->cur, q->end, q->base);
+    eventP->isServed = EVENT_ALLOCATED;
+    CIRCULAR_INCREMENT_BASE(q->cur, q->end, q->base);
     return ev;
 }
 
@@ -406,20 +406,26 @@ static xLinkEventPriv_t* dispatcherGetNextEvent(xLinkSchedulerState_t* curr)
 {
     ASSERT_X_LINK_R(curr != NULL, NULL);
 
+    if (XLinkWaitSem(&curr->notifyDispatcherSem)) {
+        mvLog(MVLOG_ERROR,"can't post semaphore\n");
+    }
+
     xLinkEventPriv_t* event = NULL;
     event = searchForReadyEvent(curr);
     if (event) {
         return event;
     }
-    if (XLinkWaitSem(&curr->notifyDispatcherSem)) {
-        mvLog(MVLOG_ERROR,"can't post semaphore\n");
-        return NULL;
-    }
-    event = getNextQueueElemToProc(&curr->lQueue);
+
+    eventQueueHandler_t* hPriorityQueue = curr->queueProcPriority ? &curr->lQueue : &curr->rQueue;
+    eventQueueHandler_t* lPriorityQueue = curr->queueProcPriority ? &curr->rQueue : &curr->lQueue;
+    curr->queueProcPriority = curr->queueProcPriority ? 0 : 1;
+
+    event = getNextQueueElemToProc(hPriorityQueue);
     if (event) {
         return event;
     }
-    event = getNextQueueElemToProc(&curr->rQueue);
+    event = getNextQueueElemToProc(lPriorityQueue);
+
     return event;
 }
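The reworked dispatcherGetNextEvent() now blocks on notifyDispatcherSem before scanning, then alternates via queueProcPriority which queue (local or remote) it drains first, so neither side can starve the other. A minimal standalone sketch of that alternation pattern; toy_queue_t and toy_pop are illustrative, not XLink types:

#include <stdio.h>

typedef struct { int items[4]; int count; } toy_queue_t;

static int toy_pop(toy_queue_t *q) {
    if (q->count == 0) return -1;          /* nothing pending in this queue */
    return q->items[--q->count];
}

static int get_next_event(toy_queue_t *local, toy_queue_t *remote, int *priority) {
    toy_queue_t *high = *priority ? local  : remote;
    toy_queue_t *low  = *priority ? remote : local;
    *priority = !*priority;                 /* flip for the next call */
    int ev = toy_pop(high);
    return (ev != -1) ? ev : toy_pop(low);
}

int main(void) {
    toy_queue_t local  = { {1, 2}, 2 };
    toy_queue_t remote = { {10, 20}, 2 };
    int priority = 0;
    for (int i = 0; i < 4; ++i)             /* alternates between the two queues */
        printf("served %d\n", get_next_event(&local, &remote, &priority));
    return 0;
}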
 
@@ -436,7 +442,6 @@ static int isAvailableScheduler(xLinkSchedulerState_t* curr)
 
 static void closeDeviceFdAndResetScheduler(xLinkSchedulerState_t* curr)
 {
-
     mvLog(MVLOG_INFO, "Dispatcher Cleaning...");
     glControlFunc->closeDeviceFd(&curr->deviceHandle);
     curr->schedulerId = -1;
@@ -453,36 +458,34 @@ static void closeDeviceFdAndResetScheduler(xLinkSchedulerState_t* curr)
     }
     numSchedulers--;
     mvLog(MVLOG_INFO,"Cleaning Successfully\n");
-
 }
 
-
 static int dispatcherReset(xLinkSchedulerState_t* curr)
 {
     ASSERT_X_LINK(curr != NULL);
+#ifdef __PC__
     CHECK_MUTEX_SUCCESS_RC(pthread_mutex_lock(&reset_mutex), 1);
 
     if(!isAvailableScheduler(curr)) {
         CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&reset_mutex));
         return 1;
     }
+#endif
 
     mvLog(MVLOG_INFO, "Resetting...");
 
-    glControlFunc->closeLink(curr->deviceHandle.xLinkFD);
-
-    //notifyDispatcherSem +1 for NULL event, avoid dispatcher blocking.
+    glControlFunc->closeLink(curr->deviceHandle.xLinkFD, 1);
     if (sem_post(&curr->notifyDispatcherSem)) {
         mvLog(MVLOG_ERROR,"can't post semaphore\n"); //to allow us to get a NULL event
     }
-
     xLinkEventPriv_t* event = dispatcherGetNextEvent(curr);
     while (event != NULL) {
         mvLog(MVLOG_INFO, "dropped event is %s, status %d\n",
               TypeToStr(event->packet.header.type), event->isServed);
-        // although there is no no execution for this event, also mark it as being served without success
-        // caller will be informed and internal event memory slot will be de-allocated
+
+#ifdef __PC__
         markEventServed(event);
+#endif
         event = dispatcherGetNextEvent(curr);
     }
 
@@ -492,10 +495,20 @@ static int dispatcherReset(xLinkSchedulerState_t* curr)
         markEventServed(event);
         event = getNextElementWithState(curr->lQueue.base, curr->lQueue.end, curr->lQueue.base, EVENT_PENDING);
     }
+
+#ifdef __PC__
     closeDeviceFdAndResetScheduler(curr);
     CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&reset_mutex));
+#else
+    glControlFunc->closeDeviceFd(&curr->deviceHandle);
+    curr->schedulerId = -1;
+    numSchedulers--;
+#endif
+
+    mvLog(MVLOG_DEBUG,"Reset Successfully\n");
     return 0;
 }
+
 #if (defined(_WIN32) || defined(_WIN64))
 static void* __cdecl eventSchedulerRun(void* ctx)
 #else
@@ -512,11 +525,22 @@ static void* eventSchedulerRun(void* ctx)
     pthread_attr_t attr;
     int sc;
     int res;
-    if (pthread_attr_init(&attr) !=0) {
+    if (pthread_attr_init(&attr) != 0) {
         mvLog(MVLOG_ERROR,"pthread_attr_init error");
         return NULL;
     }
-
+#ifndef __PC__
+    if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED) != 0) {
+        pthread_attr_destroy(&attr);
+        mvLog(MVLOG_ERROR,"pthread_attr_setinheritsched error");
+        return NULL;
+    }
+    if (pthread_attr_setschedpolicy(&attr, SCHED_RR) != 0) {
+        pthread_attr_destroy(&attr);
+        mvLog(MVLOG_ERROR,"pthread_attr_setschedpolicy error");
+        return NULL;
+    }
+#endif
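On the device side (#ifndef __PC__) the reader thread is now created with explicit round-robin scheduling. A hedged standalone sketch of that attribute sequence; the priority value and the worker body are placeholders, and on a desktop Linux host SCHED_RR typically needs elevated privileges:

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *worker(void *arg) { (void)arg; return NULL; }

int main(void) {
    pthread_attr_t attr;
    pthread_t tid;

    if (pthread_attr_init(&attr) != 0) return 1;
    /* Without PTHREAD_EXPLICIT_SCHED the policy below is ignored and the new
     * thread simply inherits the creator's scheduling attributes. */
    struct sched_param sp = { .sched_priority = sched_get_priority_min(SCHED_RR) };
    if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED) != 0 ||
        pthread_attr_setschedpolicy(&attr, SCHED_RR) != 0 ||
        pthread_attr_setschedparam(&attr, &sp) != 0) {
        pthread_attr_destroy(&attr);
        return 1;
    }
    if (pthread_create(&tid, &attr, worker, NULL) != 0)
        fprintf(stderr, "pthread_create failed (SCHED_RR may require privileges)\n");
    else
        pthread_join(tid, NULL);
    pthread_attr_destroy(&attr);
    return 0;
}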
     sc = pthread_create(&readerThreadId, &attr, eventReader, curr);
     if (sc) {
         mvLog(MVLOG_ERROR, "Thread creation failed");
@@ -525,16 +549,21 @@ static void* eventSchedulerRun(void* ctx)
         }
         return NULL;
     }
-    char eventReaderThreadName[20];
+#ifndef __APPLE__
+    char eventReaderThreadName[MVLOG_MAXIMUM_THREAD_NAME_SIZE];
     snprintf(eventReaderThreadName, sizeof(eventReaderThreadName), "EventRead%.2dThr", schedulerId);
     sc = pthread_setname_np(readerThreadId, eventReaderThreadName);
     if (sc != 0) {
         perror("Setting name for event reader thread failed");
     }
+#endif
+#ifdef __PC__
     sc = pthread_attr_destroy(&attr);
     if (sc) {
         mvLog(MVLOG_WARN, "Thread attr destroy failed");
     }
+#endif
+
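The thread-name buffers are now sized by MVLOG_MAXIMUM_THREAD_NAME_SIZE and the calls are skipped on macOS, where pthread_setname_np() has a different signature. A small sketch of the naming pattern; 16 is assumed for the size constant, matching the Linux limit of 15 characters plus the terminator:

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

#define EXAMPLE_THREAD_NAME_SIZE 16   /* stands in for MVLOG_MAXIMUM_THREAD_NAME_SIZE */

static void *worker(void *arg) { (void)arg; return NULL; }

int main(void) {
    pthread_t tid;
    if (pthread_create(&tid, NULL, worker, NULL) != 0) return 1;

    char name[EXAMPLE_THREAD_NAME_SIZE];
    snprintf(name, sizeof(name), "EventRead%.2dThr", 7);   /* 14 chars, fits the limit */
    if (pthread_setname_np(tid, name) != 0)
        fprintf(stderr, "Setting the thread name failed\n");

    pthread_join(tid, NULL);
    return 0;
}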
     xLinkEventPriv_t* event;
     xLinkEventPriv_t response;
 
@@ -542,10 +571,17 @@ static void* eventSchedulerRun(void* ctx)
 
     while (!curr->resetXLink) {
         event = dispatcherGetNextEvent(curr);
-        if (event == NULL) {
+        if(event == NULL)
+        {
+            mvLog(MVLOG_ERROR,"Dispatcher received NULL event!");
+            /// Skip the event instead of asserting, so that only
+            /// the affected xlink channel breaks, not the whole dispatcher
+#ifdef __PC__
             break;
+#else
+            continue;
+#endif
         }
-
         ASSERT_X_LINK_R(event->packet.deviceHandle.xLinkFD == curr->deviceHandle.xLinkFD, NULL);
         getRespFunction getResp;
         xLinkEvent_t* toSend;
@@ -560,26 +596,28 @@ static void* eventSchedulerRun(void* ctx)
 
         res = getResp(&event->packet, &response.packet);
         if (isEventTypeRequest(event)){
+            int served = 0;
             if (event->origin == EVENT_LOCAL){ //we need to do this for locals only
-                dispatcherRequestServe(event, curr);
+                served = dispatcherRequestServe(event, curr);
             }
-            // For PCIE and in with Connect to booted option don't send reset request
-
-            if (res == 0 && event->packet.header.flags.bitField.localServe == 0){
-                // FIXME We shouldn't send reset request for PCIE and with turned on "NO_BOOT" cmake option
-                //  Also, we can't just close evenReader thread, as WinPthread don't have suitable function for this emergency exit,
-                //  so, let's pretend that would be ping request, and then we can correctly close eventReader thread
-
+            if (res == 0 && event->packet.header.flags.bitField.localServe == 0) {
+#ifndef __PC__
+                /*
+                 * Device side: sending must succeed here; if it fails, the assert exits and the device resets
+                 */
+                ASSERT_X_LINK_R(glControlFunc->eventSend(toSend) == 0, NULL);
+#else
+                (void)served;
                 if (toSend->header.type == XLINK_RESET_REQ) {
                     if(toSend->deviceHandle.protocol == X_LINK_PCIE) {
                         toSend->header.type = XLINK_PING_REQ;
                         curr->resetXLink = 1;
-                        mvLog(MVLOG_INFO, "Request for reboot not sent");
+                        mvLog(MVLOG_DEBUG, "Request for reboot not sent, only ping event");
                     } else {
 #if defined(NO_BOOT)
                         toSend->header.type = XLINK_PING_REQ;
                         curr->resetXLink = 1;
-                        mvLog(MVLOG_INFO, "Request for reboot not sent");
+                        mvLog(MVLOG_INFO, "Request for reboot not sent, only ping event");
 #endif
                     }
                 }
@@ -587,14 +625,30 @@ static void* eventSchedulerRun(void* ctx)
                 if (glControlFunc->eventSend(toSend) != 0) {
                     mvLog(MVLOG_ERROR, "Event sending failed");
                 }
+#endif
+            }
+#ifndef __PC__
+            if (event->origin == EVENT_REMOTE || served) {
+                event->isServed = EVENT_SERVED;
             }
+#endif
         } else {
             if (event->origin == EVENT_REMOTE){ // match remote response with the local request
                 dispatcherResponseServe(event, curr);
             }
+#ifndef __PC__
+            event->isServed = EVENT_SERVED;
+#endif
         }
 
         //TODO: dispatcher shouldn't know about this packet. Seems to be easily move-able to protocol
+#ifndef __PC__
+        if (event->origin == EVENT_REMOTE) {
+            if (event->packet.header.type == XLINK_RESET_REQ) {
+                curr->resetXLink = 1;
+            }
+        }
+#else
         if (event->packet.header.type == XLINK_RESET_REQ) {
             curr->resetXLink = 1;
         }
@@ -603,13 +657,20 @@ static void* eventSchedulerRun(void* ctx)
         if (event->origin == EVENT_REMOTE){
             event->isServed = EVENT_SERVED;
         }
+#endif
     }
-
     sc = pthread_join(readerThreadId, NULL);
     if (sc) {
         mvLog(MVLOG_ERROR, "Waiting for thread failed");
     }
 
+#ifndef __PC__
+    if (pthread_attr_destroy(&attr) != 0) {
+        mvLog(MVLOG_ERROR,"pthread_attr_destroy error");
+        return NULL;
+    }
+#endif
+
     if (dispatcherReset(curr) != 0) {
         mvLog(MVLOG_WARN, "Failed to reset");
     }
@@ -655,7 +716,6 @@ xLinkEvent_t* dispatcherAddEvent(xLinkEventOrigin_t origin, xLinkEvent_t *event)
     if(curr->resetXLink) {
         return NULL;
     }
-
     mvLog(MVLOG_DEBUG, "Receiving event %s %d\n", TypeToStr(event->header.type), origin);
     if (XLinkWaitSem(&curr->addEventSem)) {
         mvLog(MVLOG_ERROR,"can't wait semaphore\n");
@@ -675,6 +735,7 @@ xLinkEvent_t* dispatcherAddEvent(xLinkEventOrigin_t origin, xLinkEvent_t *event)
             if (sem_post(&curr->addEventSem)) {
                 mvLog(MVLOG_ERROR,"can't post semaphore\n");
             }
+
             return NULL;
         }
         event->header.flags.raw = 0;
@@ -701,9 +762,12 @@ int dispatcherWaitEventComplete(xLinkDeviceHandle_t* deviceHandle, unsigned int
     if (id == NULL) {
         return -1;
     }
-
+#ifndef __PC__
+    (void)timeout;
+    return XLinkWaitSem(id);
+#else
     int rc = XLinkWaitSemUserMode(id, timeout);
-    if (rc && deviceHandle->protocol != X_LINK_PCIE) {
+    if (rc) {
         xLinkEvent_t event = {0};
         event.header.type = XLINK_RESET_REQ;
         event.deviceHandle = *deviceHandle;
@@ -716,6 +780,7 @@ int dispatcherWaitEventComplete(xLinkDeviceHandle_t* deviceHandle, unsigned int
     }
 
     return rc;
+#endif
 }
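On the host, dispatcherWaitEventComplete() keeps the user-mode timed wait and now schedules a reset request for any protocol when it times out; on the device the timeout argument is ignored and the plain XLinkWaitSem() is used. XLinkWaitSemUserMode() itself is not part of this diff; a hedged sketch of the kind of timed wait it presumably wraps:

#include <errno.h>
#include <semaphore.h>
#include <time.h>

/* Wait up to timeout_ms for the semaphore; 0 on success,
 * -1 with errno == ETIMEDOUT on timeout. */
int wait_sem_timed(sem_t *sem, unsigned int timeout_ms) {
    struct timespec ts;
    if (clock_gettime(CLOCK_REALTIME, &ts) != 0)
        return -1;
    ts.tv_sec  += timeout_ms / 1000;
    ts.tv_nsec += (long)(timeout_ms % 1000) * 1000000L;
    if (ts.tv_nsec >= 1000000000L) { ts.tv_sec++; ts.tv_nsec -= 1000000000L; }

    int rc;
    while ((rc = sem_timedwait(sem, &ts)) == -1 && errno == EINTR)
        continue;                    /* restart if interrupted by a signal */
    return rc;
}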
 
 int dispatcherUnblockEvent(eventId_t id, xLinkEventType_t type, streamId_t stream, void* xLinkFD)
@@ -731,13 +796,16 @@ int dispatcherUnblockEvent(eventId_t id, xLinkEventType_t type, streamId_t strea
     {
         if (blockedEvent->isServed == EVENT_BLOCKED &&
             ((blockedEvent->packet.header.id == id || id == -1)
-            && blockedEvent->packet.header.type == type
-            && blockedEvent->packet.header.streamId == stream))
+             && blockedEvent->packet.header.type == type
+             && blockedEvent->packet.header.streamId == stream))
         {
             mvLog(MVLOG_DEBUG,"unblocked**************** %d %s\n",
                   (int)blockedEvent->packet.header.id,
                   TypeToStr((int)blockedEvent->packet.header.type));
             markEventReady(blockedEvent);
+            if (sem_post(&curr->notifyDispatcherSem)){
+                mvLog(MVLOG_ERROR, "can't post semaphore\n");
+            }
             return 1;
         } else {
             mvLog(MVLOG_DEBUG,"%d %s\n",
@@ -762,10 +830,12 @@ int findAvailableScheduler()
  */
 int dispatcherStart(xLinkDeviceHandle_t* deviceHandle)
 {
+#ifdef __PC__
     if (deviceHandle->xLinkFD == NULL) {
         mvLog(MVLOG_ERROR, "Invalid device filedescriptor");
         return -1;
     }
+#endif
 
     pthread_attr_t attr;
     int eventIdx;
@@ -774,16 +844,16 @@ int dispatcherStart(xLinkDeviceHandle_t* deviceHandle)
         mvLog(MVLOG_ERROR,"Max number Schedulers reached!\n");
         return -1;
     }
-
     int idx = findAvailableScheduler();
-    if (idx < 0) {
-        mvLog(MVLOG_ERROR,"Available sheduler not found");
+    if (idx == -1) {
+        mvLog(MVLOG_ERROR,"Max number Schedulers reached!\n");
         return -1;
     }
 
     memset(&schedulerState[idx], 0, sizeof(xLinkSchedulerState_t));
 
     schedulerState[idx].semaphores = 0;
+    schedulerState[idx].queueProcPriority = 0;
 
     schedulerState[idx].resetXLink = 0;
     schedulerState[idx].deviceHandle = *deviceHandle;
@@ -811,7 +881,6 @@ int dispatcherStart(xLinkDeviceHandle_t* deviceHandle)
     }
     if (sem_init(&schedulerState[idx].notifyDispatcherSem, 0, 0)) {
         perror("Can't create semaphore\n");
-        return -1;
     }
     localSem_t* temp = schedulerState[idx].eventSemaphores;
     while (temp < schedulerState[idx].eventSemaphores + MAXIMUM_SEMAPHORES) {
@@ -820,9 +889,22 @@ int dispatcherStart(xLinkDeviceHandle_t* deviceHandle)
     }
     if (pthread_attr_init(&attr) != 0) {
         mvLog(MVLOG_ERROR,"pthread_attr_init error");
+#ifdef __PC__
         return -1;
+#endif
     }
 
+#ifndef __PC__
+    if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED) != 0) {
+        mvLog(MVLOG_ERROR,"pthread_attr_setinheritsched error");
+        pthread_attr_destroy(&attr);
+    }
+    if (pthread_attr_setschedpolicy(&attr, SCHED_RR) != 0) {
+        mvLog(MVLOG_ERROR,"pthread_attr_setschedpolicy error");
+        pthread_attr_destroy(&attr);
+    }
+#endif
+
     XLinkWaitSem(&addSchedulerSem);
     mvLog(MVLOG_DEBUG,"%s() starting a new thread - schedulerId %d \n", __func__, idx);
     int sc = pthread_create(&schedulerState[idx].xLinkThreadId,
@@ -834,22 +916,25 @@ int dispatcherStart(xLinkDeviceHandle_t* deviceHandle)
         if (pthread_attr_destroy(&attr) != 0) {
             perror("Thread attr destroy failed\n");
         }
+#ifdef __PC__
         return -1;
+#endif
     }
-
-    char schedulerThreadName[20];
+#ifndef __APPLE__
+    char schedulerThreadName[MVLOG_MAXIMUM_THREAD_NAME_SIZE];
     snprintf(schedulerThreadName, sizeof(schedulerThreadName), "Scheduler%.2dThr", schedulerState[idx].schedulerId);
     sc = pthread_setname_np(schedulerState[idx].xLinkThreadId, schedulerThreadName);
     if (sc != 0) {
         perror("Setting name for indexed scheduler thread failed");
     }
-
+#endif
+#ifdef __PC__
     pthread_detach(schedulerState[idx].xLinkThreadId);
-    numSchedulers++;
+#endif
 
-    sc = pthread_attr_destroy(&attr);
-    if (sc) {
-        perror("Thread attr destroy failed");
+    numSchedulers++;
+    if (pthread_attr_destroy(&attr) != 0) {
+        mvLog(MVLOG_ERROR,"pthread_attr_destroy error");
     }
 
     sem_post(&addSchedulerSem);
@@ -857,10 +942,9 @@ int dispatcherStart(xLinkDeviceHandle_t* deviceHandle)
     return 0;
 }
 
-/**
- * @brief Initialize dispatcher functions and reset all schedulers
- */
 int dispatcherInitialize(struct dispatcherControlFunctions* controlFunc) {
+    // On the device side, this also starts the scheduler thread that communicates with the PC
+
     int i;
     if (!controlFunc ||
         !controlFunc->eventReceive ||
@@ -879,7 +963,14 @@ int dispatcherInitialize(struct dispatcherControlFunctions* controlFunc) {
     for (i = 0; i < MAX_SCHEDULERS; i++){
         schedulerState[i].schedulerId = -1;
     }
+
+#ifndef __PC__
+    xLinkDeviceHandle_t temp = {0};
+    temp.protocol = X_LINK_ANY_PROTOCOL;
+    return dispatcherStart(&temp); //myriad has one
+#else
     return 0;
+#endif
 }
 
 int dispatcherClean(void* xLinkFD)
@@ -899,6 +990,4 @@ int dispatcherClean(void* xLinkFD)
     return 0;
 }
 
-
-
 /* end of file */
index df01d11..0b2bf15 100644 (file)
@@ -33,7 +33,7 @@ struct dispatcherControlFunctions {
                                 int (*eventReceive) (xLinkEvent_t*);
                                 getRespFunction localGetResponse;
                                 getRespFunction remoteGetResponse;
-                                void (*closeLink) (void* fd);
+                                void (*closeLink) (void* fd, int fullClose);
                                 void (*closeDeviceFd) (xLinkDeviceHandle_t* deviceHandle);
                                 };
 
@@ -45,4 +45,4 @@ int dispatcherClean(void* xLinkFD);
 }
 #endif
 
-#endif
+#endif
\ No newline at end of file
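closeLink() now takes a second argument. A hedged sketch of a platform-layer callback matching the new signature; the interpretation of fullClose (tear the link down completely versus only release host-side state) is an assumption, and examplePlatformCloseLink is an illustrative name:

#include <stdio.h>

static void examplePlatformCloseLink(void *fd, int fullClose)
{
    if (fullClose)
        printf("closing link %p and releasing all of its resources\n", fd);
    else
        printf("closing link %p while keeping the session state\n", fd);
}

int main(void)
{
    int dummyFd = 0;                       /* stands in for a real link descriptor */
    examplePlatformCloseLink(&dummyFd, 1); /* full close */
    examplePlatformCloseLink(&dummyFd, 0); /* partial close */
    return 0;
}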
index 77acc3d..1de0b16 100644 (file)
@@ -2,6 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+///
+/// @brief     Application configuration Leon header
+///
+
 #ifndef _XLINK_LINKPLATFORM_H
 #define _XLINK_LINKPLATFORM_H
 
@@ -17,24 +21,40 @@ extern "C"
 #define MAX_POOLS_ALLOC 32
 #define PACKET_LENGTH (64*1024)
 
+typedef enum {
+    X_LINK_PLATFORM_SUCCESS = 0,
+    X_LINK_PLATFORM_DEVICE_NOT_FOUND = -1,
+    X_LINK_PLATFORM_ERROR = -2,
+    X_LINK_PLATFORM_TIMEOUT = -3,
+    X_LINK_PLATFORM_DRIVER_NOT_LOADED = -4
+} xLinkPlatformErrorCode_t;
+
+
 int XLinkWrite(xLinkDeviceHandle_t* deviceHandle, void* data, int size, unsigned int timeout);
 int XLinkRead(xLinkDeviceHandle_t* deviceHandle, void* data, int size, unsigned int timeout);
 int XLinkPlatformConnect(const char* devPathRead, const char* devPathWrite,
-    XLinkProtocol_t protocol, void** fd);
+                         XLinkProtocol_t protocol, void** fd);
 void XLinkPlatformInit();
 
 /**
- * @brief Return Myriad device name on index
+ * @brief Return Myriad device description which meets the requirements
  */
-int XLinkPlatformFindDeviceName(int index,
+xLinkPlatformErrorCode_t XLinkPlatformFindDeviceName(XLinkDeviceState_t state,
+                                                     const deviceDesc_t in_deviceRequirements,
+                                                     deviceDesc_t* out_foundDevice);
+
+xLinkPlatformErrorCode_t XLinkPlatformFindArrayOfDevicesNames(
     XLinkDeviceState_t state,
-    deviceDesc_t* in_deviceRequirements,
-    deviceDesc_t* out_foundDevice);
+    const deviceDesc_t in_deviceRequirements,
+    deviceDesc_t* out_foundDevicePtr,
+    const unsigned int devicesArraySize,
+    unsigned int *out_amountOfFoundDevices);
 
 int XLinkPlatformIsDescriptionValid(deviceDesc_t *in_deviceDesc);
 
-int XLinkPlatformToPid(const XLinkPlatform_t platform);
+int XLinkPlatformToPid(const XLinkPlatform_t platform, const XLinkDeviceState_t state);
 XLinkPlatform_t XLinkPlatformPidToPlatform(const int pid);
+XLinkDeviceState_t XLinkPlatformPidToState(const int pid);
 
 int XLinkPlatformBootRemote(deviceDesc_t* deviceDesc,
                             const char* binaryPath);
@@ -43,14 +63,6 @@ int XLinkPlatformCloseRemote(xLinkDeviceHandle_t* deviceHandle);
 void* allocateData(uint32_t size, uint32_t alignment);
 void deallocateData(void* ptr,uint32_t size, uint32_t alignment);
 
-typedef enum xLinkPlatformErrorCode {
-    X_LINK_PLATFORM_SUCCESS = 0,
-    X_LINK_PLATFORM_DEVICE_NOT_FOUND = -1,
-    X_LINK_PLATFORM_ERROR = -2,
-    X_LINK_PLATFORM_TIMEOUT = -3,
-    X_LINK_PLATFORM_DRIVER_NOT_LOADED = -4
-} xLinkPlatformErrorCode_t;
-
 #ifdef __cplusplus
 }
 #endif
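XLinkPlatformFindDeviceName() now filters by device state and requirements and returns an error code, and the new XLinkPlatformFindArrayOfDevicesNames() fills a caller-provided array. A hedged usage sketch; it assumes the header is included as "XLinkPlatform.h", that linking against XLink is set up, and that eight result slots are enough for the host in question:

#include <stdio.h>
#include "XLinkPlatform.h"

static void listUnbootedUsbDevices(void)
{
    deviceDesc_t requirements = { .protocol = X_LINK_USB_VSC,
                                  .platform = X_LINK_ANY_PLATFORM };
    deviceDesc_t found[8] = {{0}};
    unsigned int amount = 0;

    xLinkPlatformErrorCode_t rc = XLinkPlatformFindArrayOfDevicesNames(
        X_LINK_UNBOOTED, requirements, found, 8, &amount);

    if (rc != X_LINK_PLATFORM_SUCCESS) {
        fprintf(stderr, "device search failed: %d\n", (int)rc);
        return;
    }
    for (unsigned int i = 0; i < amount; ++i)
        printf("device %u: %s\n", i, found[i].name);
}

int main(void) { listUnbootedUsbDevices(); return 0; }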
diff --git a/inference-engine/thirdparty/movidius/XLink/shared/XLinkPlatform_tool.h b/inference-engine/thirdparty/movidius/XLink/shared/XLinkPlatform_tool.h
new file mode 100644 (file)
index 0000000..52fb6e3
--- /dev/null
@@ -0,0 +1,41 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef _XLINKPLATFORM_TOOL_H
+#define _XLINKPLATFORM_TOOL_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef NDEBUG  // Release configuration
+    #ifndef __PC__
+        #define ASSERT_X_LINK_PLATFORM(x)   if(!(x)) { exit(EXIT_FAILURE); }
+        #define ASSERT_X_LINK_PLATFORM_R(x, r) ASSERT_X_LINK_PLATFORM(x)
+    #else
+        #define ASSERT_X_LINK_PLATFORM(x)   if(!(x)) { return X_LINK_PLATFORM_ERROR; }
+        #define ASSERT_X_LINK_PLATFORM_R(x, r)   if(!(x)) { return r; }
+    #endif
+#else   // Debug configuration
+    #ifndef __PC__
+        #define ASSERT_X_LINK_PLATFORM(x)   if(!(x)) { fprintf(stderr, "%s:%d:\n Assertion Failed: %s\n", __FILE__, __LINE__, #x); exit(EXIT_FAILURE); }
+        #define ASSERT_X_LINK_PLATFORM_R(x, r) ASSERT_X_LINK_PLATFORM(x)
+    #else
+        #define ASSERT_X_LINK_PLATFORM(x)        if(!(x)) {  \
+            mvLog(MVLOG_ERROR, "%s:%d\n\t Assertion Failed: %s", __FILE__, __LINE__, #x);   \
+            return X_LINK_PLATFORM_ERROR;    \
+        }
+        #define ASSERT_X_LINK_PLATFORM_R(x, r)   if(!(x)) {  \
+            mvLog(MVLOG_ERROR, "%s:%d\n\t Assertion Failed: %s", __FILE__, __LINE__, #x);   \
+            return r;               \
+        }
+    #endif
+#endif //  NDEBUG
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_XLINKPLATFORM_TOOL_H
index 419136a..635bba3 100644 (file)
 #define _XLINKPRIVATEDEFINES_H
 
 #ifdef _XLINK_ENABLE_PRIVATE_INCLUDE_
-
-#include <stdint.h>
-#if (defined(_WIN32) || defined(_WIN64))
-#include "win_semaphore.h"
-#else
-#include <semaphore.h>
-#endif
+# if (defined(_WIN32) || defined(_WIN64))
+#  include "win_semaphore.h"
+# else
+#  ifdef __APPLE__
+#   include "pthread_semaphore.h"
+#  else
+#   include <semaphore.h>
+#  endif
+# endif
 #include <XLinkPublicDefines.h>
 
 #ifdef __cplusplus
@@ -25,52 +27,11 @@ extern "C"
 {
 #endif
 
-#ifdef USE_USB_VSC
-#define HEADER_SIZE (64-12 -8)
-#else
 #define HEADER_SIZE (64-12 -8)
-#endif
 
 #define MAXIMUM_SEMAPHORES 32
 #define __CACHE_LINE_SIZE 64
 
-#ifdef NDEBUG  // Release configuration
-    #ifndef __PC__
-        #define ASSERT_X_LINK(x)   if(!(x)) { exit(EXIT_FAILURE); }
-        #define ASSERT_X_LINK_R(x, r) ASSERT_X_LINK(x)
-    #else
-        #define ASSERT_X_LINK(x)   if(!(x)) { return X_LINK_ERROR; }
-        #define ASSERT_X_LINK_R(x, r)   if(!(x)) { return r; }
-    #endif
-#else   // Debug configuration
-    #ifndef __PC__
-        #define ASSERT_X_LINK(x)   if(!(x)) { fprintf(stderr, "%s:%d:\n Assertion Failed: %s\n", __FILE__, __LINE__, #x); exit(EXIT_FAILURE); }
-        #define ASSERT_X_LINK_R(x, r) ASSERT_X_LINK(x)
-    #else
-        #define ASSERT_X_LINK(x)   if(!(x)) { fprintf(stderr, "%s:%d:\n Assertion Failed: %s\n", __FILE__, __LINE__, #x); return X_LINK_ERROR; }
-        #define ASSERT_X_LINK_R(x, r)   if(!(x)) { fprintf(stderr, "%s:%d:\n Assertion Failed: %s\n", __FILE__, __LINE__, #x); return r; }
-    #endif
-#endif //  NDEBUG
-
-#ifndef CHECK_MUTEX_SUCCESS
-#define CHECK_MUTEX_SUCCESS(call)  {                                \
-    int error;                                                      \
-    if ((error = (call))) {                                         \
-      mvLog(MVLOG_ERROR, "%s failed with error: %d", #call, error); \
-    }                                                               \
-}
-#endif  // CHECK_MUTEX_SUCCESS
-
-#ifndef CHECK_MUTEX_SUCCESS_RC
-#define CHECK_MUTEX_SUCCESS_RC(call, rc)  {                         \
-    int error;                                                      \
-    if ((error = (call))) {                                         \
-      mvLog(MVLOG_ERROR, "%s failed with error: %d", #call, error); \
-      return rc;                                                    \
-    }                                                               \
-}
-#endif  // CHECK_MUTEX_SUCCESS_RC
-
 typedef int32_t eventId_t;
 
 /**
@@ -80,7 +41,7 @@ typedef enum {
     XLINK_NOT_INIT,
     XLINK_UP,
     XLINK_DOWN,
-} xLinkState_t;
+}xLinkState_t;
 
 /**
  * @brief Device description
@@ -114,7 +75,7 @@ typedef struct{
     uint32_t closeStreamInitiated;
 
     sem_t sem;
-} streamDesc_t;
+}streamDesc_t;
 
 /**
  * @brief XLink primitive for each device
@@ -126,13 +87,18 @@ typedef struct xLinkDesc_t {
     xLinkState_t peerState;
     xLinkDeviceHandle_t deviceHandle;
     linkId_t id;
+
+    //Deprecated fields. Begin.
+    int hostClosedFD;
+    //Deprecated fields. End.
+
 } xLinkDesc_t;
 
 
 //events which are coming from remote
 typedef enum
 {
-    /*USB-PCIE related events*/
+    /*USB-X_LINK_PCIE related events*/
     XLINK_WRITE_REQ,
     XLINK_READ_REQ,
     XLINK_READ_REL_REQ,
@@ -151,7 +117,7 @@ typedef enum
     XLINK_RESET_RESP,
     XLINK_RESP_LAST,
 
-    /*IPC related events*/
+    /*X_LINK_IPC related events*/
     IPC_WRITE_REQ,
     IPC_READ_REQ,
     IPC_CREATE_STREAM_REQ,
@@ -169,9 +135,15 @@ typedef enum
     EVENT_REMOTE,
 } xLinkEventOrigin_t;
 
-#define MAX_EVENTS 64
+#ifdef __PC__
 #define MAX_LINKS 32
+#else
+#define MAX_LINKS 1
+#endif
+
+#define MAX_EVENTS 64
 #define MAX_SCHEDULERS MAX_LINKS
+#define XLINK_MAX_DEVICES MAX_LINKS
 
 typedef struct xLinkEventHeader_t{
     eventId_t           id;
@@ -201,7 +173,6 @@ typedef struct xLinkEvent_t {
 }xLinkEvent_t;
 
 int XLinkWaitSem(sem_t* sem);
-
 int XLinkWaitSemUserMode(sem_t* sem, unsigned int timeout);
 
 const char* XLinkErrorToStr(XLinkError_t rc);
index a6abf21..1f0b3e5 100644 (file)
@@ -62,8 +62,7 @@ typedef enum{
 typedef uint32_t streamId_t;
 typedef uint8_t linkId_t;
 
-typedef struct deviceDesc_t
-{
+typedef struct {
     XLinkProtocol_t protocol;
     XLinkPlatform_t platform;
     char name[XLINK_MAX_NAME_SIZE];
@@ -73,7 +72,6 @@ typedef struct streamPacketDesc_t
 {
     uint8_t* data;
     uint32_t length;
-
 } streamPacketDesc_t;
 
 typedef struct XLinkProf_t
@@ -90,16 +88,34 @@ typedef struct XLinkGlobalHandler_t
 {
     int profEnable;
     XLinkProf_t profilingData;
+
+    //Deprecated fields. Begin.
+    int loglevel;
+    int protocol;
+    //Deprecated fields. End.
 } XLinkGlobalHandler_t;
 
 typedef struct
 {
     char* devicePath;
     char* devicePath2;
-    linkId_t linkId;
+    int linkId;
     XLinkProtocol_t protocol;
 } XLinkHandler_t;
 
+
+//Deprecated defines. Begin.
+
+typedef enum{
+    USB_VSC = 0,
+    USB_CDC,
+    PCIE,
+    IPC,
+    NMB_OF_PROTOCOLS
+} XLinkProtocol_deprecated_t;
+
+//Deprecated defines. End.
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/inference-engine/thirdparty/movidius/XLink/shared/XLink_tool.h b/inference-engine/thirdparty/movidius/XLink/shared/XLink_tool.h
new file mode 100644 (file)
index 0000000..6752d05
--- /dev/null
@@ -0,0 +1,100 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef _XLINK_TOOL_H
+#define _XLINK_TOOL_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#ifdef NDEBUG  // Release configuration
+    #ifndef __PC__
+        #define ASSERT_X_LINK(x)   if(!(x)) { exit(EXIT_FAILURE); }
+        #define ASSERT_X_LINK_R(x, r) ASSERT_X_LINK(x)
+    #else
+        #define ASSERT_X_LINK(x)   if(!(x)) { return X_LINK_ERROR; }
+        #define ASSERT_X_LINK_R(x, r)   if(!(x)) { return r; }
+    #endif
+#else   // Debug configuration
+    #ifndef __PC__
+        #define ASSERT_X_LINK(x)   if(!(x)) { fprintf(stderr, "%s:%d:\n Assertion Failed: %s\n", __FILE__, __LINE__, #x); exit(EXIT_FAILURE); }
+        #define ASSERT_X_LINK_R(x, r) ASSERT_X_LINK(x)
+    #else
+        #define ASSERT_X_LINK(x)        if(!(x)) {  \
+            mvLog(MVLOG_ERROR, "%s:%d\n\t Assertion Failed: %s", __FILE__, __LINE__, #x);   \
+            return X_LINK_ERROR;    \
+        }
+        #define ASSERT_X_LINK_R(x, r)   if(!(x)) {  \
+            mvLog(MVLOG_ERROR, "%s:%d\n\t Assertion Failed: %s", __FILE__, __LINE__, #x);   \
+            return r;               \
+        }
+    #endif
+#endif //  NDEBUG
+
+#ifndef CHECK_MUTEX_SUCCESS
+#define CHECK_MUTEX_SUCCESS(call)  {                                \
+        int error;                                                      \
+        if ((error = (call))) {                                         \
+          mvLog(MVLOG_ERROR, "%s failed with error: %d", #call, error); \
+        }                                                               \
+    }
+#endif  // CHECK_MUTEX_SUCCESS
+
+#ifndef CHECK_MUTEX_SUCCESS_RC
+#define CHECK_MUTEX_SUCCESS_RC(call, rc)  {                         \
+        int error;                                                      \
+        if ((error = (call))) {                                         \
+          mvLog(MVLOG_ERROR, "%s failed with error: %d", #call, error); \
+          return rc;                                                    \
+        }                                                               \
+    }
+#endif  // CHECK_MUTEX_SUCCESS_RC
+
+#define CIRCULAR_INCREMENT(x, maxVal) \
+        { \
+             x++; \
+             if (x == maxVal) \
+                 x = 0; \
+        }
+
+// Avoid problems with unsigned arithmetic: compare first, then assign the new value.
+#define CIRCULAR_DECREMENT(x, maxVal) \
+    { \
+        if (x == 0) \
+            x = maxVal; \
+        else \
+            x--; \
+    }
+
+#define CIRCULAR_INCREMENT_BASE(x, maxVal, base) \
+        { \
+            x++; \
+            if (x == maxVal) \
+                x = base; \
+        }
+// Avoid problems with unsigned arithmetic: compare first, then assign the new value.
+#define CIRCULAR_DECREMENT_BASE(x, maxVal, base) \
+    { \
+        if (x == base) \
+            x = maxVal - 1; \
+        else \
+            x--; \
+    }
+
+#define EXTRACT_IDS(streamId, linkId) \
+    { \
+        linkId = (streamId >> 24) & 0XFF; \
+        streamId = streamId & 0xFFFFFF; \
+    }
+
+#define COMBIN_IDS(streamId, linkid) \
+         streamId = streamId | ((linkid & 0xFF) << 24);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_XLINK_TOOL_H
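The assertion, mutex-check and circular-buffer helpers previously kept in XLinkPrivateDefines.h now live in this new XLink_tool.h. A small standalone demonstration of the buffer and id helpers; the *_BASE variants wrap a pointer back to base instead of 0, which is what the event queues in XLinkDispatcher.c rely on (the macros are repeated only to keep the sketch self-contained):

#include <stdio.h>

/* Copies of the helpers above, repeated so the sketch compiles on its own. */
#define CIRCULAR_INCREMENT(x, maxVal) \
    { x++; if (x == maxVal) x = 0; }

#define CIRCULAR_INCREMENT_BASE(x, maxVal, base) \
    { x++; if (x == maxVal) x = base; }

#define EXTRACT_IDS(streamId, linkId) \
    { linkId = (streamId >> 24) & 0XFF; streamId = streamId & 0xFFFFFF; }

#define COMBIN_IDS(streamId, linkid) \
    streamId = streamId | ((linkid & 0xFF) << 24);

int main(void) {
    int events[4] = {10, 20, 30, 40};
    int *base = events, *end = events + 4, *cur = events;

    for (int i = 0; i < 6; ++i) {               /* prints 10 20 30 40 10 20 */
        printf("%d ", *cur);
        CIRCULAR_INCREMENT_BASE(cur, end, base);
    }
    printf("\n");

    unsigned int idx = 3;
    CIRCULAR_INCREMENT(idx, 4);                 /* wraps 3 -> 0 */
    printf("idx = %u\n", idx);

    unsigned int packed = 0x123456;             /* stream id in the low 24 bits */
    unsigned int linkId = 0;
    COMBIN_IDS(packed, 7);                      /* link id 7 into the top byte */
    EXTRACT_IDS(packed, linkId);
    printf("link %u, stream 0x%X\n", linkId, packed);
    return 0;
}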
diff --git a/inference-engine/thirdparty/movidius/XLink/tests/CMakeLists.txt b/inference-engine/thirdparty/movidius/XLink/tests/CMakeLists.txt
deleted file mode 100644 (file)
index 49fc39a..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2018-2019 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-
-set(TARGET_NAME "XLinkTests")
-set(CMAKE_CXX_STANDARD 11)
-
-add_executable(${TARGET_NAME} XLink_tests.cpp)
-
-target_include_directories(${TARGET_NAME}
-        PRIVATE
-            ${IE_MAIN_SOURCE_DIR}/tests/libs/gtest/googletest/include
-            ${IE_MAIN_SOURCE_DIR}/tests/libs/gtest/googletest/
-            ../shared
-            ../pc)
-
-target_link_libraries(${TARGET_NAME}
-        PRIVATE
-            XLink gtest gtest_main)
-
-set_target_properties(${TARGET_NAME} PROPERTIES
-        POSITION_INDEPENDENT_CODE TRUE
-        COMPILE_PDB_NAME ${TARGET_NAME})
diff --git a/inference-engine/thirdparty/movidius/XLink/tests/XLink_tests.cpp b/inference-engine/thirdparty/movidius/XLink/tests/XLink_tests.cpp
deleted file mode 100644 (file)
index aa4622b..0000000
+++ /dev/null
@@ -1,390 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <iostream>
-#include <chrono>
-#include <thread>
-#include <gtest/gtest.h>
-#include <XLinkPrivateDefines.h>
-#include "XLink.h"
-
-#define MAX_NAME_LENGTH 16
-#define MAX_DEVICES     32
-#define MAX_PATH        255
-
-#define MYRIADX         0x2485
-#define MYRIAD2         0x2150
-#define MYRIAD_BOOTED   0xf63b
-#define MYRIAD_UNBOOTED -1
-
-static XLinkGlobalHandler_t globalHandler;
-
-class XLinkTests : public ::testing::Test {
-public:
-    static void SetUpTestCase() {
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkInitialize(&globalHandler));
-        // Waiting for initialization
-        std::this_thread::sleep_for(std::chrono::seconds(1));
-    }
-protected:
-    virtual ~XLinkTests() {}
-
-    void getFirmwarePath(char* devAddr, char* firmwarePath) {
-        char* p = strchr(devAddr, '-');
-        if (p == nullptr) {
-            EXPECT_TRUE(false) << "Invalid device address";
-        }
-#if (!defined(_WIN32) && !defined(_WIN64))
-        snprintf(firmwarePath, 40, "./lib/MvNCAPI%s.mvcmd", p);
-#else
-        snprintf(firmwarePath, 40, "./MvNCAPI%s.mvcmd", p);
-#endif  // #if (!defined(_WIN32) && !defined(_WIN64))
-    }
-
-    void bootAnyDevice() {
-        char firmwarePath[MAX_PATH];
-        deviceDesc_t deviceDesc = {};
-        deviceDesc_t in_deviceDesc = {};
-        in_deviceDesc.protocol = X_LINK_USB_VSC;
-        in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-        // Get device name
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_UNBOOTED, &in_deviceDesc, &deviceDesc));
-        getFirmwarePath(deviceDesc.name, firmwarePath);
-
-        printf("Would boot (%s) device with firmware (%s) \n", deviceDesc.name, firmwarePath);
-
-        // Boot device
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkBootRemote(&deviceDesc, firmwarePath));
-        // FIXME: need to find a way to avoid this sleep
-        std::this_thread::sleep_for(std::chrono::seconds(2));
-
-        // Check, that device booted
-        deviceDesc_t bootedDeviceDesc = {};
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_BOOTED, &in_deviceDesc, &bootedDeviceDesc));
-    }
-
-    void closeDevice(char* bootedName) {
-        XLinkHandler_t *handler = (XLinkHandler_t *)malloc(sizeof(XLinkHandler_t));
-        handler->devicePath = bootedName;
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(handler));
-        // FIXME: need to find a way to avoid this sleep
-        std::this_thread::sleep_for(std::chrono::seconds(1));
-
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler->linkId));
-        free(handler);
-
-        std::this_thread::sleep_for(std::chrono::seconds(2));
-    }
-
-    void closeDeviceWithHandler(XLinkHandler_t* handler) {
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler->linkId));
-        free(handler);
-        std::this_thread::sleep_for(std::chrono::seconds(2));
-    }
-
-};
-
-TEST_F(XLinkTests, CanBootConnectAndResetDevice) {
-    char firmwarePath[MAX_PATH];
-    deviceDesc_t deviceDesc = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_UNBOOTED, &in_deviceDesc, &deviceDesc));
-    getFirmwarePath(deviceDesc.name, firmwarePath);
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkBootRemote(&deviceDesc, firmwarePath));
-    // FIXME: need to find a way to avoid this sleep
-    std::this_thread::sleep_for(std::chrono::seconds(2));
-
-    deviceDesc_t bootedDesc = {};
-    for (int i = 0; i < MAX_DEVICES; i++) {
-        if (X_LINK_SUCCESS == XLinkFindDevice(i, X_LINK_BOOTED, &in_deviceDesc, &bootedDesc)) {
-            break;
-        }
-    }
-
-    XLinkHandler_t *handler = (XLinkHandler_t *)malloc(sizeof(XLinkHandler_t));
-    handler->protocol = bootedDesc.protocol;
-    handler->devicePath = bootedDesc.name;
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(handler));
-    // FIXME: need to find a way to avoid this sleep
-    std::this_thread::sleep_for(std::chrono::seconds(1));
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler->linkId));
-    free(handler);
-    // FIXME: need to find a way to avoid this sleep
-    std::this_thread::sleep_for(std::chrono::seconds(2));
-}
-
-class XLinkOpenStreamTests : public XLinkTests {
-protected:
-    virtual ~XLinkOpenStreamTests() {
-
-    }
-    void SetUp() override {
-        deviceDesc_t deviceDesc = {};
-        deviceDesc_t in_deviceDesc = {};
-        in_deviceDesc.protocol = X_LINK_USB_VSC;
-        in_deviceDesc.platform = X_LINK_MYRIAD_X;
-
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc));
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkBootRemote(&deviceDesc, "./lib/MvNCAPI-ma2480.mvcmd"));
-
-        std::this_thread::sleep_for(std::chrono::seconds(1));
-
-        deviceDesc_t bootedDesc = {};
-        for (int i = 0; i < MAX_DEVICES; i++) {
-            if (X_LINK_SUCCESS == XLinkFindDevice(i, X_LINK_BOOTED, &in_deviceDesc, &bootedDesc)) {
-                break;
-            }
-        }
-
-        handler = (XLinkHandler_t *)malloc(sizeof(XLinkHandler_t));
-        handler->protocol = bootedDesc.protocol;
-        handler->devicePath = bootedDesc.name;
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(handler));
-        std::this_thread::sleep_for(std::chrono::seconds(1));
-    }
-
-    void TearDown() override {
-        ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler->linkId));
-        free(handler);
-        // FIXME: need to find a way to avoid this sleep
-        std::this_thread::sleep_for(std::chrono::seconds(2));
-    }
-
-    XLinkHandler_t *handler;
-};
-
-TEST_F(XLinkOpenStreamTests, CanOpenAndCloseStream) {
-    streamId_t stream = XLinkOpenStream(handler->linkId, "mySuperStream", 1024);
-    ASSERT_NE(INVALID_STREAM_ID, stream);
-    ASSERT_NE(INVALID_STREAM_ID_OUT_OF_MEMORY, stream);
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream));
-}
-
-TEST_F(XLinkOpenStreamTests, CannotOpenStreamMoreThanMemoryOnDevice) {
-    const int _512MB = 512 * 1024 * 1024;
-    streamId_t stream = XLinkOpenStream(handler->linkId, "mySuperStream", _512MB);
-    ASSERT_EQ(INVALID_STREAM_ID_OUT_OF_MEMORY, stream);
-}
-
-// FIXME: the test doesn't work
-// TODO: is it correct behavior, should we accept the same names
-TEST_F(XLinkOpenStreamTests, DISABLED_CannotOpenTwoStreamsWithTheSameName) {
-    const int _1KB = 1 * 1024;
-    const char streamName[] = "mySuperStream";
-    streamId_t stream0 = XLinkOpenStream(handler->linkId, streamName, _1KB);
-    ASSERT_NE(INVALID_STREAM_ID, stream0);
-
-    streamId_t stream1 = XLinkOpenStream(handler->linkId, streamName, _1KB);
-    ASSERT_EQ(INVALID_STREAM_ID, stream1);
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream0));
-}
-
-// FIXME: XLinkOpenStream doesn't allocate any memory on device
-TEST_F(XLinkOpenStreamTests, DISABLED_CannotOpenStreamsMoreThanMemoryOnDevice) {
-    const int _256MB = 256 * 1024 * 1024;
-    streamId_t stream0 = XLinkOpenStream(handler->linkId, "mySuperStream0", _256MB);
-    ASSERT_NE(INVALID_STREAM_ID, stream0);
-
-    streamId_t stream1 = XLinkOpenStream(handler->linkId, "mySuperStream1", _256MB);
-    ASSERT_EQ(INVALID_STREAM_ID, stream1);
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream0));
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream1));
-}
-
-/**
- * @brief XLinkGetDeviceName function tests
- */
-class XLinkGetDeviceNameTests : public XLinkTests {
-protected:
-    ~XLinkGetDeviceNameTests() override = default;
-};
-
-// TODO Can compose list of all devices
-
-/**
- * @brief XLinkGetDeviceName should return error if index argument is invalid
- */
-TEST_F(XLinkGetDeviceNameTests, ReturnErrorOnIncorrectIndex) {
-    deviceDesc_t deviceDesc = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-    ASSERT_EQ(X_LINK_ERROR, XLinkFindDevice(-1, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc));
-    ASSERT_TRUE(strlen(deviceDesc.name) == 0);
-}
-
-/**
- * @brief XLinkGetDeviceName should return device name in AUTO_PID mode (pid = 0)
- */
-TEST_F(XLinkGetDeviceNameTests, ReturnAnyDeviceName) {
-    deviceDesc_t deviceDesc = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc));
-    ASSERT_TRUE(strlen(deviceDesc.name) > 2);
-}
-
-/**
- * @brief XLinkGetDeviceName should return M2 device name if pid = MYRIAD2 (0x2150)
- */
-TEST_F(XLinkGetDeviceNameTests, ReturnCorrectM2DeviceName) {
-    deviceDesc_t deviceDesc = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_MYRIAD_2;
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc));
-    ASSERT_TRUE(strstr(deviceDesc.name, "ma2450") != nullptr);
-}
-
-/**
- * @brief XLinkGetDeviceName should return MX device name if pid = MYRIADX (0x2485)
- */
-TEST_F(XLinkGetDeviceNameTests, ReturnCorrectMXDeviceName) {
-    deviceDesc_t deviceDesc = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_MYRIAD_X;
-
-    ASSERT_EQ(X_LINK_SUCCESS,XLinkFindDevice(0, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc));
-    ASSERT_TRUE(strstr(deviceDesc.name, "ma2480") != nullptr);
-}
-
-/**
- * @brief XLinkGetDeviceName should return booted MX device name if pid = MYRIAD_BOOTED (0xf63b)
- */
-TEST_F(XLinkGetDeviceNameTests, ReturnCorrectBootedDeviceName) {
-    bootAnyDevice();
-
-    deviceDesc_t bootedDeviceDescr = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_BOOTED, &in_deviceDesc, &bootedDeviceDescr));
-    ASSERT_TRUE(strstr(bootedDeviceDescr.name, "ma2480") == nullptr);
-    ASSERT_TRUE(strstr(bootedDeviceDescr.name, "ma2450") == nullptr);
-
-    closeDevice(bootedDeviceDescr.name);
-}
-
-/**
- * @brief XLinkResetAll function tests
- */
-class XLinkResetAllTests : public XLinkTests {
-protected:
-    ~XLinkResetAllTests() override = default;
-};
-
-/**
- * @brief XLinkResetAll function should reset all booted devices
- */
-TEST_F(XLinkResetAllTests, ResetBootedDevice) {
-    // TODO Boot all available devices
-    bootAnyDevice();
-
-    // Without connection to device XLinkResetAll doesn't work
-    // Connect to device
-    deviceDesc_t bootedDeviceDescr = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_BOOTED, &in_deviceDesc, &bootedDeviceDescr));
-
-    XLinkHandler_t *handler = (XLinkHandler_t *)malloc(sizeof(XLinkHandler_t));
-    handler->protocol = bootedDeviceDescr.protocol;
-    handler->devicePath = bootedDeviceDescr.name;
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(handler));
-
-    // Try to reset device
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkResetAll());
-    std::this_thread::sleep_for(std::chrono::seconds(2));
-
-    // No one booted device should be found
-    deviceDesc_t afterResetBootedDescr = {};
-    ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND, XLinkFindDevice(0, X_LINK_BOOTED, &in_deviceDesc, &afterResetBootedDescr));
-}
-
-/**
- * @brief XLinkConnect function tests
- */
-class XLinkConnectTests : public XLinkTests {
-protected:
-    ~XLinkConnectTests() override = default;
-};
-
-TEST_F(XLinkConnectTests, InvalidHanler) {
-    ASSERT_EQ(X_LINK_ERROR, XLinkConnect(nullptr));
-}
-
-TEST_F(XLinkConnectTests, ConnectToDevice) {
-    bootAnyDevice();
-
-    deviceDesc_t bootedDeviceDescr = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_BOOTED, &in_deviceDesc, &bootedDeviceDescr));
-
-    XLinkHandler_t *handler = (XLinkHandler_t *)malloc(sizeof(XLinkHandler_t));
-    handler->protocol = bootedDeviceDescr.protocol;
-    handler->devicePath = bootedDeviceDescr.name;
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(handler));
-
-    closeDeviceWithHandler(handler);
-}
-
-class XLinkBootRemoteTests: public XLinkTests {
-public:
-    ~XLinkBootRemoteTests() override = default;
-};
-
-TEST_F(XLinkBootRemoteTests, USBDeviceNameChangedAfterBoot) {
-    deviceDesc_t unbootedDeviceDescr = {};
-    deviceDesc_t in_deviceDesc = {};
-    in_deviceDesc.protocol = X_LINK_USB_VSC;
-    in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-    char firmwarePath[MAX_PATH];
-
-    // Get device name
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_UNBOOTED, &in_deviceDesc, &unbootedDeviceDescr));
-    getFirmwarePath(unbootedDeviceDescr.name, firmwarePath);
-
-    // Boot device
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkBootRemote(&unbootedDeviceDescr, firmwarePath));
-    std::this_thread::sleep_for(std::chrono::seconds(2));
-
-    // Booted device appear
-    deviceDesc_t bootedDeviceDesc = {};
-    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindDevice(0, X_LINK_BOOTED, &in_deviceDesc, &bootedDeviceDesc));
-
-    // Previous device don't disappear
-    bool before_booted_found = false;
-    deviceDesc_t deviceDesc = {};
-    for (int i = 0; i < MAX_DEVICES; i++) {
-        if (X_LINK_SUCCESS == XLinkFindDevice(i, X_LINK_UNBOOTED, &in_deviceDesc, &deviceDesc)) {
-            if (strcmp(deviceDesc.name, unbootedDeviceDescr.name) == 0) {
-                before_booted_found = true;
-            }
-            break;
-        }
-    }
-
-    ASSERT_FALSE(before_booted_found);
-
-    closeDevice(bootedDeviceDesc.name);
-}
\ No newline at end of file
index 8d61aae..9fb2e99 100644 (file)
@@ -18,11 +18,12 @@ endif()
 file(GLOB MVNC_SOURCES "include/*" "src/*")
 file(GLOB WATCHDOG_SOURCES "../watchdog/*")
 
-# FIXME: WIN_PTHREAD also should be built as a library
 if(WIN32)
     file(GLOB USB_WIN_SOURCES "../USB_WIN/*")
     file(GLOB WIN_PTHREAD_SOURCES "../WinPthread/*")
     list(APPEND ${MVNC_SOURCES} ${USB_WIN_SOURCES} ${WIN_PTHREAD_SOURCES})
+else()
+    list(APPEND ${MVNC_SOURCES} "../WinPthread/pthread_semaphore.c")
 endif()
 
 add_library(${TARGET_NAME} STATIC ${MVNC_SOURCES} ${WATCHDOG_SOURCES})
@@ -43,6 +44,7 @@ endif()
 if(UNIX)
     target_include_directories(${TARGET_NAME}
         PRIVATE
+            "../WinPthread"
             "${LIBUSB_INCLUDE_DIR}")
 endif()
 
index 5dc90c7..0b0ca75 100644 (file)
@@ -135,6 +135,8 @@ typedef enum {
     NC_RO_DEVICE_ID = 2016,                     // returns device id
     NC_RO_DEVICE_PLATFORM = 2017,               // returns device platform (MyriadX, Myriad2)
     NC_RO_DEVICE_PROTOCOL = 2018,               // returns device protocol (USB, PCIe)
+    NC_RW_DEVICE_POWER_CONFIG = 2400,           // writes config for the power manager to device
+    NC_RW_DEVICE_POWER_CONFIG_RESET = 2401,     // resets power manager config on device
 } ncDeviceOption_t;
 
 typedef enum {
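NC_RW_DEVICE_POWER_CONFIG and its reset counterpart extend the device option set. A hedged sketch only of how they might be driven from application code: it assumes the usual ncDeviceSetOption() entry point (not shown in this hunk) and a 32-bit configuration word, while the actual payload layout is defined by the device firmware:

#include <stdint.h>
#include "mvnc.h"

/* Illustrative helper; applyPowerConfig is not part of the mvnc API. */
static ncStatus_t applyPowerConfig(struct ncDeviceHandle_t *deviceHandle, uint32_t config)
{
    return ncDeviceSetOption(deviceHandle, NC_RW_DEVICE_POWER_CONFIG,
                             &config, sizeof(config));
}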
index 07c8990..7b8838c 100644 (file)
@@ -1,3 +1,7 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
 #ifndef _MVNC_DATA_H
 #define _MVNC_DATA_H
 
index 431eec9..f83a12b 100644 (file)
@@ -1,3 +1,7 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
 #ifndef _MVNC_TOOL_H
 #define _MVNC_TOOL_H
 
index e2c8de8..46e46c1 100644 (file)
@@ -155,12 +155,19 @@ typedef enum {
     CLASS3_SET_LOG_LEVEL_XLINK,
 } deviceOptionClass3;
 
+typedef enum {
+    CLASS4_SET_POWER_CONFIG = 0,
+    CLASS4_GET_POWER_CONFIG,
+    CLASS4_RESET_POWER_CONFIG,
+} deviceOptionClass4;
+
 typedef struct {
     union {
         deviceOptionClass0 c0;
         deviceOptionClass1 c1;
         deviceOptionClass2 c2;
         deviceOptionClass3 c3;
+        deviceOptionClass4 c4;
     } type;
     uint32_t optionClass;
     uint32_t data;
index a91a0c8..a7c4380 100644 (file)
 #include "watchdog.h"
 
 typedef enum {
-    NC_OPTION_CLASS0 = 0,
-    NC_OPTION_CLASS1 = 1,
-    NC_OPTION_CLASS2 = 2,
-    NC_OPTION_CLASS3 = 3,
+    NC_OPTION_CLASS0     = 0,
+    NC_OPTION_CLASS1     = 1,
+    NC_OPTION_CLASS2     = 2,
+    NC_OPTION_CLASS3     = 3,
+    NC_OPTION_CLASS4     = 4,
+    NC_OPTION_LAST       = 4,   // Last configuration option available
+    NC_OPTION_GRAPH_LAST = 2,   // Last configuration option available for a graph
 } ncOptionClass_t;
 
 typedef enum {
index af6f9ce..94638c8 100644 (file)
 #define MAX_RELATED_PATH_LENGTH   100
 
 //      Timeouts
-#define STATUS_WAIT_TIMEOUT     15
-#define DEVICE_APPEAR_TIMEOUT_ON_OPEN   (2)
-#define DEVICE_APPEAR_TIMEOUT_ON_CLOSE   (10)
+#define DEVICE_CONNECT_TIMEOUT              (2)
+#define PCIE_DEVICE_CONNECT_TIMEOUT         (10)
+#define DEVICE_APPEAR_TIMEOUT_ON_OPEN       (2)
+#define DEVICE_APPEAR_TIMEOUT_ON_CLOSE      (10)
 
 #define SLEEP_MS        250
 #define MAX_ITERATIONS  20
@@ -94,19 +95,20 @@ static int global_lock_fd = -1;
  * @param errorMsg Message to be written in case of error. It is a format string
  */
 #ifndef CHECK_STREAM_ID
-#define CHECK_STREAM_ID(id, callReleasingResources, errorMsg) {                                                   \
-    char errorMsgWithReason[255];                                                                                  \
-    if (id == INVALID_STREAM_ID_OUT_OF_MEMORY) {                                                                   \
-        snprintf(errorMsgWithReason, 255, "%s %s", errorMsg, "due to not enough memory on device");                \
-        mvLog(MVLOG_ERROR, errorMsgWithReason);                                                                    \
-        callReleasingResources;                                                                                        \
-        return NC_OUT_OF_MEMORY;                                                                                   \
-    } else if (id == INVALID_STREAM_ID) {                                                                          \
-         snprintf(errorMsgWithReason, 255, "%s %s", errorMsg, "due to unknown error");              \
-         callReleasingResources;                                                                                       \
-         return NC_ERROR;                                                                                          \
-    }                                                                                                              \
-    mvLog(MVLOG_DEBUG, "Stream opened");                                                                           \
+#define CHECK_STREAM_ID(id, callReleasingResources, errorMsg) {                                                     \
+    char errorMsgWithReason[255];                                                                                   \
+    if (id == INVALID_STREAM_ID_OUT_OF_MEMORY) {                                                                    \
+        snprintf(errorMsgWithReason, 255, "%s %s", errorMsg, "due to not enough memory on device");                 \
+        mvLog(MVLOG_ERROR, errorMsgWithReason);                                                                     \
+        callReleasingResources;                                                                                     \
+        return NC_OUT_OF_MEMORY;                                                                                    \
+    } else if (id == INVALID_STREAM_ID) {                                                                           \
+         snprintf(errorMsgWithReason, 255, "%s %s", errorMsg, "due to unknown error");                              \
+         mvLog(MVLOG_ERROR, errorMsgWithReason);                                                                    \
+         callReleasingResources;                                                                                    \
+         return NC_ERROR;                                                                                           \
+    }                                                                                                               \
+    mvLog(MVLOG_DEBUG, "Stream opened");                                                                            \
 }
 #endif // CHECK_STREAM_ID
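The realigned CHECK_STREAM_ID now also logs before returning in the generic INVALID_STREAM_ID case. A hedged usage sketch, written as a fragment in the context of ncAPI.c; openExampleStream() and releaseExampleResources() are illustrative names, and "exampleStream" is not a real stream used by the plugin:

static void releaseExampleResources(void) { /* illustrative cleanup hook */ }

static ncStatus_t openExampleStream(linkId_t linkId)
{
    streamId_t id = XLinkOpenStream(linkId, "exampleStream", PACKET_LENGTH);
    /* Logs and returns NC_OUT_OF_MEMORY or NC_ERROR on failure,
       running the cleanup expression first. */
    CHECK_STREAM_ID(id, releaseExampleResources(), "Failed to open example stream");
    return NC_OK;
}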
 
@@ -206,7 +208,7 @@ static void sleepForSeconds(const unsigned int seconds) {
 
 static char* getProductName(const char* name) {
 
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(PCIE_NAME_STR)
+#if (defined(_WIN32) || defined(_WIN64))
     const char PCIeName[] = "mxlink";
 #else
     const char PCIeName[] = "mxlk";
@@ -284,80 +286,71 @@ static void resetAll()
 #if defined(NO_BOOT)
     mvLog(MVLOG_INFO, "Devices will not be restarted for this configuration (NO_BOOT)");
 #else
-    int index = 0;
-    int stalled_count = 0;
-    int iters = 0;
-    int bootrom_count = 0;
-    int after_reset_count = 0;
-    XLinkError_t rc;
-    deviceDesc_t out_deviceDesc;
+    // Reset only USB devices
     deviceDesc_t in_deviceDesc = {
         .protocol = X_LINK_USB_VSC,
-        .platform = NC_ANY_PLATFORM
+        .platform = X_LINK_ANY_PLATFORM
     };
 
-    double waittm = timeInSeconds() + STATUS_WAIT_TIMEOUT;
-    while (timeInSeconds() < waittm) {
-        rc = XLinkFindDevice(index, X_LINK_ANY_STATE, &in_deviceDesc, &out_deviceDesc);
-        if (rc != X_LINK_SUCCESS)
-            break; //no more devices found
+    unsigned int stalled_count = 0;
+    deviceDesc_t stalledDevices[NC_MAX_DEVICES] = {};
 
-        if (strlen(getProductName(out_deviceDesc.name)) == 1 &&
-            out_deviceDesc.protocol != X_LINK_PCIE) { //name doesn't have product number
-            //device is already booted, need to reset
-            mvLog(MVLOG_DEBUG,"Found stalled device %s\n", out_deviceDesc.name);
-            XLinkHandler_t* handler = calloc(1, sizeof(XLinkHandler_t));
+    unsigned int stalled_count_after_reboot = 0;
+
+
+    double waittm = timeInSeconds() + DEVICE_APPEAR_TIMEOUT_ON_OPEN;
+    do {
+        // Find stalled devices
+        stalled_count = 0;
+        XLinkFindAllSuitableDevices(
+                X_LINK_BOOTED, in_deviceDesc, stalledDevices, NC_MAX_DEVICES, &stalled_count);
+
+        if (stalled_count) {
+            mvLog(MVLOG_INFO, "%d stalled devices found, Resetting...", stalled_count);
+        } else {
+            mvLog(MVLOG_DEBUG, "Stalled devices not found");
+            return;
+        }
+
+        // Try to reboot them
+        int i;
+        for (i = 0; i < stalled_count; ++i) {
+            mvLog(MVLOG_DEBUG, "Found stalled device %s", stalledDevices[i].name);
 
+            XLinkHandler_t* handler = calloc(1, sizeof(XLinkHandler_t));
             if (!handler){
                 mvLog(MVLOG_ERROR, "Memory allocation failed");
-                break;
+                return;
             }
-            handler->protocol = out_deviceDesc.protocol;
-            handler->devicePath = (char*)out_deviceDesc.name;
-            rc = XLinkConnect(handler);
+
+            handler->protocol = stalledDevices[i].protocol;
+            handler->devicePath = (char*)stalledDevices[i].name;
+            XLinkError_t rc = XLinkConnect(handler);
             if (rc) {
                 mvLog(MVLOG_ERROR," Failed to connect to stalled device, rc: %s", XLinkErrorToStr(rc));
+            } else {
+
             }
-            stalled_count++;
             free(handler);
-
-        } else {
-            bootrom_count++;
         }
-        index++;
-    }
 
-    if (stalled_count) {
-        mvLog(MVLOG_INFO,"Stalled devices found, Reseting...");
-        rc = XLinkResetAll();
+        // This command will reset all previously connected devices
+        XLinkError_t rc = XLinkResetAll();
         if (rc) {
             mvLog(MVLOG_WARN,"Failed to reset all device, rc: %s", XLinkErrorToStr(rc));
         }
 
-        iters = 0;
-
-        while ((after_reset_count < bootrom_count + stalled_count) &&
-                iters < MAX_ITERATIONS) {
-            usleep(SLEEP_MS*1000);
-            after_reset_count = 0;
-            index = 0;
-            waittm = timeInSeconds() + STATUS_WAIT_TIMEOUT;
-            while (timeInSeconds() < waittm) {
-                XLinkError_t rc = XLinkFindDevice(index, X_LINK_ANY_STATE, &in_deviceDesc, &out_deviceDesc);
-                if (rc != X_LINK_SUCCESS)
-                break; //no more devices found
+        // Check that all devices are rebooted
+        stalled_count_after_reboot = 0;
+        deviceDesc_t stalledDevicesAfterReboot[NC_MAX_DEVICES] = {};
+        XLinkFindAllSuitableDevices(
+                X_LINK_BOOTED, in_deviceDesc,
+                stalledDevicesAfterReboot, NC_MAX_DEVICES, &stalled_count_after_reboot);
 
-                if (strlen(getProductName(out_deviceDesc.name)) > 1 &&
-                    out_deviceDesc.protocol != X_LINK_PCIE) { //name has product number
-                    after_reset_count++;
-                }
-                index++;
-            }
-            iters++;
-            mvLog(MVLOG_INFO,"...");
-        }
+        mvLog(MVLOG_INFO,"...");
         usleep(SLEEP_MS*1000);
-    }
+
+    } while (stalled_count_after_reboot > 0 && timeInSeconds() < waittm);
 #endif
 }
 
@@ -380,7 +373,7 @@ ncStatus_t ncDeviceResetAll() {
 
 static ncStatus_t initializeXLink()
 {
-    XLinkSetCommonTimeOutMsec(3 * 60 * 10000);
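+    // 60 s common timeout for XLink operations (previously 3 * 60 * 10000 ms = 30 minutes)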
+    XLinkSetCommonTimeOutMsec(60 * 1000);
     // We sanitize the situation by trying to reset the devices that have been left open
     initialized = 1;
     devices = NULL;
@@ -399,17 +392,6 @@ static ncStatus_t initializeXLink()
     return NC_OK;
 }
 
-static int isDeviceOpened(const char *name)
-{
-    struct _devicePrivate_t *d = devices;
-    while (d) {
-        if (strcmp(d->dev_addr, name) == 0)
-            return 0;
-        d = d->next;
-    }
-    return -1;
-}
-
 /**
  * @brief Check is path exists (directory or file)
  */
@@ -564,7 +546,11 @@ static ncStatus_t destroyDeviceHandle(struct ncDeviceHandle_t **deviceHandlePtr)
 
 ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
     struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory) {
-    deviceDesc_t out_deviceDesc = {0};
+
+    //----------------------------------------------------------
+    //      Check input
+
+    deviceDesc_t deviceDescToBoot = {0};
     deviceDesc_t in_deviceDesc = {0};
     copyNcDeviceDescrToXLink(&in_ncDeviceDesc, &in_deviceDesc);
 
@@ -595,7 +581,8 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
         return NC_OK;
     }
 
-    // Initialize handler
+    //--------------------------------------------------------
+    //      Initialize global mutex and mutex for deviceOpen
 
     if (!initialized) {
 #if (defined(_WIN32) || defined(_WIN64))
@@ -626,7 +613,13 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
     }
 
     GLOBAL_LOCK();
-    CHECK_MUTEX_SUCCESS_RC(pthread_mutex_lock(&deviceOpenMutex), NC_ERROR);
+    int error = pthread_mutex_lock(&deviceOpenMutex);
+    if (error) {
+        GLOBAL_UNLOCK();
+        mvLog(MVLOG_ERROR, "pthread_mutex_lock(&deviceOpenMutex) failed with error: %d", error);
+        return NC_ERROR;
+    }
+
     if (!initialized) {
         ncStatus_t sc;
         if ((sc = initializeXLink()) != 0) {
@@ -636,18 +629,19 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
         }
     }
 
+    //--------------------------------------------------------
+    //      Search for device
+
 #if defined(NO_BOOT)
     XLinkDeviceState_t state = X_LINK_BOOTED;
 #else
     XLinkDeviceState_t state = X_LINK_UNBOOTED;
 #endif
 
-    // Find any unbooted device or booted device and create deviceHandle
-    // TODO: PCIE could be found at once. Otherwise, it would cause a lot of errors about the opening file error.
     XLinkError_t rc = X_LINK_ERROR;
     double waittm = timeInSeconds() + DEVICE_APPEAR_TIMEOUT_ON_OPEN;
     while ((rc != X_LINK_SUCCESS) && (timeInSeconds() < waittm)) {
-        rc = XLinkFindDevice(0, state, &in_deviceDesc, &out_deviceDesc);
+        rc = XLinkFindFirstSuitableDevice(state, in_deviceDesc, &deviceDescToBoot);
     }
 
     if (rc != X_LINK_SUCCESS) {
@@ -665,15 +659,16 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
         return parseXLinkError(NC_ERROR);
     }
 
-    // Allocate handler
+    //--------------------------------------------------------
+    //      Allocate device handler
 
     struct ncDeviceHandle_t *dH = calloc(1, sizeof(*dH));
     struct _devicePrivate_t *d = calloc(1, sizeof(*d));
 
     if (dH && d) {
         dH->private_data = d;
-        d->protocol = out_deviceDesc.protocol;
-        d->dev_addr = strdup(out_deviceDesc.name);
+        d->protocol = deviceDescToBoot.protocol;
+        d->dev_addr = strdup(deviceDescToBoot.name);
         d->device_mon_stream_id = INVALID_LINK_ID;
         d->graph_monitor_stream_id = INVALID_LINK_ID;
         d->wd_interval = watchdogInterval;
@@ -694,7 +689,9 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
         return NC_OUT_OF_MEMORY;
     }
 
-    // Boot device
+    //--------------------------------------------------------
+    //      Boot device
+
     XLinkHandler_t* handler = calloc(1, sizeof(XLinkHandler_t));
     if (!handler) {
         mvLog(MVLOG_ERROR, "Memory allocation failed");
@@ -716,9 +713,9 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
     rc = XLinkConnect(handler);
 #else
     if (handler->protocol == X_LINK_PCIE) {          // PCIe
-#if (!defined(_WIN32) && !defined(_WIN64))
         ncStatus_t sc;
         char mv_cmd_file_path[MAX_PATH_LENGTH] = {};
+#if (!defined(_WIN32) && !defined(_WIN64))
         if (customFirmwareDirectory && strnlen(customFirmwareDirectory, MAX_PATH_LENGTH) > 1) {
             mv_strncpy(mv_cmd_file_path, MAX_PATH_LENGTH, customFirmwareDirectory, MAX_PATH_LENGTH - 1);
             addEndPathSeparator(mv_cmd_file_path);
@@ -733,7 +730,8 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
             GLOBAL_UNLOCK();
             return NC_MVCMD_NOT_FOUND;
         }
-        rc = XLinkBootRemote(&out_deviceDesc, mv_cmd_file_path);
+#endif
+        rc = XLinkBoot(&deviceDescToBoot, mv_cmd_file_path);
         if (rc) {
             mvLog(MVLOG_WARN, "%s() XLinkBootRemote returned error %s for %s",
                   __func__, XLinkErrorToStr(rc), d->dev_addr);
@@ -741,12 +739,36 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
             mvLog(MVLOG_INFO, "%s() XLinkBootRemote returned success %s for %s",
                   __func__, XLinkErrorToStr(rc), d->dev_addr);
         }
-#endif
+        // Search for booted device
+        deviceDesc_t tempDeviceDesc = {};
+        waittm = timeInSeconds() + DEVICE_APPEAR_TIMEOUT_ON_CLOSE;
+        do {
+            rc = XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDescToBoot, &tempDeviceDesc);
+        } while (rc != X_LINK_SUCCESS && timeInSeconds() < waittm);
+
+        if (rc != X_LINK_SUCCESS) {
+            mvLog(MVLOG_ERROR, "Device doesn't appear after boot");
+            free(handler);
+            destroyDeviceHandle(deviceHandlePtr);
+            CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+            GLOBAL_UNLOCK();
+            return NC_ERROR;
+        }
+
         d->protocol_booted = d->protocol;
         d->dev_addr_booted = strdup(d->dev_addr);
         handler->protocol = d->protocol_booted;
         handler->devicePath = d->dev_addr_booted;
-        rc = XLinkConnect(handler);
+
+        // FIXME BSOD
+#if (defined(_WIN32) || defined(_WIN64))
+        sleepForSeconds(5);
+#endif
+        // Connect to booted
+        waittm = timeInSeconds() + PCIE_DEVICE_CONNECT_TIMEOUT;
+        do {
+            rc = XLinkConnect(handler);
+        } while(rc != X_LINK_SUCCESS && timeInSeconds() < waittm);
     } else {                                        // USB
         // Find firmware and boot device with it
         char mv_cmd_file_path[MAX_PATH_LENGTH] = {};
@@ -770,20 +792,19 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
 
         mvLog(MVLOG_INFO, "%s() XLinkBootRemote is running for %s...\n", __func__, d->dev_addr);
 
-        // remember all currently available devices
+        // Remember all currently available devices
         deviceDesc_t beforeBootDevices[NC_MAX_DEVICES] = {{0}};
-        deviceDesc_t simpleDeviceDesc = {
-            .platform = NC_ANY_PLATFORM,
-            .protocol = convertProtocolToXlink(in_ncDeviceDesc.protocol)
+        unsigned int numberOfDevicesBeforeBoot = 0;
+        deviceDesc_t deviceDesc = {
+            .platform = X_LINK_ANY_PLATFORM,
+            .protocol = X_LINK_USB_VSC
         };
 
-        int n = 0;
-        for (; n < NC_MAX_DEVICES; ++n) {
-            if (XLinkFindDevice(n, X_LINK_ANY_STATE, &simpleDeviceDesc, &beforeBootDevices[n]))
-                break;
-        }
+        XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, deviceDesc, beforeBootDevices,
+                NC_MAX_DEVICES, &numberOfDevicesBeforeBoot);
 
-        rc = XLinkBootRemote(&out_deviceDesc, mv_cmd_file_path);
+        // Boot device
+        rc = XLinkBoot(&deviceDescToBoot, mv_cmd_file_path);
         if (rc) {
             mvLog(MVLOG_WARN, "%s() XLinkBootRemote returned error %s for %s",
                   __func__, XLinkErrorToStr(rc), d->dev_addr);
@@ -792,99 +813,139 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
                   __func__, XLinkErrorToStr(rc), d->dev_addr);
         }
 
-        deviceDesc_t booted_device = {0};
+        // After boot the device name should change; find the newly booted device
+        deviceDesc_t foundBootedDevice = {0};
+        int found_new_booted_device = 0;
 
-        // After boot name should change
-        double waittm = timeInSeconds() + STATUS_WAIT_TIMEOUT;
-        int deviceBooted = 0;
-        while ((timeInSeconds() < waittm) && !deviceBooted) {
-            int dev_indx = 0;
-            for (; dev_indx < NC_MAX_DEVICES; ++dev_indx) {
-                rc = XLinkFindDevice(dev_indx, X_LINK_ANY_STATE, &simpleDeviceDesc, &booted_device);
-                booted_device.name[NC_MAX_NAME_SIZE - 1] = 0;
-                if (rc != X_LINK_SUCCESS)
-                    break;
+        deviceDesc_t afterBootDevices[NC_MAX_DEVICES] = {{0}};
+        unsigned int numberOfDevicesAfterBoot = 0;
+
+        waittm = timeInSeconds() + DEVICE_APPEAR_TIMEOUT_ON_OPEN;
+        do {
+            XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, deviceDesc, afterBootDevices,
+                                        NC_MAX_DEVICES, &numberOfDevicesAfterBoot);
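+            // Wait until the total device count settles back to the pre-boot value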
+            if (numberOfDevicesAfterBoot != numberOfDevicesBeforeBoot) {
+                continue;
+            }
+            deviceDesc_t tempDeviceDescr = {};
 
-                // if beforeBootDevices contains booted_name this is not a device we are looking for
-                int not_found = 0;
-                n = 0;
-                for (; n < NC_MAX_DEVICES; ++n) {
-                    if (strcmp(booted_device.name, beforeBootDevices[n].name) == 0 ||
-                        booted_device.protocol == X_LINK_PCIE) {
-                        not_found = 1;
-                        break;
+            // Device should disappear from unbooted list
+            if (X_LINK_DEVICE_NOT_FOUND != XLinkFindFirstSuitableDevice(
+                                                    X_LINK_ANY_STATE,
+                                                    deviceDescToBoot,
+                                                    &tempDeviceDescr)) {
+                continue;
+            }
+            int i, j;
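+            // Any device name that was not present before boot is treated as the newly booted device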
+            for (i = 0; i < numberOfDevicesAfterBoot; ++i) {
+                int found_in_before_boot_list = 0;
+                for (j = 0; j < numberOfDevicesBeforeBoot; ++j) {
+                    if(strcmp(afterBootDevices[i].name, beforeBootDevices[j].name) == 0) {
+                        found_in_before_boot_list = 1;
                     }
                 }
-
-                if (not_found)
-                    continue;
-                handler->protocol = booted_device.protocol;
-                handler->devicePath = (char *) booted_device.name;
-
-                rc = XLinkConnect(handler);
-                // Device mustn't be in devices pool
-                if (isDeviceOpened(booted_device.name) < 0 && rc == X_LINK_SUCCESS) {
-                    deviceBooted = 1;
-                    d->protocol_booted = booted_device.protocol;
-                    d->dev_addr_booted = strdup(booted_device.name);
+                if (!found_in_before_boot_list) {
+                    mv_strcpy(foundBootedDevice.name, XLINK_MAX_NAME_SIZE,
+                                        afterBootDevices[i].name);
+                    foundBootedDevice.platform = afterBootDevices[i].platform;
+                    foundBootedDevice.protocol = afterBootDevices[i].protocol;
+                    found_new_booted_device = 1;
                     break;
                 }
             }
+        } while (!found_new_booted_device && timeInSeconds() < waittm);
+
+        if (!found_new_booted_device) {
+            mvLog(MVLOG_ERROR, "Device doesn't appear after boot");
+            free(handler);
+            destroyDeviceHandle(deviceHandlePtr);
+            CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+            GLOBAL_UNLOCK();
+            return NC_ERROR;
+        }
+
+        // Connect to booted
+        waittm = timeInSeconds() + DEVICE_CONNECT_TIMEOUT;
+
+        d->protocol_booted = d->protocol;
+        d->dev_addr_booted = strdup(foundBootedDevice.name);
+
+        handler->protocol = foundBootedDevice.protocol;
+        handler->devicePath = (char *) foundBootedDevice.name;
+        do {
+            rc = XLinkConnect(handler);
+        } while(rc != X_LINK_SUCCESS && timeInSeconds() < waittm);
+
+        if (rc != X_LINK_SUCCESS) {
+            mvLog(MVLOG_ERROR, "Device doesn't appear after boot");
+            free(handler);
+            destroyDeviceHandle(deviceHandlePtr);
+            CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+            GLOBAL_UNLOCK();
+            return NC_ERROR;
         }
     }
 #endif
 
     if (rc != X_LINK_SUCCESS) {
-        // If PCIE device was booted then we will find it but can not connect.
-        mvLog_t logLevel = MVLOG_ERROR;
-        if(in_deviceDesc.protocol == X_LINK_PCIE) {
-            logLevel = MVLOG_WARN;
-        }
-
-        mvLog(logLevel, "Failed connection to device (%s) with error %d", d->dev_addr, rc);
+        mvLog(MVLOG_ERROR, "Failed connection to device (%s) with error %d", d->dev_addr, rc);
         free(handler);
         destroyDeviceHandle(deviceHandlePtr);
         CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
         GLOBAL_UNLOCK();
         return parseXLinkError(rc);
     }
+
+    // From this point on, calling free(handler) followed by destroyDeviceHandle() would cause a double free
+    d->xlink = handler;
+    d->next = devices;
+
+    // Check device handle
+    if (d->dev_addr == NULL || d->dev_addr_booted == NULL || d->xlink == NULL) {
+        mvLog(MVLOG_ERROR, "device is invalid");
+        destroyDeviceHandle(deviceHandlePtr);
+        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+        GLOBAL_UNLOCK();
+        return NC_INVALID_HANDLE;
+    }
+
+    devices = d;
+
     mvLog(MVLOG_INFO, "XLinkConnect done - link Id %d\n", handler->linkId);
 
-    int error = 0;
     if ((error = pthread_mutex_init(&d->dev_data_m, NULL)) != 0) {
         mvLog(MVLOG_ERROR, "pthread_mutex_init (dev_data_m) failed with error: %d", error);
-        free(handler);
         destroyDeviceHandle(deviceHandlePtr);
+        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+        GLOBAL_UNLOCK();
         return NC_ERROR;
     }
     // If current mutex initialization failed, destroy previous
     if ((error = pthread_mutex_init(&d->dev_stream_m, NULL)) != 0) {
         mvLog(MVLOG_ERROR, "pthread_mutex_init (dev_stream_m) failed with error: %d", error);
         CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
-        free(handler);
         destroyDeviceHandle(deviceHandlePtr);
+        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+        GLOBAL_UNLOCK();
         return NC_ERROR;
     }
     if ((error = pthread_mutex_init(&d->graph_stream_m, NULL)) != 0) {
         mvLog(MVLOG_ERROR, "pthread_mutex_init (graph_stream_m) failed with error: %d", error);
         CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
         CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m));
-        free(handler);
         destroyDeviceHandle(deviceHandlePtr);
+        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
+        GLOBAL_UNLOCK();
         return NC_ERROR;
     }
 
-    d->xlink = handler;
-    d->next = devices;
-    devices = d;
-
     if (handler->protocol != X_LINK_PCIE) {
         mvLog(MVLOG_INFO, "Booted %s (%s) -> %s\n",
               d->dev_addr, d->dev_addr_booted,
               d->dev_file ? d->dev_file : "VSC");
     } else {
         mvLog(MVLOG_INFO, "Booted %s -> %s\n",
-              d->dev_addr, d->dev_file ? d->dev_file : "X_LINK_PCIE");
+              d->dev_addr, d->dev_file ? d->dev_file : "PCIe");
     }
 
     sleepForSeconds(1);
@@ -892,17 +953,22 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
     CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&deviceOpenMutex));
     GLOBAL_UNLOCK();
 
-    streamId_t streamId = XLinkOpenStream(d->xlink->linkId, "deviceMonitor", CONFIG_STREAM_SIZE);
-    CHECK_STREAM_ID(streamId, {}, "can't open deviceMonitor stream");
+    streamId_t deviceMonitorStreamId = XLinkOpenStream(d->xlink->linkId, "deviceMonitor", CONFIG_STREAM_SIZE);
+    CHECK_STREAM_ID(
+        deviceMonitorStreamId,
+        {
+            CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m));
+            CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m));
+            CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
+            destroyDeviceHandle(deviceHandlePtr);
+        },
+        "can't open deviceMonitor stream");
 
-    d->device_mon_stream_id = streamId;
+    d->device_mon_stream_id = deviceMonitorStreamId;
 
 #if !(defined(NO_BOOT))
-    if(d->protocol != X_LINK_PCIE)
-    {
-        watchdog_init_context(&d->watchdog_ctx);
-        watchdog_register_device(&d->watchdog_ctx, d);
-    }
+    watchdog_init_context(&d->watchdog_ctx);
+    watchdog_register_device(&d->watchdog_ctx, d);
 #endif
 
     getDevAttributes(d);
@@ -911,18 +977,40 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
     printfOverXLinkOpen(d);
 #endif
 
-    streamId = XLinkOpenStream(d->xlink->linkId, "graphMonitor",
-                                BLOB_STREAM_SIZE);
+    streamId_t graphMonitorStreamId = XLinkOpenStream(d->xlink->linkId, "graphMonitor", BLOB_STREAM_SIZE);
 
 #if (!defined(_WIN32) && !defined(_WIN64))
-    CHECK_STREAM_ID(streamId, {
-           printfOverXLinkClose(d);
+    CHECK_STREAM_ID(graphMonitorStreamId, {
+        printfOverXLinkClose(d);
+        // TODO NO_BOOT case
+        watchdog_unregister_device(&d->watchdog_ctx);
+        CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
+        CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m));
+        CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m));
+        XLinkError_t closed = XLinkCloseStream(deviceMonitorStreamId);
+        if (closed != X_LINK_SUCCESS) {
+            mvLog(MVLOG_ERROR, "Failed to close deviceMonitor stream");
+        }
+
+        destroyDeviceHandle(deviceHandlePtr);
     }, "can't open graphMonitor stream");
 #else
-    CHECK_STREAM_ID(streamId, {}, "can't open graphMonitor stream");
+    CHECK_STREAM_ID(graphMonitorStreamId, {
+        // TODO NO_BOOT case
+        watchdog_unregister_device(&d->watchdog_ctx);
+        CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
+        CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m));
+        CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m));
+        XLinkError_t closed = XLinkCloseStream(deviceMonitorStreamId);
+        if (closed != X_LINK_SUCCESS) {
+            mvLog(MVLOG_ERROR, "Failed to close deviceMonitor stream");
+        }
+
+        destroyDeviceHandle(deviceHandlePtr);
+    }, "can't open graphMonitor stream");
 #endif
 
-    d->graph_monitor_stream_id = streamId;
+    d->graph_monitor_stream_id = graphMonitorStreamId;
     d->state = NC_DEVICE_OPENED;
 
     return NC_OK;
@@ -930,7 +1018,6 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
 
 ncStatus_t ncAvailableDevices(struct ncDeviceDescr_t *deviceDescrPtr,
                               int maxDevices, int* out_countDevices) {
-    //TODO: PCIe device support can be performed after #-17972 is completed
     CHECK_HANDLE_CORRECT(deviceDescrPtr);
     CHECK_HANDLE_CORRECT(out_countDevices);
 
@@ -938,20 +1025,20 @@ ncStatus_t ncAvailableDevices(struct ncDeviceDescr_t *deviceDescrPtr,
     memset(deviceDescrPtr, 0, maxDevices * sizeof(struct ncDeviceDescr_t));
 
     deviceDesc_t in_deviceDsc = {
-        .platform = NC_ANY_PLATFORM,
-        .protocol = X_LINK_USB_VSC
+        .platform = X_LINK_ANY_PLATFORM,
+        .protocol = X_LINK_ANY_PROTOCOL
     };
 
-    int n = 0;
-    for (; n < maxDevices; ++n) {
-        deviceDesc_t deviceDsc = {0};
-        if (XLinkFindDevice(n, X_LINK_UNBOOTED, &in_deviceDsc, &deviceDsc))
-            break;
-
-        copyXLinkDeviceDescrToNc(&deviceDsc, &deviceDescrPtr[n]);
+    deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+    unsigned int amountOfFoundDevices = 0;
+    XLinkFindAllSuitableDevices(
+            X_LINK_UNBOOTED, in_deviceDsc, deviceDescArray, NC_MAX_DEVICES, &amountOfFoundDevices);
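+    // Convert each found XLink descriptor into the public NC descriptor format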
+    int i;
+    for (i = 0; i < amountOfFoundDevices; ++i) {
+        copyXLinkDeviceDescrToNc(&deviceDescArray[i], &deviceDescrPtr[i]);
     }
 
-    *out_countDevices = n;
+    *out_countDevices = amountOfFoundDevices;
     return NC_OK;
 }
 
@@ -963,11 +1050,11 @@ ncStatus_t ncDeviceLoadFirmware(const ncDevicePlatform_t devicePlatform, const c
     // Find device with specific platform
     deviceDesc_t deviceDesc = {0};
     deviceDesc_t in_deviceDesc = {
-        .platform = devicePlatform,
+        .platform = convertPlatformToXlink(devicePlatform),
         .protocol = X_LINK_USB_VSC
     };
 
-    rc = XLinkFindDevice(0, X_LINK_UNBOOTED, &in_deviceDesc, &deviceDesc);
+    rc = XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &deviceDesc);
     if (rc) {
         mvLog(MVLOG_WARN, "Failed to find (%s) platform device", ncPlatformToStr(devicePlatform));
         return NC_DEVICE_NOT_FOUND;
@@ -995,12 +1082,12 @@ ncStatus_t ncDeviceLoadFirmware(const ncDevicePlatform_t devicePlatform, const c
     }
 
     mvLog(MVLOG_INFO, "Trying to boot %s device", deviceDesc.name);
-    rc = XLinkBootRemote(&deviceDesc, mv_cmd_file_path);
+    rc = XLinkBoot(&deviceDesc, mv_cmd_file_path);
     if (rc) {
         mvLog(MVLOG_WARN, "%s() XLinkBootRemote returned error %s\n", __func__, XLinkErrorToStr(rc));
     } else {
         mvLog(MVLOG_INFO, "%s() XLinkBootRemote returned success %s\n", __func__, XLinkErrorToStr(rc));
-             sleepForSeconds(DEVICE_APPEAR_TIMEOUT_ON_OPEN);
+          sleepForSeconds(DEVICE_APPEAR_TIMEOUT_ON_OPEN);
     }
 
     return parseXLinkError(rc);
@@ -1087,78 +1174,6 @@ static ncStatus_t getThermalStats(struct _devicePrivate_t *d){
     return NC_OK;
 }
 
-static ncStatus_t getDeviceFrequency(struct _devicePrivate_t *d){
-    deviceCommand_t config;
-    config.type.c0 = CLASS0_DEVICE_QUERY_CLOCKS;
-    config.optionClass = NC_OPTION_CLASS0;
-    CHECK_MUTEX_SUCCESS_RC(pthread_mutex_lock(&d->dev_stream_m), NC_ERROR);
-    XLinkError_t rc = X_LINK_SUCCESS;
-    rc = XLinkWriteData(d->device_mon_stream_id, (const uint8_t*)&config, sizeof(config));
-    if (rc != X_LINK_SUCCESS) {
-        mvLog(MVLOG_ERROR, "Failed to write data, rc: %s", XLinkErrorToStr(rc));
-        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&d->dev_stream_m));
-        return parseXLinkError(rc);
-    }
-    streamPacketDesc_t* packet = 0;
-
-    rc = XLinkReadData(d->device_mon_stream_id, &packet);
-    if (rc != X_LINK_SUCCESS || !packet) {
-        mvLog(MVLOG_ERROR, "Failed to read data, rc: %s", XLinkErrorToStr(rc));
-        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&d->dev_stream_m));
-        return parseXLinkError(rc);
-    }
-
-    if( packet->length != sizeof(uint32_t)) {
-        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&d->dev_stream_m));
-        return NC_ERROR;
-    }
-    mvnc_memcpy(&d->deviceFreq, sizeof(d->deviceFreq), packet->data, packet->length);
-    rc = XLinkReleaseData(d->device_mon_stream_id);
-    CHECK_MUTEX_SUCCESS_RC(pthread_mutex_unlock(&d->dev_stream_m), NC_ERROR);
-    if (rc != X_LINK_SUCCESS) {
-        mvLog(MVLOG_WARN,"Failed to release data, rc: %s", XLinkErrorToStr(rc));
-    }
-    return NC_OK;
-}
-
-static ncStatus_t getDeviceProfilingData(struct _devicePrivate_t *d){
-    deviceCommand_t config;
-    config.type.c0 = CLASS0_DEVICE_PROFILING_DATA;
-    config.optionClass = NC_OPTION_CLASS0;
-    CHECK_MUTEX_SUCCESS_RC(pthread_mutex_lock(&d->dev_stream_m), NC_ERROR);
-    XLinkError_t rc = X_LINK_SUCCESS;
-    rc = XLinkWriteData(d->device_mon_stream_id, (const uint8_t*)&config, sizeof(config));
-    if (rc != X_LINK_SUCCESS) {
-        mvLog(MVLOG_ERROR, "Failed to write data, rc: %s", XLinkErrorToStr(rc));
-        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&d->dev_stream_m));
-        return parseXLinkError(rc);
-    }
-    streamPacketDesc_t* packet = 0;
-
-    rc = XLinkReadData(d->device_mon_stream_id, &packet);
-    if (rc != X_LINK_SUCCESS || !packet) {
-        mvLog(MVLOG_ERROR, "Failed to read data, rc: %s", XLinkErrorToStr(rc));
-        CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&d->dev_stream_m));
-        return parseXLinkError(rc);
-    }
-
-    d->receivedData = packet->length;
-    if (d->profilingBuffer == 0) {
-        d->profilingBuffer = (uint8_t*) malloc(profUpperBound);
-    }
-
-    if( packet->length > profUpperBound) {
-        d->receivedData = profUpperBound;
-    }
-    mvnc_memcpy(d->profilingBuffer, profUpperBound, packet->data, d->receivedData);
-    rc = XLinkReleaseData(d->device_mon_stream_id);
-    CHECK_MUTEX_SUCCESS_RC(pthread_mutex_unlock(&d->dev_stream_m), NC_ERROR);
-    if (rc != X_LINK_SUCCESS) {
-        mvLog(MVLOG_WARN,"Failed to release data, rc: %s", XLinkErrorToStr(rc));
-    }
-    return NC_OK;
-}
-
 static ncStatus_t deviceGetDeviceMemory(struct _devicePrivate_t *d,
                                         uint32_t * mem)
 {
@@ -1236,7 +1251,9 @@ static void fprintfsock( int s, const char* fmt, ... ) {
     }
 
     if(s < 0) {
-        (void) write( 1, ptext, len );
+        if(write( 1, ptext, len) != len) {
+            fprintf(stderr, "Error in fprintfsock: write failed\n");
+        }
     } else {
         if(send( s, ptext, len, 0 ) < 0)
         {
@@ -1566,7 +1583,9 @@ ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) {
     int wasConnectedToBooted = 0;
     if (d->dev_addr != NULL && d->dev_addr_booted != NULL &&
         strncmp(d->dev_addr, d->dev_addr_booted, NC_MAX_NAME_SIZE) == 0) {
-        wasConnectedToBooted = 1;       // For PCIE that also would work
+        // PCIe devices have the same booted and unbooted address
+        if (d->protocol != X_LINK_PCIE)
+            wasConnectedToBooted = 1;
     }
 
     GLOBAL_LOCK();
@@ -1619,17 +1638,29 @@ ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) {
     printfOverXLinkClose(d);
 #endif
 
+#if !defined(NO_BOOT)
+    watchdog_unregister_device(&d->watchdog_ctx);
+#endif
+
+    // Save all devices before reset
+    deviceDesc_t in_deviceDesc = {
+            .platform = X_LINK_ANY_PLATFORM,
+            .protocol = d->protocol
+    };
+    deviceDesc_t beforeResetDevices[NC_MAX_DEVICES] = {{0}};
+    unsigned int foundDevicesBeforeReset = 0;
+    XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, in_deviceDesc, beforeResetDevices,
+                                NC_MAX_DEVICES, &foundDevicesBeforeReset);
+
     if (d->state != NC_DEVICE_FAILED) {
         // #17801
 #if !defined(NO_BOOT)
-        if (d->device_mon_stream_id != INVALID_LINK_ID &&
-            d->protocol != X_LINK_PCIE) {
-            rc = XLinkCloseStream(d->device_mon_stream_id);
-            if (rc)
-                mvLog(MVLOG_WARN,"Failed to close stream, rc: %s", XLinkErrorToStr(rc));
-        }
-        if (d->graph_monitor_stream_id != INVALID_LINK_ID &&
-            d->protocol != X_LINK_PCIE) {
+        if (d->graph_monitor_stream_id != INVALID_LINK_ID) {
+            if (d->device_mon_stream_id != INVALID_LINK_ID) {
+                rc = XLinkCloseStream(d->device_mon_stream_id);
+                if (rc)
+                    mvLog(MVLOG_WARN,"Failed to close stream, rc: %s", XLinkErrorToStr(rc));
+            }
             rc = XLinkCloseStream(d->graph_monitor_stream_id);
             if (rc)
                 mvLog(MVLOG_WARN,"Failed to close stream, rc: %s", XLinkErrorToStr(rc));
@@ -1648,12 +1679,6 @@ ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) {
         }
     }
 
-#if !defined(NO_BOOT)
-    if(d->protocol != X_LINK_PCIE) {
-        watchdog_unregister_device(&d->watchdog_ctx);
-    }
-#endif
-
     d->state = NC_DEVICE_CLOSED;
 
     CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m));
@@ -1662,48 +1687,53 @@ ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) {
     CHECK_MUTEX_SUCCESS(pthread_mutex_unlock(&d->dev_data_m));
     CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
 
-    if (!wasConnectedToBooted) {
-        int device_appear_after_reboot = 0;
+
+    if (!wasConnectedToBooted && d->protocol != X_LINK_PCIE) {
+        deviceDesc_t bootedDeviceDesc = {
+                .protocol = d->protocol,
+                .platform = X_LINK_ANY_PLATFORM
+        };
+        mv_strcpy(bootedDeviceDesc.name, XLINK_MAX_NAME_SIZE, d->dev_addr_booted);
+
+        int booted_disappeared = 0;
+        int unbooted_appeared = 0;
 
         //  Wait for unbooted device appear in usb list
         double waittm = timeInSeconds() + DEVICE_APPEAR_TIMEOUT_ON_CLOSE;
-        while (timeInSeconds() < waittm) {
-            // check current devices
-            // wait for booted name to disappear
-            // wait for unbooted name to appear
-            // sometimes both names can be present in the list of usb devices
-            deviceDesc_t device_desc = {0};
-            deviceDesc_t in_deviceDesc = {
-                .platform = NC_ANY_PLATFORM,
-                .protocol = d->protocol
-            };
-
-            int booted_disappeared = 1;
-            int unbooted_appeared = 0;
-
-            int n = 0;
-            while (XLinkFindDevice(n++, X_LINK_ANY_STATE, &in_deviceDesc, &device_desc) == X_LINK_SUCCESS) {
-                if (d->dev_addr_booted != NULL &&
-                    strcmp(device_desc.name, d->dev_addr_booted) == 0) {
-                    booted_disappeared = 0;
-                    break;
-                }
 
-                if (d->dev_addr != NULL &&
-                    strcmp(device_desc.name, d->dev_addr) == 0) {
-                    unbooted_appeared = 1;
-                }
+        deviceDesc_t afterResetDevices[NC_MAX_DEVICES] = {{0}};
+        unsigned int foundDevicesAfterReset = 0;
+        do {
+            XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, in_deviceDesc, afterResetDevices,
+                                        NC_MAX_DEVICES, &foundDevicesAfterReset);
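+            // Wait until the device count matches the pre-reset count before comparing the lists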
+            if (foundDevicesAfterReset != foundDevicesBeforeReset) {
+                continue;
             }
 
-            if (!(booted_disappeared && unbooted_appeared)) {
+            deviceDesc_t deviceDesc = {};
+            rc = XLinkFindFirstSuitableDevice(X_LINK_BOOTED, bootedDeviceDesc, &deviceDesc);
+            if (rc == X_LINK_SUCCESS) {
                 continue;
             } else {
-                device_appear_after_reboot = 1;
-                break;
+                booted_disappeared = 1;
             }
-        }
+            int i, j;
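+            // A device that was not present before the reset is the re-enumerated (unbooted) device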
+            for (i = 0; i < foundDevicesAfterReset; ++i) {
+                int found_in_before_reset_list = 0;
+                for (j = 0; j < foundDevicesBeforeReset; ++j) {
+                    if(strcmp(afterResetDevices[i].name, beforeResetDevices[j].name) == 0) {
+                        found_in_before_reset_list = 1;
+                    }
+                }
+                if (!found_in_before_reset_list) {
+                    unbooted_appeared = 1;
+                }
+            }
 
-        if (device_appear_after_reboot == 0) {
+        } while (!(booted_disappeared && unbooted_appeared) && timeInSeconds() < waittm);
+
+        if (!booted_disappeared || !unbooted_appeared) {
             mvLog(MVLOG_ERROR, "Device didn't appear after reboot");
         }
     } else {
@@ -2106,7 +2136,7 @@ ncStatus_t ncGraphSetOption(struct ncGraphHandle_t * graphHandle,
         return NC_INVALID_PARAMETERS;
     }
     if (option < GRAPH_CLASS0_BASE ||
-        option > (GRAPH_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_CLASS3)) {
+        option > (GRAPH_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_GRAPH_LAST)) {
         mvLog(MVLOG_ERROR, "Option %d is invalid", option);
         return NC_INVALID_PARAMETERS;
     }
@@ -2415,7 +2445,7 @@ ncStatus_t ncGraphGetOption(struct ncGraphHandle_t * graphHandle,
     }
 
     if (option < GRAPH_CLASS0_BASE ||
-        option > (GRAPH_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_CLASS3)) {
+        option > (GRAPH_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_GRAPH_LAST)) {
         mvLog(MVLOG_ERROR, "Option %d is invalid", option);
         return NC_INVALID_PARAMETERS;
     }
@@ -2775,9 +2805,36 @@ static ncStatus_t getDeviceOptionClass0(struct _devicePrivate_t *d,
     return rc;
 }
 
+static ncStatus_t setDeviceOptionClass4(struct _devicePrivate_t *d,
+                                        ncDeviceOption_t option,
+                                        const void *data, unsigned int dataLength){
+    XLinkError_t rc = X_LINK_SUCCESS;
+    deviceCommand_t config;
+
+    if (option != NC_RW_DEVICE_POWER_CONFIG_RESET && option != NC_RW_DEVICE_POWER_CONFIG) {
+        mvLog(MVLOG_ERROR, "No such option");
+        return NC_INVALID_PARAMETERS;
+    }
+
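+    // Translate the NC power option into the corresponding class-4 device monitor command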
+    config.type.c4 = (option == NC_RW_DEVICE_POWER_CONFIG ? CLASS4_SET_POWER_CONFIG : CLASS4_RESET_POWER_CONFIG);
+    config.optionClass = NC_OPTION_CLASS4;
+    config.data = *(uint32_t*)data;
+
+    rc = XLinkWriteData(d->device_mon_stream_id, (const uint8_t *)&config, sizeof(config));
+
+    if (rc != X_LINK_SUCCESS)
+    {
+        mvLog(MVLOG_ERROR, "Failed to write data, rc: %s", XLinkErrorToStr(rc));
+        return parseXLinkError(rc);
+    }
+
+    return NC_OK;
+}
+
 ncStatus_t ncDeviceSetOption(struct ncDeviceHandle_t *deviceHandle,
-                             ncDeviceOption_t option,
-                             const void *data, unsigned int dataLength){
+                            ncDeviceOption_t option,
+                            const void *data, unsigned int dataLength){
+    ncStatus_t rc = NC_OK;
     if (!deviceHandle || !data){
         mvLog(MVLOG_ERROR, "Some of the parameters are NULL");
         return NC_INVALID_PARAMETERS;
@@ -2788,7 +2845,7 @@ ncStatus_t ncDeviceSetOption(struct ncDeviceHandle_t *deviceHandle,
     }
 
     if (option < DEVICE_CLASS0_BASE ||
-        option > (DEVICE_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_CLASS3)) {
+        option > (DEVICE_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_LAST)) {
         mvLog(MVLOG_ERROR, "Option %d is invalid", option);
         return NC_INVALID_PARAMETERS;
     }
@@ -2809,14 +2866,22 @@ ncStatus_t ncDeviceSetOption(struct ncDeviceHandle_t *deviceHandle,
 
         return NC_INVALID_HANDLE;
     }
-    GLOBAL_UNLOCK();
+
     if (opClass > d->dev_attr.max_device_opt_class) {
         mvLog(MVLOG_ERROR, "This device FW does not support NC_OPTION_CLASS%d",
               opClass);
         return NC_UNAUTHORIZED;
     }
 
-    return NC_INVALID_PARAMETERS;
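+    // Only class-4 (power configuration) options can currently be set; all other classes are rejected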
+    switch (opClass) {
+    case NC_OPTION_CLASS4:
+        rc = setDeviceOptionClass4(d, option, data, dataLength);
+        break;
+    default:
+        rc = NC_INVALID_PARAMETERS;
+    }
+    GLOBAL_UNLOCK();
+    return rc;
 }
 
 //static options can be read before device is open
@@ -2844,7 +2909,7 @@ ncStatus_t ncDeviceGetOption(struct ncDeviceHandle_t * deviceHandle,
     }
 
     if (option < DEVICE_CLASS0_BASE ||
-        option > (DEVICE_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_CLASS3)) {
+        option > (DEVICE_CLASS0_BASE + OPTION_CLASS_SIZE * NC_OPTION_LAST)) {
         mvLog(MVLOG_ERROR, "Option %d is invalid", option);
         return NC_INVALID_PARAMETERS;
     }
index befa347..56bf3aa 100644 (file)
@@ -1,6 +1,20 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+* Copyright 2017-2019 Intel Corporation.
+* The source code, information and material ("Material") contained herein is
+* owned by Intel Corporation or its suppliers or licensors, and title to such
+* Material remains with Intel Corporation or its suppliers or licensors.
+* The Material contains proprietary information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright laws and treaty
+* provisions.
+* No part of the Material may be used, copied, reproduced, modified, published,
+* uploaded, posted, transmitted, distributed or disclosed in any way without
+* Intel's prior express written permission. No license under any patent,
+* copyright or other intellectual property rights in the Material is granted to
+* or conferred upon you, either expressly, by implication, inducement, estoppel
+* or otherwise.
+* Any license under such intellectual property rights must be express and
+* approved by Intel in writing.
+*/
 
 #include <string.h>
 #include "mvnc_data.h"
index 4a9ba73..6b3168f 100644 (file)
@@ -39,6 +39,28 @@ TEST_F(MvncTestsCommon, AvailableDevicesSholdReturnErrorIfCountPtrIsNULL) {
     ASSERT_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, NULL));
 }
 
+TEST_F(MvncTestsCommon, CanGetPCIeAndUSB) {
+    if (!(getAmountOfUSBDevices() > 0 && getAmountOfPCIeDevices() > 0))
+        GTEST_SKIP_("USB and PCIe not available");
+
+    struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+    int act_devicesCount = 0;
+    ASSERT_NO_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, &act_devicesCount));
+
+    bool usb_device_found = false;
+    bool pcie_device_found = false;
+
+    for (int i = 0; i < act_devicesCount; ++i) {
+        if (isMyriadUSBDevice(act_devices[i].name)) {
+            usb_device_found = true;
+        } else if (isMyriadPCIeDevice(act_devices[i].name)) {
+            pcie_device_found = true;
+        }
+    }
+
+    EXPECT_TRUE(usb_device_found);
+    EXPECT_TRUE(pcie_device_found);
+}
 
 //  ***********************************************  //
 //             Tests using both platforms            //
@@ -47,6 +69,11 @@ TEST_F(MvncTestsCommon, AvailableDevicesSholdReturnErrorIfCountPtrIsNULL) {
  * @brief Test that USB and PCIe works at the same time. USB first
  */
 TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) {
+    if (getAmountOfPCIeDevices() == 0)
+        GTEST_SKIP() << "PCIe devices not found";
+    if (getAmountOfUSBDevices() == 0)
+        GTEST_SKIP() << "USB devices not found";
+
     ncDeviceHandle_t *deviceHandle_USB = nullptr;
     ncDeviceHandle_t *deviceHandle_PCIe = nullptr;
     std::string actDeviceName;
@@ -54,9 +81,6 @@ TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) {
     deviceDesc.protocol = NC_USB;
     deviceDesc.platform = NC_ANY_PLATFORM;
 
-    ASSERT_TRUE(getAmountOfPCIeDevices() > 0) << "PCIe devices not found";
-    ASSERT_TRUE(getAmountOfUSBDevices() > 0) << "USB devices not found";
-
     ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, watchdogInterval, firmwarePath));
 
     actDeviceName = deviceHandle_USB->private_data->dev_addr;
@@ -80,6 +104,11 @@ TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) {
  * @brief Test that USB and PCIe works at the same time. PCIe first
  */
 TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) {
+    if (getAmountOfPCIeDevices() == 0)
+        GTEST_SKIP() << "PCIe devices not found";
+    if (getAmountOfUSBDevices() == 0)
+        GTEST_SKIP() << "USB devices not found";
+
     ncDeviceHandle_t *deviceHandle_USB = nullptr;
     ncDeviceHandle_t *deviceHandle_PCIe = nullptr;
     std::string actDeviceName;
@@ -87,9 +116,6 @@ TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) {
     deviceDesc.protocol = NC_PCIE;
     deviceDesc.platform = NC_ANY_PLATFORM;
 
-    ASSERT_TRUE(getAmountOfPCIeDevices() > 0) << "PCIe devices not found";
-    ASSERT_TRUE(getAmountOfUSBDevices() > 0) <<"USB devices not found";
-
     // Open PCIe device
     ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc,
             watchdogInterval, firmwarePath));
@@ -126,8 +152,6 @@ protected:
 
         _deviceProtocol = GetParam();
         available_devices = getAmountOfDevices(_deviceProtocol);
-        ASSERT_TRUE(available_devices > 0) << ncProtocolToStr(_deviceProtocol)
-                << " devices not found";
     }
 
     ncDeviceProtocol_t _deviceProtocol = NC_ANY_PROTOCOL;
@@ -137,14 +161,16 @@ protected:
 * @brief Open any device and close it
 */
 TEST_P(MvncOpenDevice, OpenAndClose) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
     ncDeviceHandle_t*   deviceHandle = nullptr;
     std::string         deviceName;
     ncDeviceDescr_t deviceDesc = {};
     deviceDesc.protocol = _deviceProtocol;
     deviceDesc.platform = NC_ANY_PLATFORM;
 
-    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc,
-            watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
 
     ASSERT_TRUE(deviceHandle != nullptr);
     ASSERT_TRUE(deviceHandle->private_data != nullptr);
@@ -162,6 +188,9 @@ TEST_P(MvncOpenDevice, OpenAndClose) {
  * @brief Check that all field of deviceHandle would be initialized
  */
 TEST_P(MvncOpenDevice, AllHandleFieldsInitialized) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
     ncDeviceHandle_t*   deviceHandle = nullptr;
     ncDeviceDescr_t deviceDesc = {};
     deviceDesc.protocol = _deviceProtocol;
@@ -187,6 +216,9 @@ TEST_P(MvncOpenDevice, AllHandleFieldsInitialized) {
  * already has allocated device
 */
 TEST_P(MvncOpenDevice, OpenTwiceSameHandler) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
     ncDeviceHandle_t *deviceHandle = nullptr;
     ncDeviceDescr_t deviceDesc = {};
     deviceDesc.protocol = _deviceProtocol;
@@ -219,6 +251,8 @@ TEST_P(MvncOpenDevice, OpenTwiceSameHandler) {
  */
  // Fixme Test only for one device
 TEST_P(MvncOpenDevice, DISABLED_OpenSameDeviceTwiceDifferentHandlers) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
 
     ncDeviceHandle_t *deviceHandle1 = nullptr;
     ncDeviceHandle_t *deviceHandle2 = nullptr;
@@ -243,6 +277,9 @@ TEST_P(MvncOpenDevice, DISABLED_OpenSameDeviceTwiceDifferentHandlers) {
  * @note Mostly this test important for PCIe and connect to booted option, as in that cases XLinkReset have another behavior
  */
 TEST_P(MvncOpenDevice, OpenTwiceWithOneXLinkInitializion) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
     ncDeviceHandle_t *deviceHandle = nullptr;
     std::string actDeviceName;
 
@@ -301,6 +338,9 @@ protected:
 };
 
 TEST_P(MvncLoggingTests, ShouldNotPrintErrorMessagesIfCanNotOpenDevice) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
     setLogLevel(MVLOG_ERROR);
     ncDeviceHandle_t * deviceHandle = nullptr;
 
@@ -312,6 +352,9 @@ TEST_P(MvncLoggingTests, ShouldNotPrintErrorMessagesIfCanNotOpenDevice) {
 }
 
 TEST_P(MvncLoggingTests, ShouldPrintWarningMessagesIfCanNotOpenDeviceAndMvLogLevelIsInfo) {
+    if (available_devices == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
     setLogLevel(MVLOG_INFO);
     ncDeviceHandle_t * deviceHandle = nullptr;
 
@@ -367,7 +410,7 @@ protected:
 /**
  * @brief Allocate graph for one device
  */
-TEST_P(MvncGraphAllocations, OneGraph) {
+TEST_P(MvncGraphAllocations, DISABLED_OneGraph) {
     if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
     openDevices(1, _deviceHandle, _bootedDevices);
 
@@ -385,8 +428,9 @@ TEST_P(MvncGraphAllocations, OneGraph) {
 /**
  * @brief Allocate graphs for 2 device (serial)
  */
-TEST_P(MvncGraphAllocations, AllocateGraphsOn2DevicesSerial) {
-    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesSerial) {
+    if (!blobLoaded)
+        GTEST_SKIP_("Blob for test is not loaded\n");
     openDevices(2, _deviceHandle, _bootedDevices);
 
     // Create graphs handlers
@@ -418,7 +462,7 @@ TEST_P(MvncGraphAllocations, AllocateGraphsOn2DevicesSerial) {
 * @warning It's depend on USBLINK_TRANSFER_SIZE constant from UsbLinkPlatform.c file
 * @warning Need blob to use this tests
 */
-TEST_P(MvncGraphAllocations, AllocateGraphsOn2DevicesParallel) {
+TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesParallel) {
     if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
     openDevices(2, _deviceHandle, _bootedDevices);
 
@@ -485,12 +529,39 @@ TEST_F(MvncCloseDevice, EmptyFieldsOfDeviceHandle) {
     ASSERT_EQ(ncDeviceClose(&deviceHandlePtr), NC_INVALID_PARAMETERS);
 }
 
+#if (!(defined(_WIN32) || defined(_WIN64)))
+TEST_F(MvncCloseDevice, USBDeviceWillBeAvailableRightAfterClosing) {
+
+    ncDeviceHandle_t*   deviceHandle = nullptr;
+    ncDeviceDescr_t deviceDesc = {};
+    deviceDesc.protocol = NC_USB;
+    deviceDesc.platform = NC_ANY_PLATFORM;
+
+    ASSERT_NO_ERROR(ncDeviceOpen(
+            &deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+
+    ASSERT_TRUE(deviceHandle);
+    deviceDesc_t toFindDeviceDescr = {
+            .protocol = X_LINK_USB_VSC,
+            .platform = X_LINK_ANY_PLATFORM
+    };
+    strcpy(toFindDeviceDescr.name, deviceHandle->private_data->dev_addr);
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+    deviceDesc_t foundDevice = {};
+    XLinkError_t rc = XLinkFindFirstSuitableDevice(
+            X_LINK_UNBOOTED, toFindDeviceDescr, &foundDevice);
+    ASSERT_EQ(X_LINK_SUCCESS, rc);
+}
+#endif
+
 //  *************************************************** //
 //              TESTS WITH INFERENCE                    //
 
 using MvncInference = MvncGraphAllocations;
 
-TEST_P(MvncInference, DoOneIterationOfInference) {
+TEST_P(MvncInference, DISABLED_DoOneIterationOfInference) {
     if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
     openDevices(1, _deviceHandle, _bootedDevices);
 
index 176373d..8731186 100644 (file)
@@ -97,26 +97,19 @@ public:
      * @brief Get amount of all currently connected Myriad devices
      * @param[in] deviceProtocol Count only platform specific devices
      */
-    static int getAmountOfDevices(const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL) {
-        int amount = 0;
-        deviceDesc_t deviceDesc = {};
-        deviceDesc_t in_deviceDesc = {};
-        in_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
-        in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-        if(in_deviceDesc.protocol == X_LINK_USB_VSC) {
-            for (; amount < MAX_DEVICES; ++amount) {
-                if (XLinkFindDevice(amount, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc))
-                    break;
-            }
-            return amount;
-        }
-
-        if (XLinkFindDevice(amount, X_LINK_ANY_STATE, &in_deviceDesc, &deviceDesc) == X_LINK_SUCCESS) {
-            return ++amount;
-        }
-
-        return amount;
+    static int getAmountOfDevices(const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL,
+                                  const ncDevicePlatform_t devicePlatform = NC_ANY_PLATFORM,
+                                  const XLinkDeviceState_t state = X_LINK_ANY_STATE) {
+        deviceDesc_t req_deviceDesc = {};
+        req_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
+        req_deviceDesc.platform = convertPlatformToXlink(devicePlatform);
+
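+        // Query XLink for all devices matching the requested protocol, platform, and state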
+        deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+        unsigned int foundDevices = 0;
+        XLinkFindAllSuitableDevices(
+                state, req_deviceDesc, deviceDescArray, NC_MAX_DEVICES, &foundDevices);
+
+        return foundDevices;
     }
 
     /**
@@ -161,25 +154,26 @@ public:
     /**
      * @brief Get list of all currently connected Myriad devices
      */
-    static std::vector<std::string> getDevicesList() {
-        std::vector < std::string > devName;
-        deviceDesc_t tempDeviceDesc = {};
-        deviceDesc_t in_deviceDesc = {};
-        in_deviceDesc.protocol = X_LINK_USB_VSC;
-        in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
-
-        for (int i = 0; i < MAX_DEVICES; ++i) {
-            if (XLinkFindDevice(i, X_LINK_ANY_STATE, &in_deviceDesc, &tempDeviceDesc))
-                break;
-            devName.emplace_back(tempDeviceDesc.name);
+    static std::vector<std::string> getDevicesList(
+            const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL,
+            const ncDevicePlatform_t devicePlatform = NC_ANY_PLATFORM,
+            const XLinkDeviceState_t state = X_LINK_ANY_STATE) {
+
+        deviceDesc_t req_deviceDesc = {};
+        req_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
+        req_deviceDesc.platform = convertPlatformToXlink(devicePlatform);
+
+        deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+        unsigned int foundDevices = 0;
+        XLinkFindAllSuitableDevices(
+                state, req_deviceDesc, deviceDescArray, NC_MAX_DEVICES, &foundDevices);
+
+        std::vector<std::string> devNames;
+        for (int i = 0; i < foundDevices; ++i) {
+            devNames.emplace_back(deviceDescArray[i].name);
         }
 
-        in_deviceDesc.protocol = X_LINK_PCIE;
-        /// PCIe don't use indexes and always return same device
-        if (XLinkFindDevice(0, X_LINK_ANY_STATE, &in_deviceDesc, &tempDeviceDesc) == 0)
-            devName.emplace_back(tempDeviceDesc.name);
-
-        return devName;
+        return devNames;
     }
 
     static bool isMyriadXUSBDevice(const std::string &deviceName) {
@@ -228,8 +222,8 @@ public:
     /**
     * @brief Check that device matches the specified protocol
     */
-    static bool isSamePlatformDevice(const std::string &deviceName,
-                                     const ncDevicePlatform_t expectedPlatform) {
+    static bool isSamePlatformUSBDevice(const std::string &deviceName,
+                                        const ncDevicePlatform_t expectedPlatform) {
         switch (expectedPlatform) {
             case NC_MYRIAD_2:  return isMyriad2USBDevice(deviceName);
             case NC_MYRIAD_X:  return isMyriadXUSBDevice(deviceName);
@@ -242,22 +236,19 @@ public:
     }
 
     static long getAmountOfMyriadXDevices() {
-        auto devName = getDevicesList();
-        return count_if(devName.begin(), devName.end(), isMyriadXUSBDevice);
+        return getAmountOfDevices(NC_ANY_PROTOCOL, NC_MYRIAD_X);
     }
 
     static long getAmountOfMyriad2Devices() {
-        auto devName = getDevicesList();
-        return count_if(devName.begin(), devName.end(), isMyriad2USBDevice);
+        return getAmountOfDevices(NC_ANY_PROTOCOL, NC_MYRIAD_2);
     }
 
-    static long getAmountOfBootedDevices() {
-        auto devName = getDevicesList();
-        return count_if(devName.begin(), devName.end(), isMyriadBootedUSBDevice);
+    static long getAmountOfBootedDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL) {
+        return getAmountOfDevices(deviceProtocol, NC_ANY_PLATFORM, X_LINK_BOOTED);
     }
 
-    static long getAmountOfNotBootedDevices() {
-        return (getAmountOfMyriadXDevices() + getAmountOfMyriad2Devices());
+    static long getAmountOfNotBootedDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL) {
+        return getAmountOfDevices(deviceProtocol, NC_ANY_PLATFORM, X_LINK_UNBOOTED);
     }
 
     static long getAmountOfPCIeDevices() {
@@ -276,7 +267,7 @@ public:
      */
     bool readBINFile(const std::string& fileName, std::vector<char>& buf) {
         std::ifstream file(fileName, std::ios_base::binary | std::ios_base::ate);
-        if (!file.is_open()) {
+        if (file.fail()) {
             std::cout << "Can't open file!" << std::endl;
             return false;
         }
index b3be2ff..e08953b 100644 (file)
@@ -14,8 +14,9 @@ public:
 protected:
     ~MvncOpenUSBDevice() override = default;
     void SetUp() override {
+        ncDeviceResetAll();
         MvncTestsCommon::SetUp();
-        available_devices = getAmountOfNotBootedDevices();
+        available_devices = getAmountOfNotBootedDevices(NC_USB);
         ASSERT_TRUE(available_devices > 0);
     }
 };
@@ -201,8 +202,6 @@ protected:
         available_myriadX = getAmountOfMyriadXDevices();
         available_myriad2 = getAmountOfMyriad2Devices();
 
-        ASSERT_TRUE(available_myriadX > 0);
-        ASSERT_TRUE(available_myriad2 > 0);
         devicePlatform = GetParam();
     }
 };
@@ -211,6 +210,9 @@ protected:
 * @brief Open specified device and close it
 */
 TEST_P(MvncDevicePlatform, OpenAndClose) {
+    if (available_myriad2 == 0 || available_myriadX == 0)
+        GTEST_SKIP();
+
     ncDeviceHandle_t *deviceHandle = nullptr;
     ncDeviceDescr_t deviceDesc = {};
     deviceDesc.protocol = NC_USB;
@@ -222,7 +224,7 @@ TEST_P(MvncDevicePlatform, OpenAndClose) {
     unsigned int size = MAX_DEV_NAME;
     ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle, NC_RO_DEVICE_NAME, deviceName, &size));
 
-    EXPECT_TRUE(isSamePlatformDevice(deviceName, devicePlatform));
+    EXPECT_TRUE(isSamePlatformUSBDevice(deviceName, devicePlatform));
 
     ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
 
index 4ecb5da..0a85158 100644 (file)
@@ -99,6 +99,10 @@ extern int pthread_getname_np (pthread_t , char *, size_t);
 #define MVLOG_FATAL_COLOR ANSI_COLOR_RED
 #endif
 
+#ifndef MVLOG_MAXIMUM_THREAD_NAME_SIZE
+#define MVLOG_MAXIMUM_THREAD_NAME_SIZE 20
+#endif
+
 typedef enum mvLog_t{
     MVLOG_DEBUG = 0,
     MVLOG_INFO,
index 51b0d0e..d9a632a 100644 (file)
@@ -1,6 +1,20 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+* Copyright 2017-2019 Intel Corporation.
+* The source code, information and material ("Material") contained herein is
+* owned by Intel Corporation or its suppliers or licensors, and title to such
+* Material remains with Intel Corporation or its suppliers or licensors.
+* The Material contains proprietary information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright laws and treaty
+* provisions.
+* No part of the Material may be used, copied, reproduced, modified, published,
+* uploaded, posted, transmitted, distributed or disclosed in any way without
+* Intel's prior express written permission. No license under any patent,
+* copyright or other intellectual property rights in the Material is granted to
+* or conferred upon you, either expressly, by implication, inducement, estoppel
+* or otherwise.
+* Any license under such intellectual property rights must be express and
+* approved by Intel in writing.
+*/
 
 #ifndef MVSTRINGUTILS_H__
 #define MVSTRINGUTILS_H__
index 4cefaf6..8008b02 100644 (file)
@@ -1,6 +1,20 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+* Copyright 2017-2019 Intel Corporation.
+* The source code, information and material ("Material") contained herein is
+* owned by Intel Corporation or its suppliers or licensors, and title to such
+* Material remains with Intel Corporation or its suppliers or licensors.
+* The Material contains proprietary information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright laws and treaty
+* provisions.
+* No part of the Material may be used, copied, reproduced, modified, published,
+* uploaded, posted, transmitted, distributed or disclosed in any way without
+* Intel's prior express written permission. No license under any patent,
+* copyright or other intellectual property rights in the Material is granted to
+* or conferred upon you, either expressly, by implication, inducement, estoppel
+* or otherwise.
+* Any license under such intellectual property rights must be express and
+* approved by Intel in writing.
+*/
 
 #include "mvStringUtils.h"
 
index e8d3485..87db12c 100644 (file)
@@ -24,6 +24,7 @@
 #include <list>
 #define _XLINK_ENABLE_PRIVATE_INCLUDE_
 #include <XLinkPrivateDefines.h>
+#include "XLink_tool.h"
 
 namespace {
 
@@ -230,7 +231,11 @@ public:
             threadRunning = true;
 
             poolThread = std::thread([this]() {
-                if (pthread_setname_np(pthread_self(), "WatchdogThread") != 0) {
+                if (pthread_setname_np(
+#ifndef __APPLE__
+                pthread_self(),
+#endif
+                "WatchdogThread") != 0) {
                     perror("Setting name for watchdog thread failed");
                 }
                 watchdog_routine();
@@ -273,7 +278,10 @@ public:
                                     return std::get<0>(item)->getHandle() == ptr->actual->getHandle();
                                 });
         bool bFound = idx != std::end(watchedDevices);
-        watchedDevices.erase(idx);
+        if(bFound) {
+            watchedDevices.erase(idx);
+            delete ptr;
+        }
 
         // wake up thread since we might select removed device as nex to be ping, and there is no more devices available
         notificationReason = WAKE_UP_THREAD;
@@ -283,17 +291,6 @@ public:
         return bFound;
     }
 
-    void clear() {
-        {
-            mvLog(MVLOG_INFO, "clear\n");
-            auto __locker = lock();
-            watchedDevices.clear();
-            notificationReason = WAKE_UP_THREAD;
-        }
-        // wake up thread
-        wakeUpPingThread.notify_one();
-    }
-
  private:
     std::unique_lock<std::mutex> lock() {
         return std::unique_lock<std::mutex>(devicesListAcc);
index 026f409..f4c4994 100644 (file)
@@ -35,7 +35,8 @@ typedef enum {
 WD_API wd_error_t watchdog_init_context(wd_context *ctx);
 
 /**
- * @brief creates watchdog thread, if not created, and registers new watchee device, and initialise opaque handle to it
+ * @brief Creates the watchdog thread, if not already created, registers a new watchee device, and initialises an opaque handle to it.
+ *        To avoid a memory leak, the registered device must be unregistered with watchdog_unregister_device().
  * @param d - newly connected device descriptor
  * @return
  */
index a930d12..673f806 100644 (file)
@@ -26,8 +26,11 @@ if (WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4308")
 endif()
 
-set(NGRAPH_TOOLS_ENABLE FALSE)
-set(NGRAPH_STATIC_LIB_ENABLE TRUE)
+set(NGRAPH_TOOLS_ENABLE OFF)
+set(NGRAPH_STATIC_LIB_ENABLE ON)
+set(NGRAPH_JSON_ENABLE OFF)
+set(NGRAPH_ADDRESS_SANITIZER OFF)
+
 set(NGRAPH_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ngraph/src/ngraph")
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/src"
     "${NGRAPH_SOURCE_DIR}")
@@ -46,6 +49,11 @@ if (HAS_MAYBE_UNINITIALIZED)
             set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-maybe-uninitialized -Wno-return-type")
         endif()
 endif()
+
+if(UNIX AND CMAKE_CXX_COMPILER_ID MATCHES Intel)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-warning=1011")
+endif()
+
 # WA for GCC 7.0
 if (UNIX)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type")
index 46cb6cb..e476f67 100644 (file)
@@ -1,6 +1,18 @@
-# Copyright (C) 2018 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
+#===============================================================================
+# Copyright (C) 2018-2019 Intel Corporation
 #
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
 
 set(TARGET stb_image)
 
index fddfa28..3d80df3 100644 (file)
@@ -32,4 +32,3 @@ if (ENABLE_OPENCV)
 endif()
 
 add_subdirectory(vpu)
-
index 3d4fc2b..6ce9721 100644 (file)
@@ -14,6 +14,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-from openvino.tools.accuracy_checker.accuracy_checker.main import main
+from accuracy_checker.main import main
 
 main()
index 5313d71..7e024e1 100644 (file)
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-from openvino.tools.accuracy_checker.accuracy_checker.annotation_converters.convert import main
+from accuracy_checker.annotation_converters.convert import main
 
 if __name__ == '__main__':
     main()
index bf11be2..af4c2e3 100644 (file)
-# OpenVINO™ Benchmark Tool
-Inference Engine Benchmark Tool is a Python\* command-line tool, which measures latency for synchronous mode.
+# Benchmark Python* Tool
 
-Please, refer to https://docs.openvinotoolkit.org for details.
+This topic demonstrates how to run the Benchmark Python* Tool, which performs inference using convolutional networks. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
 
-## Usage
+> **NOTE:** This topic describes usage of the Python implementation of the Benchmark Tool. For the C++ implementation, refer to the [Benchmark C++ Tool](./inference-engine/samples/benchmark_app/README.md).
 
-In general, the Benchmark Tool is configured in the same way as the Accuracy Checker. You can also use additional command line arguments to define benchmark-specific parameters:
+## How It Works
 
-| Argument                                     | Type   | Description                                              |
-| -------------------------------------------- | ------ | -------------------------------------------------------- |
-| -c, --config                                 | string | Required. Path to the YML file with local configuration  |
-| -ic, --benchmark_iterations_count            | string | Optional. Benchmark itertations count. (1000 is default) |
+Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend on the mode defined with the `-api` command-line parameter.
 
-## Hardware requirements
-Hardware requirements depend on a model. Typically for public models RAM memory size has to be not less then 16Gb independently on operation system.
\ No newline at end of file
+> **NOTE**: By default, Inference Engine samples, tools and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+### Synchronous API
+
+For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. The number of executions is defined by one of the following values:
+* Number of iterations defined with the `-niter` command-line argument
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution will continue until both conditions are met)
+* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device.
+
+During the execution, the application collects two types of metrics:
+* Latency for each infer request executed with `Infer` method
+* Duration of all executions
+
+The reported latency value is calculated as the mean of all collected latencies. The reported throughput value is derived from the reported latency and additionally depends on batch size, as the sketch below illustrates.
+
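+The following is a minimal sketch of the synchronous measurement described above, using the 2019-era `openvino.inference_engine` Python API. The model path, iteration count and random input data are placeholder assumptions; the real tool additionally handles input filling from files, statistics reports and device-specific defaults.
+
+```python
+import time
+import numpy as np
+from openvino.inference_engine import IENetwork, IECore
+
+# Placeholder model and iteration count -- adjust to your own IR and run length.
+model_xml, model_bin = 'googlenet-v1.xml', 'googlenet-v1.bin'
+niter = 100
+
+ie = IECore()
+net = IENetwork(model=model_xml, weights=model_bin)
+input_blob = next(iter(net.inputs))
+exec_net = ie.load_network(network=net, device_name='CPU')
+
+# Random data stands in for real inputs, as the tool does for data-insensitive topologies.
+data = np.random.uniform(size=net.inputs[input_blob].shape).astype(np.float32)
+
+latencies = []
+for _ in range(niter):
+    start = time.perf_counter()
+    exec_net.infer(inputs={input_blob: data})       # one infer request, synchronous Infer call
+    latencies.append((time.perf_counter() - start) * 1000)
+
+latency_ms = sum(latencies) / len(latencies)        # reported latency: mean of collected latencies
+throughput = net.batch_size * 1000 / latency_ms     # throughput derived from latency and batch size
+print('Latency: {:.2f} ms, Throughput: {:.2f} FPS'.format(latency_ms, throughput))
+```
+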
+### Asynchronous API
+For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. The number of executions is defined by one of the following values:
+* Number of iterations defined with the `-niter` command-line argument
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution will continue until both conditions are met)
+* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device.
+
+The infer requests are executed asynchronously. A callback is used to wait for the previous execution to complete. The application measures all infer request executions and reports the throughput metric based on batch size and total execution duration, as sketched below.
+
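+A rough sketch of the asynchronous approach with the same API is shown below. A simple round-robin `wait` on the request slots stands in for the callback-based queue that the tool actually uses, and the `nireq`/`niter` values are placeholder assumptions.
+
+```python
+import time
+import numpy as np
+from openvino.inference_engine import IENetwork, IECore
+
+model_xml, model_bin = 'googlenet-v1.xml', 'googlenet-v1.bin'  # placeholder IR
+nireq, niter = 4, 1000                                         # assumed request count and iterations
+
+ie = IECore()
+net = IENetwork(model=model_xml, weights=model_bin)
+input_blob = next(iter(net.inputs))
+exec_net = ie.load_network(network=net, device_name='CPU', num_requests=nireq)
+data = np.random.uniform(size=net.inputs[input_blob].shape).astype(np.float32)
+
+start = time.perf_counter()
+for i in range(niter):
+    request = exec_net.requests[i % nireq]
+    if i >= nireq:
+        request.wait(-1)                            # wait for the previous run on this request slot
+    request.async_infer(inputs={input_blob: data})  # StartAsync equivalent in the Python API
+for request in exec_net.requests:                   # drain the remaining in-flight requests
+    request.wait(-1)
+total_sec = time.perf_counter() - start
+
+fps = niter * net.batch_size / total_sec            # throughput from batch size and total duration
+print('Throughput: {:.2f} FPS'.format(fps))
+```
+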
+## Run the Tool
+Notice that the benchmark_app usually produces optimal performance for any device out of the box.
+
+**So in most cases you don't need to tune the app options explicitly and the plain device name is enough**, for example, for CPU:
+```sh
+python3 benchmark_app.py -m <model> -i <input> -d CPU
+```
+
+However, it still may be non-optimal for some cases, especially for very small networks. You can read more details in [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md).
+
+Running the application with the `-h` or `--help` option yields the following usage message:
+
+```
+usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL
+                        [-d TARGET_DEVICE]
+                        [-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG]
+                        [-api {sync,async}] [-niter NUMBER_ITERATIONS]
+                        [-b BATCH_SIZE]
+                        [-stream_output [STREAM_OUTPUT]] [-t TIME]
+                        [-progress [PROGRESS]] [-nstreams NUMBER_STREAMS]
+                        [-nthreads NUMBER_THREADS] [-pin {YES,NO}]
+                        [--exec_graph_path EXEC_GRAPH_PATH]
+                        [-pc [PERF_COUNTS]]
+
+Options:
+  -h, --help            Show this help message and exit.
+  -i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT
+                        Optional. Path to a folder with images and/or binaries
+                        or to specific image or binary file.
+  -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL
+                        Required. Path to an .xml file with a trained model.
+  -d TARGET_DEVICE, --target_device TARGET_DEVICE
+                        Optional. Specify a target device to infer on: CPU,
+                        GPU, FPGA, HDDL or MYRIAD.
+                        Use "-d HETERO:<comma separated devices list>" format to specify HETERO plugin.
+                        Use "-d MULTI:<comma separated devices list>" format to specify MULTI plugin.
+                        The application looks for a suitable plugin for the specified device.
+  -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION
+                        Optional. Required for CPU custom layers. Absolute
+                        path to a shared library with the kernels
+                        implementations.
+  -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG
+                        Optional. Required for GPU custom kernels. Absolute
+                        path to an .xml file with the kernels description.
+  -api {sync,async}, --api_type {sync,async}
+                        Optional. Enable using sync/async API. Default value
+                        is async.
+  -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS
+                        Optional. Number of iterations. If not specified, the
+                        number of iterations is calculated depending on a
+                        device.
+  -b BATCH_SIZE, --batch_size BATCH_SIZE
+                        Optional. Batch size value. If not specified, the
+                        batch size value is determined from IR
+  -stream_output [STREAM_OUTPUT]
+                        Optional. Print progress as a plain text. When
+                        specified, an interactive progress bar is replaced
+                        with a multiline output.
+  -t TIME, --time TIME  Optional. Time in seconds to execute topology.
+  -progress [PROGRESS]  Optional. Show progress bar (can affect performance
+                        measurement). Default values is "False".
+  -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
+                       Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
+                       (for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
+                       Default value is determined automatically for a device. 
+                       Please note that although the automatic selection usually provides a reasonable performance, 
+                       it still may be non-optimal for some cases, especially for very small networks.
+  -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
+                        Number of threads to use for inference on the CPU
+                        (including HETERO  and MULTI cases).
+  -pin {YES,NO}, --infer_threads_pinning {YES,NO}
+                        Optional. Enable ("YES" is default value) or disable
+                        ("NO")CPU threads pinning for CPU-involved inference.
+  --exec_graph_path EXEC_GRAPH_PATH
+                        Optional. Path to a file where to store executable
+                        graph information serialized.
+  -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
+                        Optional. Report performance counters.
+
+```
+
+Running the application with an empty list of options yields the usage message given above and an error message.
+
+The application supports topologies with one or more inputs. If a topology is not data-sensitive, you can skip the input parameter. In this case, inputs are filled with random values.
+If a model has only image input(s), please provide a folder with images or a path to an image as input.
+If a model has some specific input(s) (not images), please prepare binary file(s) filled with data of the appropriate precision and provide a path to them as input.
+If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one (see the sketch below).
+
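+As a hedged illustration of how such inputs could be prepared outside the tool, the snippet below fills an image input from a file with OpenCV and a non-image input from a raw binary file with NumPy. The input names, shapes and FP32 precision are assumptions made for the example only.
+
+```python
+import cv2
+import numpy as np
+
+# Assumed inputs: an image input of shape [1, 3, 224, 224] and a non-image FP32 input of shape [1, 10].
+image = cv2.imread('car.png')                                   # BGR, matching the default channel order
+image = cv2.resize(image, (224, 224)).transpose((2, 0, 1))      # HWC -> CHW
+image = image[np.newaxis].astype(np.float32)                    # add batch dimension
+
+extra = np.fromfile('extra_input.bin', dtype=np.float32).reshape(1, 10)
+
+# Inputs not backed by files are simply filled with random values, as the tool does.
+random_fill = np.random.uniform(size=(1, 10)).astype(np.float32)
+
+inputs = {'data': image, 'extra': extra}                        # blob names are placeholders
+```
+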
+To run the tool, you can use public or Intel's pre-trained models. To download the models, use the OpenVINO [Model Downloader](./tools/downloader/README.md) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+## Examples of Running the Tool
+
+This section provides step-by-step instructions on how to run the Benchmark Tool with the `googlenet-v1` public model on CPU or FPGA devices. As an input, the `car.png` file from the `<INSTALL_DIR>/deployment_tools/demo/` directory is used.  
+
+> **NOTE:** Internet access is required to execute the following steps successfully. If you have access to the Internet through a proxy server only, please make sure that it is configured in your OS environment.
+
+1. Download the model. Go to the Model Downloader directory and run the `downloader.py` script, specifying the model name and the directory to download the model to:
+   ```sh
+   cd <INSTALL_DIR>/deployment_tools/open_model_zoo/tools/downloader
+   ```
+   ```sh
+   python3 downloader.py --name googlenet-v1 -o <models_dir>
+   ```
+2. Convert the model to the Inference Engine IR format. Go to the Model Optimizer directory and run the `mo.py` script, specifying the path to the model, the data type (which must be FP32 for CPU and FPGA) and the output directory to generate the IR files:
+   ```sh
+   cd <INSTALL_DIR>/deployment_tools/model_optimizer
+   ```
+   ```sh
+   python3 mo.py --input_model <models_dir>/public/googlenet-v1/googlenet-v1.caffemodel --data_type FP32 --output_dir <ir_dir>
+   ```     
+3. Run the tool, specifying the `<INSTALL_DIR>/deployment_tools/demo/car.png` file as an input image, the IR of the `googlenet-v1` model and a device to perform inference on. The following commands demonstrate running the Benchmark Tool in the asynchronous mode on CPU and FPGA devices:
+   
+   * On CPU:
+   ```sh
+    python3 benchmark_app.py -m <ir_dir>/googlenet-v1.xml -d CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true -b 1
+   ```
+   * On FPGA:
+   ```sh
+   python3 benchmark_app.py -m <ir_dir>/googlenet-v1.xml -d HETERO:FPGA,CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true -b 1
+   ```
+
+The application outputs the number of executed iterations, total duration of execution, latency and throughput.
+Additionally, if you set the `-pc` parameter, the application outputs performance counters.
+If you set `--exec_graph_path`, the application stores the serialized executable graph information to the specified file.
+
+Below are fragments of sample output for CPU and FPGA devices: 
+* For CPU:
+   ```
+   [Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
+   Progress: |................................| 100.00%
+
+   [Step 9/9] Dumping statistics report
+   Progress: |................................| 100.00%
+
+   Count:      4408 iterations
+   Duration:   60153.52 ms
+   Latency:    51.8244 ms
+   Throughput: 73.28 FPS
+   ```
+* For FPGA:
+   ```
+   [Step 10/11] Measuring performance (Start inference asyncronously, 5 inference requests using 1 streams for CPU, limits: 120000 ms duration)
+   Progress: |................................| 100%
+
+   [Step 11/11] Dumping statistics report
+   Count:      98075 iterations
+   Duration:   120011.03 ms
+   Latency:    5.65 ms
+   Throughput: 817.22 FPS
+   ```
+
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](./tools/downloader/README.md)
\ No newline at end of file
diff --git a/inference-engine/tools/benchmark_tool/benchmark.py b/inference-engine/tools/benchmark_tool/benchmark.py
deleted file mode 100644 (file)
index 0e5280f..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-"""
-Copyright (C) 2018-2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import openvino.tools.benchmark as benchmark
-
-if __name__ == '__main__':
-    config = benchmark.CommandLineReader.read()
-    result = benchmark.Benchmark(config).run()
-    print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
\ No newline at end of file
diff --git a/inference-engine/tools/benchmark_tool/benchmark_app.py b/inference-engine/tools/benchmark_tool/benchmark_app.py
new file mode 100644 (file)
index 0000000..8c91b98
--- /dev/null
@@ -0,0 +1,200 @@
+import os
+import sys
+from datetime import datetime
+
+from parameters import parse_args
+from openvino.tools.benchmark.benchmark import Benchmark
+from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME
+from openvino.tools.benchmark.utils.infer_request_wrap import InferRequestsQueue
+from openvino.tools.benchmark.utils.inputs_filling import get_inputs
+from openvino.tools.benchmark.utils.logging import logger
+from openvino.tools.benchmark.utils.progress_bar import ProgressBar
+from openvino.tools.benchmark.utils.utils import next_step, read_network, config_network_inputs, get_number_iterations, \
+    process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
+    get_command_line_arguments
+from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
+
+def main(args):
+    statistics = None
+    try:
+        if args.number_streams is None:
+                logger.warn(" -nstreams default value is determined automatically for a device. "
+                            "Although the automatic selection usually provides a reasonable performance, "
+                            "but it still may be non-optimal for some cases, for more information look at README. ")
+
+        if args.report_type:
+          statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
+          statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, get_command_line_arguments(sys.argv))
+
+
+        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
+        next_step()
+
+        device_name = args.target_device.upper()
+
+        benchmark = Benchmark(args.target_device, args.number_infer_requests,
+                              args.number_iterations, args.time, args.api_type)
+
+        benchmark.add_extension(args.path_to_extension, args.path_to_cldnn_config)
+
+        version = benchmark.get_version_info()
+
+        logger.info(version)
+
+        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
+        next_step()
+
+        start_time = datetime.now()
+        ie_network = read_network(args.path_to_model)
+        duration_ms = "{:.2f}".format((datetime.now() - start_time).total_seconds() * 1000)
+        if statistics:
+            logger.info("Read network took {} ms".format(duration_ms))
+            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+                                      [
+                                          ('read network time (ms)', duration_ms)
+                                      ])
+
+        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
+
+        next_step()
+        if args.batch_size and args.batch_size != ie_network.batch_size:
+            benchmark.reshape(ie_network, args.batch_size)
+        batch_size = ie_network.batch_size
+        logger.info('Network batch size: {}, precision: {}'.format(ie_network.batch_size, ie_network.precision))
+
+        # --------------------- 5. Configuring input of the model ------------------------------------------------------
+        next_step()
+
+        config_network_inputs(ie_network)
+
+        # --------------------- 6. Setting device configuration --------------------------------------------------------
+        next_step()
+        benchmark.set_config(args.number_streams, args.api_type, args.number_threads,
+                             args.infer_threads_pinning)
+
+        # --------------------- 7. Loading the model to the device -----------------------------------------------------
+        next_step()
+
+        start_time = datetime.now()
+        perf_counts = True if args.perf_counts or \
+                              args.report_type in [ averageCntReport, detailedCntReport ] or \
+                              args.exec_graph_path else False
+        exe_network = benchmark.load_network(ie_network, perf_counts, args.number_infer_requests)
+        duration_ms = "{:.2f}".format((datetime.now() - start_time).total_seconds() * 1000)
+        if statistics:
+            logger.info("Load network took {} ms".format(duration_ms))
+            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+                                      [
+                                          ('load network time (ms)', duration_ms)
+                                      ])
+
+        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
+        next_step()
+
+        # Number of requests
+        infer_requests = exe_network.requests
+        benchmark.nireq = len(infer_requests)
+
+        # Iteration limit
+        benchmark.niter = get_number_iterations(benchmark.niter, len(exe_network.requests), args.api_type)
+
+        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
+        next_step()
+
+        request_queue = InferRequestsQueue(infer_requests)
+
+        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
+        requests_input_data = get_inputs(path_to_input, batch_size, ie_network.inputs, infer_requests)
+
+        if statistics:
+            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
+                                      [
+                                          ('topology', ie_network.name),
+                                          ('target device', device_name),
+                                          ('API', args.api_type),
+                                          ('precision', str(ie_network.precision)),
+                                          ('batch size', str(ie_network.batch_size)),
+                                          ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"),
+                                          ('number of parallel infer requests', str(benchmark.nireq)),
+                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
+                                       ])
+
+            for nstreams in benchmark.device_number_streams.items():
+                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
+                                         [
+                                            ("number of {} streams".format(nstreams[0]), str(nstreams[1])),
+                                         ])
+
+        # ------------------------------------ 10. Measuring performance -----------------------------------------------
+
+        output_string = process_help_inference_string(benchmark)
+
+        next_step(output_string)
+        progress_bar_total_count = 10000
+        if benchmark.niter and not benchmark.duration_seconds:
+            progress_bar_total_count = benchmark.niter
+
+        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)
+
+        fps, latency_ms, total_duration_sec, iteration = benchmark.infer(request_queue, requests_input_data,
+                                                                         batch_size, progress_bar)
+
+        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
+        next_step()
+
+        if args.exec_graph_path:
+            dump_exec_graph(exe_network, args.exec_graph_path)
+
+        if perf_counts:
+            perfs_count_list = []
+            for ni in range(int(benchmark.nireq)):
+                perfs_count_list.append(exe_network.requests[ni].get_perf_counts())
+            if args.perf_counts:
+                print_perf_counters(perfs_count_list)
+            if statistics:
+              statistics.dump_performance_counters(perfs_count_list)
+
+        if statistics:
+            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+                                      [
+                                          ('total execution time (ms)', '{:.2f}'.format(get_duration_in_milliseconds(total_duration_sec))),
+                                          ('total number of iterations', str(iteration)),
+                                      ])
+            if MULTI_DEVICE_NAME not in device_name:
+                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+                                          [
+                                              ('latency (ms)', '{:.2f}'.format(latency_ms)),
+                                          ])
+
+            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+                                      [
+                                          ('throughput', '{:.2f}'.format(fps)),
+                                      ])
+
+        if statistics:
+          statistics.dump()
+
+        print('Count:      {} iterations'.format(iteration))
+        print('Duration:   {:.2f} ms'.format(get_duration_in_milliseconds(total_duration_sec)))
+        if MULTI_DEVICE_NAME not in device_name:
+            print('Latency:    {:.2f} ms'.format(latency_ms))
+        print('Throughput: {:.2f} FPS'.format(fps))
+
+        del exe_network
+
+        next_step.step_id = 0
+    except Exception as e:
+        logger.exception(e)
+
+        if statistics:
+            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+                                      [
+                                          ('error', str(e)),
+                                      ])
+            statistics.dump()
+
+if __name__ == "__main__":
+    # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
+    next_step()
+
+    main(parse_args())
diff --git a/inference-engine/tools/benchmark_tool/parameters.py b/inference-engine/tools/benchmark_tool/parameters.py
new file mode 100644 (file)
index 0000000..cf92b5b
--- /dev/null
@@ -0,0 +1,98 @@
+import argparse
+from fnmatch import fnmatch
+
+from openvino.tools.benchmark.utils.constants import XML_EXTENSION_PATTERN
+
+
+def str2bool(v):
+    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+        return True
+    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+
+
+def validate_args(args):
+    if args.number_iterations is not None and args.number_iterations < 0:
+        raise Exception("Number of iterations should be positive (invalid -niter option value)")
+    if args.number_infer_requests and args.number_infer_requests < 0:
+        raise Exception("Number of inference requests should be positive (invalid -nireq option value)")
+    if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN):
+        raise Exception('Path {} is not an xml file.'.format(args.path_to_model))
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=False)
+    args = parser.add_argument_group('Options')
+    args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS,
+                      help='Show this help message and exit.')
+    args.add_argument('-i', '--path_to_input', type=str, required=False,
+                      help='Optional. '
+                           'Path to a folder with images and/or binaries or to specific image or binary file.')
+    args.add_argument('-m', '--path_to_model', type=str, required=True,
+                      help='Required. Path to an .xml file with a trained model.')
+    args.add_argument('-d', '--target_device', type=str, required=False, default='CPU',
+                      help='Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. '
+                           'Use \'-d HETERO:<comma separated devices list>\' format to specify HETERO plugin. '
+                           'Use \'-d MULTI:<comma separated devices list>\' format to specify MULTI plugin. '
+                           'The application looks for a suitable plugin for the specified device.')
+    args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None,
+                      help='Optional. Required for CPU custom layers. '
+                           'Absolute path to a shared library with the kernels implementations.')
+    args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False,
+                      help='Optional. Required for GPU custom kernels. Absolute path to an .xml file with the '
+                           'kernels description.')
+    args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
+                      help='Optional. Enable using sync/async API. Default value is async.')
+    args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None,
+                      help='Optional. Number of iterations. '
+                           'If not specified, the number of iterations is calculated depending on a device.')
+    args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=None,
+                      help='Optional. Number of infer requests. Default value is determined automatically for device.')
+    args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
+                      help='Optional. ' +
+                           'Batch size value. ' +
+                           'If not specified, the batch size value is determined from Intermediate Representation')
+    args.add_argument('-stream_output', type=str2bool, required=False, default=False, nargs='?', const=True,
+                      help='Optional. '
+                           'Print progress as a plain text. '
+                           'When specified, an interactive progress bar is replaced with a multi-line output.')
+    args.add_argument('-t', '--time', type=int, required=False, default=None,
+                      help='Optional. Time in seconds to execute topology.')
+    args.add_argument('-progress', type=str2bool, required=False, default=False, nargs='?', const=True,
+                      help='Optional. '
+                           'Show progress bar (can affect performance measurement). Default values is \'False\'.')
+    args.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None,
+                      help='Optional. Number of streams to use for inference on the CPU/GPU in throughput mode '
+                           '(for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> '
+                           'or just <nstreams>). '
+                           'Default value is determined automatically for a device. Please note that although the automatic selection '
+                           'usually provides a reasonable performance, it still may be non - optimal for some cases, especially for very small networks. '
+                           'See samples README for more details.')
+
+    args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
+                      help='Number of threads to use for inference on the CPU '
+                           '(including HETERO and MULTI cases).')
+    args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', choices=['YES', 'NO'],
+                      help='Optional. Enable (\'YES\' is default value) or disable (\'NO\')'
+                           'CPU threads pinning for CPU-involved inference.')
+    args.add_argument('--exec_graph_path', type=str, required=False,
+                      help='Optional. Path to a file where to store executable graph information serialized.')
+    args.add_argument('-pc', '--perf_counts', type=str2bool, required=False, default=False, nargs='?', const=True,
+                      help='Optional. Report performance counters.', )
+    args.add_argument('--report_type', type=str, required=False,
+                      choices=['no_counters', 'average_counters', 'detailed_counters'],
+                      help="Optional. Enable collecting statistics report. \"no_counters\" report contains "
+                           "configuration options specified, resulting FPS and latency. \"average_counters\" "
+                           "report extends \"no_counters\" report and additionally includes average PM "
+                           "counters values for each layer from the network. \"detailed_counters\" report "
+                           "extends \"average_counters\" report and additionally includes per-layer PM "
+                           "counters and latency for each executed infer request.")
+    args.add_argument('--report_folder', type=str, required=False, default='',
+                      help="Optional. Path to a folder where statistics report is stored.")
+    parsed_args = parser.parse_args()
+
+    validate_args(parsed_args)
+
+    return parsed_args
index 9b6a061..7042cb2 100644 (file)
@@ -1,17 +1,4 @@
-joblib==0.13.2
-nibabel==2.4.1
-numpy==1.16.4
-opencv-python==4.1.0.25
-Pillow==6.0.0
-pkg-resources==0.0.0
-progress==1.5
-py-cpuinfo==5.0.0
-PyYAML==5.1.1
-scikit-learn==0.21.2
-scipy==1.3.0
-Shapely==1.6.4.post2
-six==1.12.0
-sklearn==0.0
-tqdm==4.32.2
-xmltodict==0.12.0
-yamlloader==0.5.5
+py-cpuinfo
+numpy
+progress
+opencv-python
\ No newline at end of file
index 219bc0b..06193db 100644 (file)
 # Python* Calibration Tool
 
-The Python* Calibration Tool calibrates a given FP32 model so that you can run calibrated model in low-precision 8-bit integer mode while keeping the input data of this model in the original precision.
-The Calibration Tool is a Python\* command-line tool, which imports Python types from the `openvino.tools.calibration` package.
+## Introduction
+
+The Calibration Tool quantizes a given FP16 or FP32 model and produces a low-precision 8-bit integer (INT8) model while keeping the model inputs in the original precision. To learn more about benefits of inference in INT8 precision, refer to [Using Low-Precision 8-bit Integer Inference](./docs/IE_DG/Int8Inference.md).
 
 > **NOTE**: INT8 models are currently supported only by the CPU plugin. For the full list of supported configurations, see the [Supported Devices](./docs/IE_DG/supported_plugins/Supported_Devices.md) topic.
 
-## Hardware requirements
-Hardware requirements depend on a model. Typically for public models RAM memory size has to be not less then 16Gb, drive has to have not less then 30 GB free space independently on operation system. Temporary directory is used to cache layers output during calibration.
+You can run the Calibration Tool in two modes: 
+
+* The **standard mode** performs quantization with the minimal accuracy drop within a specified threshold compared to the accuracy of the original model. This mode utilizes the [Accuracy Checker tool](./tools/accuracy_checker/README.md) to measure accuracy during the calibration process. Use this mode to obtain an INT8 IR that can be directly used in your application.
+
+* The **simplified mode** produces an IR that contains plain statistics for each layer, collected without any accuracy check, meaning that the accuracy of the new IR with statistics might be dramatically low. Therefore, all layers are considered to be executed in INT8. Use this mode to understand the potential performance gain of converting the model to INT8 precision and decide whether to run the standard mode routine.
+
+The Calibration Tool is a Python\* command-line tool, which imports Python types from the `openvino.tools.calibration` package.
+
+## System Requirements
+Hardware requirements depend on a model. Typically, public models require not less than 16 GB of RAM and not less than 30 GB of free drive space, regardless of the operating system. A temporary directory is used to cache layer outputs during calibration.
 
 ## Usage
-The Calibration Tool is configured in the same way as the Accuracy Checker. You can also use additional command-line arguments to define calibration-specific parameters.
+You can run the Calibration Tool in either standard or simplified mode with an appropriate set of configuration parameters. 
+
+### Standard Mode
+In the standard mode, the Calibration Tool is configured in the same way as the Accuracy Checker.
+
+> **NOTE**: For consistency reasons, some arguments have the same name and meaning as in the Accuracy Checker and can be reused for running the Accuracy Checker. 
+
+For configuring the tool, you can use the following command-line arguments:
+
+**Command-Line arguments common for the Calibration Tool and Accuracy Checker**
 
-### Command-Line Arguments for the Accuracy Checker Tool reused in Calibration Tool
 | Argument                                     | Type   | Description                                             |
 | -------------------------------------------- | ------ | ------------------------------------------------------- |
-| -c, --config                                 | string | Optional. Path to the YML file with local configuration |
-| -d, --definitions                            | string | Optional. Path to the YML file with definitions         |
-| -m, --models                                 | string | Optional. Prefix path to the models and weights. In the simplified mode, it is the path to IR .xml file   |
-| -s, --source                                 | string | Optional. Prefix path to the data source. In the simplified mode, it is the path to a folder with images   |
-| -a, --annotations                            | string | Optional. Prefix path to the converted annotations and datasets meta data |
-| -e, --extensions                             | string | Optional. Prefix path to extensions folder. In simplified mode is a path to extensions library |
-| --cpu_extensions_mode, --cpu-extensions-mode | string | Optional. specified preferable set of processor instruction for automatic searching the CPU extension lib: `avx2` or `sse4` |
-| -C, --converted_models, --converted-models   | string | Optional. Directory to store Model Optimizer converted models. Used for DLSDK launcher only |
-| -M, --model_optimizer, --model-optimizer     | string | Optional. Path to model optimizer Caffe* directory       |
-| --tf_custom_op_config_dir, --tf-custom-op-config-dir | string | Optional. Path to directory with TensorFlow* custom operation configuration files for model optimizer |
-| --tf_obj_detection_api_pipeline_config_path, --tf-obj-detection-api-pipeline-config-path | string | Optional. Path to directory with TensorFlow object detection API pipeline configuration files for the Model Optimizer |
-| --progress                                   | string | Optional. Progress reporter: `bar`, `print` or `None`   |
-| -td, --target_devices, --target-devices      | string | Optional. Space-separated list of devices for infer     |
-| -tt, --target_tags, --target-tags | string   | Optional. Space-separated list of launcher tags for infer        |
-
-### Specific Command Line Arguments for Calibration Tool
+| `-c`, `--config`                                 | string | Required. Path to the YML file with local configuration. |
+| `-d`, `--definitions`                            | string | Optional. Path to the YML file with definitions.         |
+| `-m`, `--models`                                 | string | Optional. Prefix path to the models and weights.   |
+| `-s`, `--source`                                 | string | Optional. Prefix path to the data source.    |
+| `-a`, `--annotations`                            | string | Optional. Prefix path to the converted annotations and datasets meta data. |
+| `-e`, `--extensions`                             | string | Optional. Prefix path to extensions folder.              |
+| `--cpu_extensions_mode`, `--cpu-extensions-mode` | string | Optional. Preferable set of processor instruction for automatic searching the CPU extension lib: `avx2` or `sse4`. |
+| `-C`, `--converted_models`, `--converted-models`   | string | Optional. Directory to store Model Optimizer converted models.|
+| `-M`, `--model_optimizer`, `--model-optimizer`     | string | Optional. Path to model optimizer Caffe* directory.       |
+| `--tf_custom_op_config_dir`, `--tf-custom-op-config-dir` | string | Optional. Path to directory with TensorFlow* custom operation configuration files for model optimizer. |
+| `--tf_obj_detection_api_pipeline_config_path`, `--tf-obj-detection-api-pipeline-config-path` | string | Optional. Path to directory with TensorFlow object detection API pipeline configuration files for the Model Optimizer. |
+| `--progress`                                   | string | Optional. Progress reporter: `bar`, `print` or `None`   |
+| `-td`, `--target_devices`, `--target-devices`      | string | Optional. Space-separated list of devices for infer     |
+| `-tt`, `--target_tags`, `--target-tags`            | string | Optional. Space-separated list of launcher tags for infer        |
+
+**Command Line Arguments specific for Calibration Tool**
+
 | Argument                          | Type   | Description                                               |
 | --------------------------------- | ------ | --------------------------------------------------------- |
-| -p, --precision                   | string | Optional. Precision to calibrate. Default value is INT8. In the simplified mode, determines output IR precision   |
-| --ignore_layer_types, --ignore-layer-types | string | Optional. Layer types list which will be skipped during quantization |
-| --ignore_layer_types_path, --ignore-layer-types-path | string | Optional. Ignore layer types file path |
-| --ignore_layer_names, --ignore-layer-names | string | Optional. Layer names list which will be skipped during quantization |
-| --ignore_layer_names_path, --ignore-layer-names-path | string | Optional. Ignore layer names file path |
-| --batch_size, --batch-size        | integer| Optional. Batch size value. If not specified, the batch size value is determined from IR |
-| -th, --threshold                  | float | Optional. Accuracy drop of quantized model should not exceed this threshold. Should be pointer in percents without percent sign. (1% is default) |
-| -ic, --benchmark_iterations_count, --benchmark-iterations-count | integer | Optional. Benchmark iterations count (1 is default). |
-| -mn, --metric_name, --metric-name | string | Optional. Metric name used during calibration |
-| -mt, --metric_type, --metric-type | string | Optional. Metric type used during calibration |
-| -o, --output_dir, --output-dir    | string | Optional. Directory to store converted models. Original model directory is used if not defined |
-
-### Simplified mode
+| `-p`, `--precision`                   | string | Optional. Precision to calibrate. Default value is INT8. In the simplified mode, determines output IR precision.   |
+| `--ignore_layer_types`, `--ignore-layer-types` | string | Optional. Layer types list which will be skipped during quantization. |
+| `--ignore_layer_types_path`, `--ignore-layer-types-path` | string | Optional. Ignore layer types file path. |
+| `--ignore_layer_names`, `--ignore-layer-names` | string | Optional. Layer names list which will be skipped during quantization. |
+| `--ignore_layer_names_path`, `--ignore-layer-names-path` | string | Optional. Ignore layer names file path. |
+| `--batch_size`, `--batch-size`        | integer| Optional. Batch size value. If not specified, the batch size value is determined from IR. |
+| `-th`, `--threshold`                  | float | Optional. Accuracy drop of the quantized model should not exceed this threshold. Should be specified in percent without the percent sign (1% is default). |
+| `-ic`, `--benchmark_iterations_count`, `--benchmark-iterations-count` | integer | Optional. Benchmark iterations count (1 is default). |
+| `-mn`, `--metric_name`, `--metric-name` | string | Optional. Metric name used during calibration. |
+| `-mt`, `--metric_type`, `--metric-type` | string | Optional. Metric type used during calibration. |
+| `-o`, `--output_dir`, `--output-dir`    | string | Optional. Directory to store converted models. Original model directory is used if not defined. |
+
+### Simplified Mode
+
+The tool in this mode does not use the Accuracy Checker, configuration and annotation files, but you are required to specify paths to an IR .xml file and a dataset folder. Optionally, you can specify a prefix path to an extensions folder and the number of images from the dataset folder:
+
 | Argument                          | Type   | Description                                               |
 | --------------------------------- | ------ | --------------------------------------------------------- |
-| -sm, --simplified_mode, --simplified-mode |   | Optional. If specified, the Calibration Tool collects statistics without searching for optimal data thresholds. |
-| -ss, --subset                     | integer | Optional. This option is used only with --simplified_mode. Specifies a number of images from a folder that is set using `-s` option. |
+| `-sm`, `--simplified_mode`, `--simplified-mode` |   | Required. If specified, the Calibration Tool runs in the simplified mode to collect statistics without searching for optimal data thresholds. |
+| `-m`                                 | string | Required. Path to the IR .xml file.   |
+| `-s`, `--source`                      | string | Optional. Path to a folder with images.  | 
+| `-ss`, `--subset`                     | integer | Optional. This option is used only with `--simplified_mode`. Specifies a number of images from a folder that is set using `-s` option. |
+| `-e`, `--extensions`                             | string | Optional. Prefix path to extensions folder.              |
+| `-td`, `--target_devices`, `--target-devices`      | string | Optional. Space-separated list of devices for infer.     |
+| `-p`, `--precision`                   | string | Optional. Precision to calibrate. Default value is INT8. In the simplified mode, determines output IR precision.   |
+| `-o`, `--output_dir`, `--output-dir`    | string | Optional. Directory to store converted models. Original model directory is used if not defined. |
 
-## Model Calibration Flow
+## Typical Workflow Samples (Standard Mode)
 
 ### Introduction
-The calibration tool read original FP32 model, calibration dataset and create low precision model. Low precision model has two differences from original model:
+The Calibration Tool reads an original FP16 or FP32 model and a calibration dataset and creates a low-precision model. The low-precision model has two differences from the original model:
 1. Per channel statistics are defined. Statistics have minimum and maximum values for each layer and each channel. Model statistics are stored in Inference Engine intermediate representation file (IR) in XML format.
 2. `quantization_level` layer attribute is defined. The attribute defines precision which is used during inference.
 
 ### Prerequisites
-* Model: Tensorflow\* Inception v1. You can download the model from here: https://github.com/tensorflow/models/tree/master/research/slim
+* Model: TensorFlow* Inception v1. You can download the model from here: https://github.com/tensorflow/models/tree/master/research/slim
 * Dataset: ImageNet. You can download ImageNet from here: http://www.image-net.org/download.php
 * YML configuration files: you can find YML configuration files and YML definition file which are used below in `configs` directory:
   - `definitions.yml` - definition file
-  - `inception_v1.yml` - configuration file for Tensorflow\* Inception v1 model
-  - `ncf_config.yml` - configuration file for NCF model in OpenVINO\* Inference Engine Intermediate Representation format
-  - `ssd_mobilenet_v1_coco.yml` - configuration file for Tensorflow\* SSD Mobilenet v1 model
-  - `unet2d.yml` - configuration file for Unet2D mode in in OpenVINO\* Inference Engine Intermediate Representation format
+  - `inception_v1.yml` - configuration file for TensorFlow* Inception v1 model
+  - `ncf_config.yml` - configuration file for NCF model in OpenVINO Inference Engine Intermediate Representation format
+  - `ssd_mobilenet_v1_coco.yml` - configuration file for TensorFlow* SSD Mobilenet v1 model
+  - `unet2d.yml` - configuration file for Unet2D mode in in OpenVINO Inference Engine Intermediate Representation format
 
-If you have custom topology with not supported accuracy metric or not suported custom dataset then you should add some components implementation in `openvino.tools.accuracy_checker` Python\* package yourself. Refer to `openvino.tools.accuracy_checker` documentation how to implement metric and dataset support. 
+If your custom topology uses an accuracy metric or a custom dataset that is not supported, implement the corresponding components in the `openvino.tools.accuracy_checker` Python\* package yourself. For more information about metric implementation and dataset support, go to the [Accuracy Checker documentation](./tools/accuracy_checker/README.md).
 
 There are steps to calibrate and evaluate result model:
-- Step #1. Convert data annotation files
-- Optional step for low precision model performance estimation.
-- Step #2. Calibration
-- Step #3. Result model evaluation
+1. Convert data annotation files.
+2. (Optional) Estimate low precision model performance.
+3. Calibrate the model.
+4. Evaluate the resulting model.
 
 Additional optional step before calibration is available to rough estimate possible INT8 performance.
 
-### Step #1. Convert Data Annotation Files
+### Convert Data Annotation Files
 Calibration dataset is subset of training dataset. Use Convert Annotation Tool to convert ImageNet\* dataset to Calibration Tool readable data annotation files. Data annotation files describe subset of images which are used during calibration. Command line:
 ```sh
 python convert_annotation.py imagenet --annotation_file /datasets/ImageNet/val.txt --labels_file /datasets/ImageNet/synset_words.txt -ss 2000 -o ~/annotations -a imagenet.pickle -m imagenet.json
 ```
 
-> **NOTE:** For simplicity all command line tools in below steps use the same command line arguments. In practice [Collect Statistics Tool](./inference-engine/tools/collect_statistics_tool/README.md) uses calibration dataset, but [Accuracy Checker Tool](./inference-engine/tools/accuracy_checker_tool/README.md) has to use whole validation dataset.
+> **NOTE:** For simplicity, all command line tools in the steps below use the same command line arguments. In practice [Collect Statistics Tool](./inference-engine/tools/collect_statistics_tool/README.md) uses calibration dataset, but [Accuracy Checker Tool](./tools/accuracy_checker/README.md) has to use the whole validation dataset.
 
 
 | Argument           | Type   | Description                                                                       |
@@ -93,11 +120,11 @@ python convert_annotation.py imagenet --annotation_file /datasets/ImageNet/val.t
 | -M                 | string | Path to model optimizer directory                                                 |
 | --models           | string | Prefix path to the models and weights                                             |
 | --source           | string | Prefix path to the data source                                                    |
-| --annotations      | string | Pefix path to the converted annotations and datasets meta data                    |
-| --converted_models | string | Directory to store Model Optimizer converted models. Used for DLSDK launcher only |
+| --annotations      | string | Prefix path to the converted annotations and datasets meta data                    |
+| --converted_models | string | Directory to store Model Optimizer converted models |
 
 
-### Optional Step for Low Precision Model Performance Estimation
+### (Optional) Estimate Low-Precision Model Performance
 
 Before calibration, you can roughly estimate low precision performance with [Collect Statistics Tool](./inference-engine/tools/collect_statistics_tool/README.md).
 
@@ -109,14 +136,14 @@ Command line:
 python collect_statistics.py --config ~/inception_v1.yml -d ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations --converted_models ~/models
 ```
 
-Result model has statistics which allow you to infer this model in INT8 precision. To measure performance you can use [Benchmark Tool](./inference-engine/tools/benchmark_tool/README.md).
+The resulting model contains statistics that allow you to infer it in INT8 precision. To measure performance, you can use the [Benchmark App](./inference-engine/samples/benchmark_app/README.md).
 
-### Step #2. Calibration
-During calibration process, the model is ajusted for efficient quantization and minimization of accuracy drop on calibration dataset. Calibration tool produces calibrated model which will be executed in low precision 8 bit quantzed mode after loading into CPU plugin.
+### Calibrate the Model
+During the calibration process, the model is adjusted for efficient quantization and for minimizing the accuracy drop on the calibration dataset. The Calibration Tool produces a calibrated model which is executed in low-precision 8-bit quantized mode after it is loaded into the CPU plugin.
 
 The [Calibration Tool](./inference-engine/tools/calibration_tool/README.md) has a flexible and extensible mechanism for enabling new datasets and metrics. Each network has its own dedicated metric and the dataset it was trained on. Dataset descriptions and network metrics can be reused for different networks.
 
-To plug new dataset you need to develop YML file. To develop new metric you need to develop Python\* module implementing metric and describe in YML. Please, refer to [Accuracy Checker Tool](./inference-engine/tools/accuracy_checker_tool/README.md) for details.
+To plug in a new dataset, you need to develop a YML file. To add a new metric, you need to develop a Python\* module implementing the metric and describe it in the YML file. Refer to the [Accuracy Checker Tool](./tools/accuracy_checker/README.md) for details.
 
 
 Command line example:
@@ -124,17 +151,17 @@ Command line example:
 python calibrate.py --config ~/inception_v1.yml --definition ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --tf_custom_op_config_dir ~/tf_custom_op_configs --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations
 ```
 
-### Step #3. Result model evaluation
-After calibration of the model it worse to evaluate network accuracy on whole validation set using [Accuracy Checker Tool](./inference-engine/tools/accuracy_checker_tool/README.md).
+### Evaluate the Resulting Model
+After the model is calibrated, it is worth evaluating the network accuracy on the whole validation set using the [Accuracy Checker Tool](./tools/accuracy_checker/README.md).
 
-#### Step #3.1 Check accuracy
+#### Check Accuracy
 Command line:
 ```sh
 python accuracy_check.py --config ~/inception_v1.yml -d ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --tf_custom_op_config_dir ~/tf_custom_op_configs --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations -tf dlsdk -td CPU
 ```
 
-#### Step #3.2 Check performance
-Use `benchmark_app` command line tool to measure latency and throughput for synchronous and asynchronous modes. Note, please, `benchmark_app` command line tool uses converted OpenVINO\* Intermediate Representation model.
+#### Check Performance
+Use the [Benchmark App](./inference-engine/samples/benchmark_app/README.md) command line tool to measure latency and throughput for synchronous and asynchronous modes. Note that the Benchmark App uses the converted OpenVINO\* Intermediate Representation model.
 
 Command line for synchronous mode:
 
@@ -147,21 +174,10 @@ Command line for the asynchronous mode:
 ./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/inception_v1.xml -d CPU -api async
 ```
 
-#### Optional step to check performance
-You can use Python\* [Benchmark Tool](./inference-engine/tools/benchmark_tool/README.md) command line tool to quickly check performance with the same command line arguments and configuration YML files as for [Calibration Tool](./inference-engine/tools/calibration_tool/README.md).
-
-Command line:
-```sh
-python benchmark.py --config ~/inception_v1.yml -d ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --tf_custom_op_config_dir ~/tf_custom_op_configs --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations --converted_models ~/models
-```
-
-## Simplified Mode Flow
-
-The Calibration Tool in the simplified mode helps to quickly estimate performance of a model. It converts all possible layers into INT8 and collects statistics without achieving needed accuracy. The tool generates new IR, which is used in performance tests. Therefore, the tool in this mode does not use Accuracy Checker, configuration and annotation files, but you should specify paths to an IR .xml file and a dataset folder. Optionally, you can specify a path to an extensions library and the number of images from the dataset folder. In simplified mode path to an extensions is a path to extensions library.
+## Typical Workflow Samples (Simplified Mode)
 
 To run the Calibration Tool in the simplified mode, use the following command:
 ```sh
-python3 calibrate.py -sm -m <path-to-ir.xml> -s <path-to-dataset> -ss img_num -e <path-to-extensions-library> -td target_device
+python3 calibrate.py -sm -m <path-to-ir.xml> -s <path-to-dataset> -ss <images-number> -e <path-to-extensions-folder> -td <target-device> -precision <output-ir-precision> --output-dir <output-directory-path>
 ```
-It accepts models with FP32, FP16 precisions and images as dataset.
-
+It accepts models with FP32 or FP16 precision and image files as the dataset.
\ No newline at end of file
index ca082f5..a1c9168 100644 (file)
@@ -53,4 +53,8 @@ target_link_libraries(${TARGET_NAME} ${TARGET_NAME_LIB} gflags)
 if(UNIX)
     target_link_libraries(${TARGET_NAME} ${LIB_DL} pthread)
 endif()
-set_ie_threading_interface_for(${TARGET_NAME_LIB})
\ No newline at end of file
+set_ie_threading_interface_for(${TARGET_NAME_LIB})
+
+# export for python
+
+export(TARGETS ${TARGET_NAME_LIB} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
index 0b5ca96..31c7ebc 100644 (file)
@@ -2,11 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include <limits>
 #include <stdlib.h>
 #include <cfloat>
 #include <cmath>
 #include <iostream>
-#include <limits>
 #include <vector>
 #include <algorithm>
 #include <stdint.h>
index 8f770c5..85f91a8 100644 (file)
@@ -4,13 +4,13 @@
 
 #pragma once
 
-#include <limits>
 #include <list>
 #include <map>
 #include <mutex>
 #include <unordered_map>
 #include <string>
 #include <vector>
+#include <limits>
 
 struct TensorStatistic {
     TensorStatistic(float* data, size_t count, size_t nbuckets = 1000);
@@ -51,7 +51,7 @@ public:
     void getDataMinMax(const std::string& name, size_t channel, float& min, float& max, float threshold = 100.f);
 protected:
     struct statsPair {
-        float _min = std::numeric_limits<float>::max();;
+        float _min = std::numeric_limits<float>::max();
         float _max = std::numeric_limits<float>::min();
     };
     std::unordered_map<std::string, std::unordered_map<size_t, statsPair>> _data;
index 40c21b4..8a0cbda 100644 (file)
@@ -35,7 +35,7 @@ static const char model_message[] = "Required. Path to an .xml file with a train
 static const char plugin_message[] = "Plugin name. For example, CPU. If this parameter is passed, "
                                      "the sample looks for a specified plugin only.";
 /// @brief Message for assigning cnn calculation to device
-static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA or MYRIAD."
+static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD."
                                             " The application looks for a suitable plugin for the specified device.";
 /// @brief Message for batch argument type
 static const char batch_message[] = "Batch size value. If not specified, the batch size value is taken from IR";
@@ -178,15 +178,10 @@ int main(int argc, char *argv[]) {
         showUsage();
         return ex.exitCode();
     } catch (const UserExceptions& ex) {
-        if (ex.list().size() == 1) {
-            slog::err << "Input problem: " << ex.what() << slog::endl;
-            showUsage();
-            return ex.list().begin()->exitCode();
-        } else {
-            slog::err << "Input problems: \n" << ex.what() << slog::endl;
-            showUsage();
+        slog::err << "Input problems: \n" << ex.what() << slog::endl;
+        showUsage();
+        if (!ex.list().empty())
             return ex.list().begin()->exitCode();
-        }
     } catch (const std::exception& ex) {
         slog::err << ex.what() << slog::endl;
         return 1;
index 4f4f48e..7390c21 100644 (file)
@@ -295,7 +295,10 @@ void StatisticsCollector::fillBlobs(StatisticsCollector* collectorInstance) {
         progress_step = 100lu;
     collectorInstance->_consoleProgress = std::make_shared<ConsoleProgress>(img_number);
 
-    TensorDesc inputDesc = collectorInstance->_cnn_network->getInputsInfo().begin()->second->getTensorDesc();
+    auto inputInfo = collectorInstance->_cnn_network->getInputsInfo();
+    if (inputInfo.empty())
+        THROW_IE_EXCEPTION << "Input info is empty";
+    TensorDesc inputDesc = inputInfo.begin()->second->getTensorDesc();
     const Precision::ePrecision inputPrecision = inputDesc.getPrecision();
 
     PreprocessingOptions preprocessingOptions;
index a3b3333..738b5a8 100644 (file)
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 if(ENABLE_MYRIAD)
+    add_subdirectory(vpu_perfcheck)
     add_subdirectory(vpu_profile)
 endif()
 
index 7d5fe0c..1ffccb9 100644 (file)
@@ -31,7 +31,7 @@
 #include <limits>
 
 #include "vpu_tools_common.hpp"
-#include "vpu/utils/string.hpp"
+#include <vpu/utils/string.hpp>
 #include "samples/common.hpp"
 
 #include "precision_utils.h"
index 94830ba..e093029 100644 (file)
@@ -43,7 +43,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE
     gflags
 )
 
-add_dependencies(${TARGET_NAME} myriadPlugin vpu_copy_firmware)
+add_dependencies(${TARGET_NAME} myriadPlugin)
 
 set_target_properties(${TARGET_NAME} PROPERTIES
     COMPILE_PDB_NAME
index f447330..631c369 100644 (file)
@@ -28,7 +28,7 @@
 #include "inference_engine.hpp"
 #include <vpu/private_plugin_config.hpp>
 #include "samples/common.hpp"
-#include "vpu/utils/string.hpp"
+#include <vpu/utils/string.hpp>
 
 #include "vpu_tools_common.hpp"
 
diff --git a/inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt b/inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt
new file mode 100644 (file)
index 0000000..34345fd
--- /dev/null
@@ -0,0 +1,69 @@
+#
+# Copyright (c) 2018-2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+function(add_perfcheck_target TARGET_NAME PLUGIN_NAME)
+    find_package(Threads REQUIRED)
+
+    file(GLOB SOURCES *.cpp)
+
+    if(WIN32)
+        file(GLOB WIN_PTHREAD_SOURCES "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/WinPthread/win_pthread.c")
+        file(GLOB_RECURSE SHARED "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/shared/*")
+        list(APPEND SOURCES ${WIN_PTHREAD_SOURCES} ${SHARED})
+    endif()
+
+    add_executable(${TARGET_NAME} ${SOURCES})
+
+    # TODO: enable some day and fix all warnings
+#    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+#        target_compile_options(${TARGET_NAME}
+#            PRIVATE
+#                "-Wall")
+#    endif()
+
+    target_include_directories(${TARGET_NAME}
+        SYSTEM PRIVATE
+            "${IE_MAIN_SOURCE_DIR}/include"
+            "${IE_MAIN_SOURCE_DIR}/src/inference_engine"
+            "${IE_MAIN_SOURCE_DIR}/src/vpu/graph_transformer/include"
+            "${IE_MAIN_SOURCE_DIR}/samples/common/samples"
+            "${IE_MAIN_SOURCE_DIR}/samples/common/format_reader")
+
+    if(WIN32)
+        target_include_directories(${TARGET_NAME}
+                PRIVATE
+                "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/WinPthread"
+                "${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/shared/include")
+    endif()
+
+    target_link_libraries(${TARGET_NAME}
+        PRIVATE
+            inference_engine format_reader
+            ${CMAKE_DL_LIBS}
+            Threads::Threads)
+
+    add_dependencies(${TARGET_NAME}
+        ${PLUGIN_NAME} ${ARGN})
+
+    set_target_properties(${TARGET_NAME} PROPERTIES
+        COMPILE_PDB_NAME ${TARGET_NAME})
+
+    add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
+endfunction()
+
+if(ENABLE_MYRIAD)
+    add_perfcheck_target(myriad_perfcheck myriadPlugin)
+endif()
diff --git a/inference-engine/tools/vpu/vpu_perfcheck/main.cpp b/inference-engine/tools/vpu/vpu_perfcheck/main.cpp
new file mode 100644 (file)
index 0000000..5130c87
--- /dev/null
@@ -0,0 +1,788 @@
+/*
+// Copyright (C) 2018-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#if defined(_WIN32)
+#define NOMINMAX
+#endif
+#if (defined(_WIN32) || defined(_WIN64))
+#define WIN32_LEAN_AND_MEAN
+#include "win_pthread.h"
+#else
+#include <pthread.h>
+#endif
+
+#include <cmath>
+#include <chrono>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <functional>
+#include <map>
+#include <algorithm>
+#include <utility>
+#include <iomanip>
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <condition_variable>
+#include <stdio.h>
+#include <ios>
+#include <sys/stat.h>
+#include <w_dirent.h>
+
+#include <inference_engine.hpp>
+#include <common.hpp>
+#include <vpu/vpu_plugin_config.hpp>
+
+static char* m_exename = nullptr;
+
+#if defined(WIN32) || defined(__APPLE__)
+typedef std::chrono::time_point<std::chrono::steady_clock> time_point;
+#else
+typedef std::chrono::time_point<std::chrono::system_clock> time_point;
+#endif
+typedef std::chrono::high_resolution_clock Time;
+typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
+typedef std::chrono::duration<float> fsec;
+
+#define TIMEDIFF(start, end) ((std::chrono::duration_cast<ms>((end) - (start))).count())
+
+class BitMap {
+private:
+    typedef struct {
+        unsigned short type   = 0u;               /* Magic identifier            */
+        unsigned int size     = 0u;               /* File size in bytes          */
+        unsigned int reserved = 0u;
+        unsigned int offset   = 0u;               /* Offset to image data, bytes */
+    } BmpHeader;
+
+    typedef struct {
+        unsigned int size = 0u;                   /* Header size in bytes      */
+        int width = 0, height = 0;                /* Width and height of image */
+        unsigned short planes = 0u;               /* Number of colour planes   */
+        unsigned short bits = 0u;                 /* Bits per pixel            */
+        unsigned int compression = 0u;            /* Compression type          */
+        unsigned int imagesize = 0u;              /* Image size in bytes       */
+        int xresolution = 0, yresolution = 0;     /* Pixels per meter          */
+        unsigned int ncolours = 0u;               /* Number of colours         */
+        unsigned int importantcolours = 0u;       /* Important colours         */
+    } BmpInfoHeader;
+
+public:
+    explicit BitMap(const std::string &filename) {
+        BmpHeader header;
+        BmpInfoHeader infoHeader;
+
+        std::ifstream input(filename, std::ios::binary);
+        if (!input) {
+            return;
+        }
+
+        input.read(reinterpret_cast<char *>(&header.type), 2);
+
+        if (header.type != 'M'*256+'B') {
+            std::cerr << "[BMP] file is not bmp type\n";
+            return;
+        }
+
+        input.read(reinterpret_cast<char *>(&header.size), 4);
+        input.read(reinterpret_cast<char *>(&header.reserved), 4);
+        input.read(reinterpret_cast<char *>(&header.offset), 4);
+
+        input.read(reinterpret_cast<char *>(&infoHeader), sizeof(BmpInfoHeader));
+
+        bool rowsReversed = infoHeader.height < 0;
+        _width = infoHeader.width;
+        _height = abs(infoHeader.height);
+
+        if (infoHeader.bits != 24) {
+            std::cerr << "[BMP] 24bpp only supported. But input has:" << infoHeader.bits << "\n";
+            return;
+        }
+
+        if (infoHeader.compression != 0) {
+            std::cerr << "[BMP] compression not supported\n";
+        }
+
+        int padSize = _width & 3;
+        char pad[3];
+        size_t size = _width * _height * 3;
+
+        _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
+
+        input.seekg(header.offset, std::ios::beg);
+
+        // read the image row by row, inverting vertically unless rows are already stored top-down
+        for (uint32_t i = 0; i < _height; i++) {
+            uint32_t storeAt = rowsReversed ? i : (uint32_t)_height - 1 - i;
+            input.read(reinterpret_cast<char *>(_data.get()) + _width * 3 * storeAt, _width * 3);
+            input.read(pad, padSize);
+        }
+    }
+
+    ~BitMap() = default;
+
+    size_t _height = 0;
+    size_t _width = 0;
+    std::shared_ptr<unsigned char> _data;
+
+public:
+    size_t size() const { return _width * _height * 3; }
+    size_t width() const { return _width; }
+    size_t height() const { return _height; }
+
+    std::shared_ptr<unsigned char> getData() {
+        return _data;
+    }
+};
+
+#define IECALL(call)                                                                \
+{                                                                                   \
+    if (InferenceEngine::OK != (call)) {                                            \
+        std::cout << #call " failed: " << resp.msg << std::endl;                    \
+        return 1;                                                                   \
+    }                                                                               \
+}
+
+static short f32tof16(float x);
+static float f16tof32(short x);
+static bool loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob);
+static bool loadBinaryTensor(const std::string &binaryFilename, InferenceEngine::Blob::Ptr &blob);
+
+
+static void setConfig(std::map<std::string, std::string>& config,
+                      const std::string& file_config_cl) {
+    config[VPU_CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
+    config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
+    config[VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME)] = CONFIG_VALUE(YES);
+    config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = file_config_cl;
+}
+
+static void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfMap) {
+    std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> perfVec(perfMap.begin(),
+                                                                                             perfMap.end());
+    std::sort(perfVec.begin(), perfVec.end(),
+              [=](const std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo> &pair1,
+                  const std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo> &pair2) -> bool {
+                  return pair1.second.execution_index < pair2.second.execution_index;
+              });
+
+    size_t maxLayerName = 0u, maxExecType = 0u;
+    for (auto & it : perfVec) {
+        maxLayerName = std::max(maxLayerName, it.first.length());
+        maxExecType = std::max(maxExecType, std::strlen(it.second.exec_type));
+    }
+
+    size_t indexWidth = 7, nameWidth = maxLayerName + 5, typeWidth = maxExecType + 5, timeWidth = 10;
+    size_t totalWidth = indexWidth + nameWidth + typeWidth + timeWidth;
+
+    std::cout << std::endl << "Detailed Per Stage Profile" << std::endl;
+    for (size_t i = 0; i < totalWidth; i++)
+        std::cout << "=";
+    std::cout << std::endl;
+    std::cout << std::setw(indexWidth) << std::left << "Index"
+              << std::setw(nameWidth) << std::left << "Name"
+              << std::setw(typeWidth) << std::left << "Type"
+              << std::setw(timeWidth) << std::right << "Time (ms)"
+              << std::endl;
+    for (size_t i = 0; i < totalWidth; i++)
+        std::cout << "-";
+    std::cout << std::endl;
+
+    long long totalTime = 0;
+    for (const auto& p : perfVec) {
+        const auto& stageName = p.first;
+        const auto& info = p.second;
+        if (info.status == InferenceEngine::InferenceEngineProfileInfo::EXECUTED) {
+            std::cout << std::setw(indexWidth) << std::left << info.execution_index
+                      << std::setw(nameWidth) << std::left << stageName
+                      << std::setw(typeWidth) << std::left << info.exec_type
+                      << std::setw(timeWidth) << std::right << info.realTime_uSec / 1000.0
+                      << std::endl;
+
+            totalTime += info.realTime_uSec;
+        }
+    }
+
+    for (int i = 0; i < totalWidth; i++)
+        std::cout << "-";
+    std::cout << std::endl;
+    std::cout << std::setw(totalWidth / 2) << std::right << "Total inference time:"
+              << std::setw(totalWidth / 2 + 1) << std::right << totalTime / 1000.0
+              << std::endl;
+    for (int i = 0; i < totalWidth; i++)
+        std::cout << "-";
+    std::cout << std::endl;
+}
+
+static std::string getAppRealName(const char* name) {
+    std::string filename(name);
+    size_t splitpos = filename.find_last_of('\\');
+    if (std::string::npos == splitpos) {
+        splitpos = filename.find_last_of('/');
+        if (std::string::npos == splitpos) {
+            return filename;
+        }
+    }
+    return filename.substr(splitpos + 1);
+}
+
+static void print_usage() {
+    std::cout << "Usage:" << std::endl << getAppRealName(m_exename) << " <model_path> <img_dir_path> [number of iterations >= 1000]"
+              << " [batch >= 1, default=1] [num_networks, default=1] [config_file_custom_layer, default='']" << std::endl;
+}
+
+static void getBMPFiles(std::vector<std::string> &out, const std::string &directory) {
+    const std::string ext = ".bmp";
+    DIR *dir;
+    dirent *ent;
+    dir = opendir(directory.c_str());
+    if (!dir)
+        return;
+    while ((ent = readdir(dir)) != nullptr) {
+        const std::string file_name = ent->d_name;
+        const std::string full_file_name = directory + "/" + file_name;
+        if ((file_name.length() >= ext.length())
+            && (0 == file_name.compare(file_name.length() - ext.length(), ext.length(), ext))) {
+            // proceed
+        } else {
+            continue;
+        }
+        struct stat st;
+        if (stat(full_file_name.c_str(), &st) == -1)
+            continue;
+        const bool is_directory = (st.st_mode & S_IFDIR) != 0;
+        if (is_directory)
+            continue;
+        out.push_back(full_file_name);
+    }
+    closedir(dir);
+}
+
+static void getBINFiles(std::vector<std::string> &out, const std::string &directory) {
+    const std::string ext = ".bin";
+    DIR *dir;
+    dirent *ent;
+    dir = opendir(directory.c_str());
+    if (!dir)
+        return;
+    while ((ent = readdir(dir)) != nullptr) {
+        const std::string file_name = ent->d_name;
+        const std::string full_file_name = directory + "/" + file_name;
+        if ((file_name.length() >= ext.length())
+            && (0 == file_name.compare(file_name.length() - ext.length(), ext.length(), ext))) {
+            // proceed
+        } else {
+            continue;
+        }
+        struct stat st;
+        if (stat(full_file_name.c_str(), &st) == -1)
+            continue;
+        const bool is_directory = (st.st_mode & S_IFDIR) != 0;
+        if (is_directory)
+            continue;
+        out.push_back(full_file_name);
+    }
+    closedir(dir);
+}
+
+int num_requests = 4;
+
+#define MIN_ITER 1000
+
+#define USE_CALLBACK
+
+int niter;
+std::atomic<int> iterations_to_run;
+std::mutex done_mutex;
+std::condition_variable alldone;
+int reallydone = 0;
+
+std::vector<time_point> iter_start;
+std::vector<time_point> iter_end;
+std::vector<double> iter_time;
+
+const int profile = 0;
+std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
+
+int process(const std::string& modelFileName, const std::string& inputsDir,
+            std::string& file_config_cl, int nBatch, int num_networks) {
+    InferenceEngine::ResponseDesc resp;
+
+    niter /= nBatch;
+    num_requests = num_requests * num_networks;
+
+    // add some more requests. they'll be excluded on performance measurement
+    niter += 2 * 2 * num_requests;
+
+    if (pthread_setname_np(
+#ifndef __APPLE__
+    pthread_self(),
+#endif
+    "MainThread") != 0) {
+        perror("Setting name for main thread failed");
+    }
+
+    InferenceEngine::PluginDispatcher disp;
+    InferenceEngine::InferenceEnginePluginPtr plugin(
+        disp.getPluginByName(std::string("myriadPlugin") + IE_BUILD_POSTFIX));
+
+    std::cout << "InferenceEngine: " << std::endl;
+
+    const InferenceEngine::Version *pluginVersion = nullptr;
+    plugin->GetVersion(pluginVersion);
+    std::cout << pluginVersion << std::endl << std::endl;
+
+    InferenceEngine::CNNNetReader netReader;
+    netReader.ReadNetwork(modelFileName);
+
+    std::string binFileName = fileNameNoExt(modelFileName) + ".bin";
+    netReader.ReadWeights(binFileName);
+
+    std::ifstream file(file_config_cl);
+    if (!file.is_open()) {
+        file_config_cl.clear();
+    }
+
+    std::vector<std::string> pictures;
+    getBMPFiles(pictures, inputsDir);
+    int numPictures = pictures.size();
+
+    std::vector<std::string> binaries;
+    getBINFiles(binaries, inputsDir);
+    int numBinaries = binaries.size();
+
+    if (pictures.empty() && binaries.empty()) {
+        std::cout << inputsDir << " directory doesn't contain input files" << std::endl;
+        return 1;
+    }
+
+    InferenceEngine::CNNNetwork cnnNetwork = netReader.getNetwork();
+
+    if (nBatch != 1) {
+        std::cout << "Setting batch to : "<< nBatch << "\n";
+        cnnNetwork.setBatchSize(nBatch);
+    }
+
+    InferenceEngine::InputsDataMap networkInputs;
+    networkInputs = cnnNetwork.getInputsInfo();
+    InferenceEngine::OutputsDataMap networkOutputs;
+    networkOutputs = cnnNetwork.getOutputsInfo();
+
+    for (auto &input : networkInputs) {
+        input.second->setPrecision(InferenceEngine::Precision::FP16);
+    }
+
+    for (auto &output : networkOutputs) {
+        output.second->setPrecision(InferenceEngine::Precision::FP16);
+    }
+
+    std::vector<InferenceEngine::IExecutableNetwork::Ptr> exeNetwork(num_networks);
+    std::map<std::string, std::string> networkConfig;
+    setConfig(networkConfig, file_config_cl);
+
+    for (int n = 0; n < num_networks; ++n) {
+        if (num_networks > 1)
+            printf("Load network %d...\n", n);
+        else
+            printf("Load network... \n");
+        fflush(stdout);
+        IECALL(plugin->LoadNetwork(exeNetwork[n], cnnNetwork, networkConfig, &resp));
+    }
+
+    std::vector<InferenceEngine::IInferRequest::Ptr> request(num_requests);
+    iter_start.resize(niter);
+    iter_end.resize(niter);
+    iter_time.resize(niter);
+
+    iterations_to_run = niter - num_requests;
+
+    for (int r = 0, idxPic = 0; r < num_requests; ++r) {
+        int n = r % num_networks;
+        IECALL(exeNetwork[n]->CreateInferRequest(request[r], &resp));
+
+        for (auto &input : networkInputs) {
+            InferenceEngine::Blob::Ptr inputBlob;
+            IECALL(request[r]->GetBlob(input.first.c_str(), inputBlob, &resp));
+
+            const auto& dims = inputBlob->getTensorDesc().getDims();
+            auto layout = inputBlob->getTensorDesc().getLayout();
+
+            // number of channels is 3 for Image, dims order is always NCHW
+            const bool isImage = ((layout == InferenceEngine::NHWC || layout == InferenceEngine::NCHW) && dims[1] == 3);
+
+            if (isImage && (numPictures > 0)) {
+                if (!loadImage(pictures[(idxPic++) % numPictures], inputBlob))
+                    return 1;
+            } else if (numBinaries > 0) {
+                if (!loadBinaryTensor(binaries[(idxPic++) % numBinaries], inputBlob))
+                    return 1;
+            }   else {
+                std::cout << inputsDir << " directory doesn't contain correct input files" << std::endl;
+                return 1;
+            }
+        }
+
+        IECALL(request[r]->SetCompletionCallback(
+                [](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) {
+                    if (code != InferenceEngine::OK) {
+                        std::cout << "Infer failed: " << code << std::endl;
+                        exit(1);
+                    }
+
+                    int iter = --iterations_to_run;
+                    int reqIdx = (niter - iter - 1) - num_requests;
+
+                    iter_end[reqIdx] = Time::now();
+
+                    InferenceEngine::ResponseDesc resp;
+                    if (profile && (reqIdx == niter / 2)) {
+                        request->GetPerformanceCounts(perfMap, &resp);
+                    }
+
+                    if (iter >= 0) {
+                        iter_start[reqIdx + (num_requests)] = Time::now();
+                        if (InferenceEngine::OK != request->StartAsync(&resp)) {
+                            std::cout << "StartAsync failed: " << resp.msg << std::endl;
+                            exit(1);
+                        }
+                    }
+
+                    iter_time[reqIdx] = TIMEDIFF(iter_start[reqIdx], iter_end[reqIdx]);
+                    // printf("request#%d %fms\n", reqIdx, iter_time[reqIdx]);
+
+                    if (iter == -num_requests) {
+                        reallydone = 1;
+                        alldone.notify_all();
+                    }
+                }));
+    }
+
+    printf("Inference started. Running %d iterations...\n", niter - 2 * 2 * num_requests);
+    fflush(stdout);
+    for (int r = 0; r < num_requests; ++r) {
+        iter_start[r] = Time::now();
+        IECALL(request[r]->StartAsync(&resp));
+    }
+
+    {
+        std::unique_lock<std::mutex> lock(done_mutex);
+        alldone.wait(lock, [&](){return reallydone;});
+    }
+
+    // check 10 time intervals to get min/max fps values
+    const int fps_checks = 10;
+    // exclude (2 * num_requests) first and last iterations
+    int num_exclude = 2 * num_requests;
+    time_point cstart = iter_end[num_exclude - 1];
+    time_point cend = iter_end[niter - num_exclude - 1];
+
+    double totalTime = (std::chrono::duration_cast<ms>(cend - cstart)).count();
+    std::cout << std::endl << "Total time: " << (totalTime) << " ms" << std::endl;
+
+    std::cout << "Average fps on " << (niter - 2 * num_exclude) << " iterations"
+              << (nBatch == 1 ? ": " : (" of " + std::to_string(nBatch) + " frames: "))
+              << static_cast<double>(niter - 2 * num_exclude) * 1000.0 * nBatch / (totalTime) << " fps" << std::endl;
+
+    double check_time = totalTime / fps_checks;
+
+    double min_fps = 100000;
+    double max_fps = -100000;
+    int citer = num_exclude;
+    for (int f = 0; f < fps_checks; ++f) {
+        int fiter = 0;
+        auto fend = (f < fps_checks - 1) ? cstart + std::chrono::microseconds((unsigned int)(check_time * 1000)) : cend;
+        while ((citer + fiter < niter - num_exclude) && iter_end[citer + fiter] <= fend) {
+            fiter++;
+        }
+
+        double ffps = 1000 * fiter * nBatch / (check_time);
+        min_fps = std::min(min_fps, ffps);
+        max_fps = std::max(max_fps, ffps);
+        citer += fiter;
+        cstart = fend;
+    }
+
+    std::cout << "Min fps: " << min_fps << std::endl;
+    std::cout << "Max fps: " << max_fps << std::endl;
+
+    if (profile) {
+        printPerformanceCounts(perfMap);
+    }
+
+    return 0;
+}
+
+int main(int argc, char *argv[]) {
+    niter = MIN_ITER;
+    int num_networks = 1;
+    int nBatch = 1;
+    std::string file_config_cl;
+
+    m_exename = argv[0];
+
+    if (argc < 3) {
+        print_usage();
+        return 0;
+    }
+
+    auto parse = [](const std::string& src) {
+        try {
+            return std::stol(src, nullptr, 0);
+        } catch (const std::invalid_argument& exception) {
+            std::cout << "Cannot perform conversion for " << src << ": " << exception.what() << std::endl;
+            print_usage();
+            std::abort();
+        } catch (const std::out_of_range& exception) {
+            std::cout << src << " is out of range: " << exception.what() << std::endl;
+            print_usage();
+            std::abort();
+        } catch (...) {
+            std::cout << "Unexpected exception" << std::endl;
+            print_usage();
+            std::abort();
+        }
+    };
+
+    if (argc > 3) {
+        niter = static_cast<int>(parse(argv[3]));
+    }
+
+    if (argc > 4) {
+        nBatch = static_cast<int>(parse(argv[4]));
+    }
+
+    if (argc > 5) {
+        num_networks = static_cast<int>(parse(argv[5]));
+    }
+
+    if (argc > 6) {
+        file_config_cl = std::string(argv[6]);
+    }
+
+    if (niter < MIN_ITER) {
+        print_usage();
+        return 0;
+    }
+
+    if (num_networks < 1 || num_networks > 16) {
+        print_usage();
+        return 0;
+    }
+
+    if (nBatch < 1) {
+        print_usage();
+        return 0;
+    }
+
+    try {
+        std::string modelFileName(argv[1]);
+        std::string inputsDir(argv[2]);
+        return process(modelFileName, inputsDir, file_config_cl, nBatch, num_networks);
+    }
+    catch (const std::exception& ex) {
+        std::cout << ex.what();
+    }
+
+    return -1;
+}
+
+inline float asfloat(uint32_t v) {
+    return *reinterpret_cast<float *>(&v);
+}
+
+#define EXP_MASK_F32 0x7F800000U
+#define EXP_MASK_F16     0x7C00U
+
+static short f32tof16(float x) {
+    static float min16 = asfloat((127 - 14) << 23);
+
+    static float max16 = asfloat(((127 + 15) << 23) | 0x007FE000);
+    static uint32_t max16f16 = ((15 + 15) << 10) | 0x3FF;
+
+    union {
+        float f;
+        uint32_t u;
+    } v{};
+    v.f = x;
+
+    uint32_t s = (v.u >> 16) & 0x8000;
+
+    v.u &= 0x7FFFFFFF;
+
+    if ((v.u & EXP_MASK_F32) == EXP_MASK_F32) {
+        if (v.u & 0x007FFFFF) {
+            return s | (v.u >> (23 - 10)) | 0x0200;
+        } else {
+            return s | (v.u >> (23 - 10));
+        }
+    }
+
+    float halfULP = asfloat(v.u & EXP_MASK_F32) * asfloat((127 - 11) << 23);
+    v.f += halfULP;
+
+    if (v.f < min16 * 0.5F) {
+        return s;
+    }
+
+    if (v.f < min16) {
+        return s | (1 << 10);
+    }
+
+    if (v.f >= max16) {
+        return max16f16 | s;
+    }
+
+    v.u -= ((127 - 15) << 23);
+
+    v.u >>= (23 - 10);
+
+    return v.u | s;
+}
+
+static float f16tof32(short x) {
+    // this is storage for output result
+    uint32_t u = x;
+
+    // get sign in 32bit format
+    uint32_t s = ((u & 0x8000) << 16);
+
+    // check for NAN and INF
+    if ((u & EXP_MASK_F16) == EXP_MASK_F16) {
+        // keep mantissa only
+        u &= 0x03FF;
+
+        // check if it is NAN and raise 10 bit to be align with intrin
+        if (u) {
+            u |= 0x0200;
+        }
+
+        u <<= (23 - 10);
+        u |= EXP_MASK_F32;
+        u |= s;
+    } else if ((x & EXP_MASK_F16) == 0) {  // check for zero and denormals. both are converted to zero
+        u = s;
+    } else {
+        // abs
+        u = (u & 0x7FFF);
+
+        // shift mantissa and exp from f16 to f32 position
+        u <<= (23 - 10);
+
+        // new bias for exp (f16 bias is 15 and f32 bias is 127)
+        u += ((127 - 15) << 23);
+
+        // add sign
+        u |= s;
+    }
+
+    // finally represent the result as float and return it
+    return *reinterpret_cast<float *>(&u);
+}
+
+static bool loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob) {
+    InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc();
+    if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) {
+        std::cout << "loadImage error: Input must have FP16 precision" << std::endl;
+        return false;
+    }
+
+    if (tensDesc.getLayout() != InferenceEngine::NHWC && tensDesc.getLayout() != InferenceEngine::NCHW) {
+        std::cout << "loadImage error: Input must have NCHW or NHWC layout" << std::endl;
+        return false;
+    }
+
+    BitMap reader(imageFilename);
+
+    const auto dims = tensDesc.getDims();
+    auto numBlobChannels = dims[1];
+    size_t batch = dims[0];
+    size_t w = dims[3];
+    size_t h = dims[2];
+    size_t img_w = reader.width();
+    size_t img_h = reader.height();
+
+    size_t numImageChannels = reader.size() / (reader.width() * reader.height());
+    if (numBlobChannels != numImageChannels && numBlobChannels != 1) {
+        std::cout << "loadImage error: Input channels mismatch: image channels " << numImageChannels << ", "
+                  << "network channels " << numBlobChannels << ", expecting count of image channels are equal "
+                  << "to count if network channels or count of network channels are equal to 1" << std::endl;
+        return false;
+    }
+
+    int16_t *blobDataPtr = std::dynamic_pointer_cast<InferenceEngine::TBlob<int16_t>>(blob)->data();
+    auto nPixels = w * h;
+    unsigned char *RGB8 = reader.getData().get();
+    float xscale = 1.0f * img_w / w;
+    float yscale = 1.0f * img_h / h;
+
+    for (int n = 0; n != batch; n++) {
+        for (int i = 0; i < h; ++i) {
+            int y = static_cast<int>(std::floor((i + 0.5f) * yscale));
+            for (int j = 0; j < w; ++j) {
+                int x = static_cast<int>(std::floor((j + 0.5f) * xscale));
+                for (int k = 0; k < numBlobChannels; k++) {
+                    if (tensDesc.getLayout() == InferenceEngine::NHWC) {
+                        blobDataPtr[n * h * w * numBlobChannels + (i * w + j) * numBlobChannels + k] =
+                                f32tof16(1.0 * RGB8[(y * img_w + x) * numImageChannels + k]);
+                    } else {
+                        blobDataPtr[n * h * w * numBlobChannels + (i * w + j) + k * nPixels] =
+                                f32tof16(1.0 * RGB8[(y * img_w + x) * numImageChannels + k]);
+                    }
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+bool loadBinaryTensor(const std::string &binaryFilename, InferenceEngine::Blob::Ptr &blob) {
+    InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc();
+    if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) {
+        std::cout << "loadBinaryTensor error: Input must have FP16 precision" << std::endl;
+        return false;
+    }
+
+    std::ifstream binaryFile(binaryFilename, std::ios_base::binary | std::ios_base::ate);
+
+    if (!binaryFile) {
+        std::cout << "loadBinaryTensor error: While opening a file an error is encountered" << std::endl;
+        return false;
+    }
+
+    int fileSize = binaryFile.tellg();
+    binaryFile.seekg(0, std::ios_base::beg);
+    size_t count = blob->size();
+    if (fileSize != count * sizeof(float)) {
+        std::cout << "loadBinaryTensor error: File contains insufficient items" << std::endl;
+        return false;
+    }
+
+    if (binaryFile.good()) {
+        int16_t *blobDataPtr = std::dynamic_pointer_cast<InferenceEngine::TBlob<int16_t>>(blob)->data();
+        for (size_t i = 0; i < count; i++) {
+            float tmp = 0.f;
+            binaryFile.read(reinterpret_cast<char *>(&tmp), sizeof(float));
+            blobDataPtr[i] = f32tof16(tmp);
+        }
+    } else {
+        std::cout << "loadBinaryTensor error: While reading a file an error is encountered" << std::endl;
+        return false;
+    }
+    return true;
+}
index a7d818f..f5e57fb 100644 (file)
@@ -25,7 +25,7 @@ vpu_profile [OPTIONS]
        -inputs_dir      <value>        Path to folder with images, only bitmap(.bmp) supported. Default: ".".
        -config          <value>        Path to the configuration file. Default value: "config".
        -iterations      <value>        Specifies number of iterations. Default value: 16.
-       -plugin          <value>        Specifies plugin. Supported values: myriad.
+       -plugin          <value>        Specifies plugin. Supported values: myriad, hddl.
                                        Default value: "myriad".
        -report          <value>        Specifies report type. Supported values: per_layer, per_stage.
                                        Overrides value in configuration file if provided. Default value: "per_layer"
@@ -40,6 +40,15 @@ $./vpu_profile -model <path_to_model>/model_name.xml
 ```
 > **NOTE**: Models should be first converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer).
 
+## Plugin Option
+
+You have to select between the Myriad and HDDL plugins manually; by default, vpu_profile tries to use the Myriad plugin.
+If you need to run on HDDL, set it explicitly:
+
+```sh
+$./vpu_profile -model <path_to_model>/model_name.xml -plugin hddl
+```
+
 ## Iterations Option
 
 Sets the number of infer requests to be executed. This affects the overall inference time; performance counts are reported for the last iteration.
index 592a4e9..8403611 100644 (file)
@@ -43,7 +43,7 @@ static constexpr char model_message[]       = "Path to xml model.";
 static constexpr char inputs_dir_message[]  = "Path to folder with images, only bitmap(.bmp) supported. Default: \".\".";
 static constexpr char config_message[]      = "Path to the configuration file. Default value: \"config\".";
 static constexpr char iterations_message[]  = "Specifies number of iterations. Default value: 16.";
-static constexpr char plugin_message[]      = "Specifies plugin. Supported values: myriad.\n"
+static constexpr char plugin_message[]      = "Specifies plugin. Supported values: myriad, hddl.\n"
     "\t            \t         \tDefault value: \"myriad\".";
 static constexpr char report_message[]      = "Specifies report type. Supported values: per_layer, per_stage.\n"
     "\t            \t         \tOverrides value in configuration file if provided. Default value: \"per_stage\"";
index 8bfe217..9a111d9 100644 (file)
@@ -4,17 +4,15 @@ Project structure:
 <pre>
     |-- root
         |-- extensions
-            |-- front/ - graph transformations during front phase
-            |-- middle/ - graph transformations during middle phase (after partial inference)
-            |-- end/  - graph transformations during back phase (before IR generation) 
-            |-- ops/ - Model Optimizer operation classes
+            |-- front/caffe
+                |-- CustomLayersMapping.xml.example - example of a file for registering custom Caffe layers in the 2017R3 public manner
         |-- mo
             |-- back - Back-End logic: contains IR emitting logic
-            |-- front - Front-End logic: contains matching between Framework-specific layers and IR specific, 
-                        calculation of output shapes for each registered layer
+            |-- front - Front-End logic: contains matching between Framework-specific layers and IR specific, calculation
+            of output shapes for each registered layer
             |-- graph - Graph utilities to work with internal IR representation
             |-- middle - Graph transformations - optimizations of the model
-            |-- ops - Model Optimizer operation classes
             |-- pipeline - Sequence of steps required to create IR for each framework
             |-- utils - Utility functions
         |-- tf_call_ie_layer - Sources for TensorFlow fallback in Inference Engine during model inference
@@ -22,18 +20,25 @@ Project structure:
         |-- mo_caffe.py - Entry point particularly for Caffe
         |-- mo_mxnet.py - Entry point particularly for MXNet
         |-- mo_tf.py - Entry point particularly for TensorFlow
-
+        |-- ModelOptimizer - Entry point particularly for Caffe that contains same CLI as 2017R3 publicly released
+        Model Optimizer
 </pre>
 
 ## Prerequisites
 
 Model Optimizer requires:
 
-1. Python 3.4 or newer
+1. Python 3 or newer
+
+2. [Optional] Read about the use cases that require Caffe to be available on the machine (:doc:`caffe_dependency`)
+   and follow the steps described in (:doc:`caffe_build`).
 
 ## Installation instructions
 
-1. Go to the Model Optimizer folder
+1. Go to the Model Optimizer folder:
+<pre>
+    cd PATH_TO_INSTALL_DIR/deployment_tools/model_optimizer/model_optimizer_tensorflow
+</pre>
 
 2. Create virtual environment and activate it. This option is strongly recommended as it creates a Python sandbox and
    dependencies for Model Optimizer do not influence global Python configuration, installed libraries etc. At the same
@@ -41,9 +46,13 @@ Model Optimizer requires:
    step only if you do want to install all Model Optimizer dependencies globally:
 
     * Create environment:
-          <pre>virtualenv -p /usr/bin/python3.6 .env3 --system-site-packages</pre>
+        <pre>
+          virtualenv -p /usr/bin/python3.6 .env3 --system-site-packages
+        </pre>
     * Activate it:
-      <pre>. .env3/bin/activate</pre>
+      <pre>
+        . .env3/bin/activate
+      </pre>
 3. Install dependencies. If you want to convert models only from particular framework, you should use one of
    available <code>requirements_*.txt</code> files corresponding to the framework of choice. For example, for Caffe use
    <code>requirements_caffe.txt</code> and so on. When you decide to switch later to other frameworks, please install dependencies
diff --git a/model-optimizer/extensions/back/CutMemory.py b/model-optimizer/extensions/back/CutMemory.py
new file mode 100644 (file)
index 0000000..65488fc
--- /dev/null
@@ -0,0 +1,65 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.back.ParameterToPlaceholder import ParameterToInput
+from extensions.ops.parameter import Parameter
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
+from mo.ops.crop import Crop
+from mo.utils.logger import log
+
+
+class CutMemory(BackReplacementPattern):
+    """
+    Cut Memory layers and have inputs/outputs in graph instead of them
+    """
+    enabled = False
+
+    def run_before(self):
+        return [ParameterToInput]
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[
+                ('op', dict(kind='op', op='Memory'))],
+            edges=[]
+        )
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        node = match['op']
+        node_id = node['id']
+
+        if node.in_port(0).disconnected():
+            i = 0
+            for dest in node.out_port(0).get_destinations():
+                new_in = Parameter(graph, {'name': "Parameter_"+str(i)+"_for_"+node_id,
+                                           'shape': dest.data.get_shape()}).create_node()
+                i += 1
+                dest.disconnect()
+                new_in.out_port(0).connect(dest)
+                log.error("Add input/output mapped {} -> {} ".format(new_in.name, "Result_for_"+node_id),
+                          extra={'is_warning': True})
+        else:
+            out_node_port = node.out_port(0).get_destination()
+            in_node_port = node.in_port(0).get_source()
+            node.in_port(0).disconnect()
+            node.out_port(0).disconnect()
+            crop = Crop(graph, {'name': 'Result_for_'+node_id, 'dim': np.array([1]), 'offset': np.array([0]), 'axis': np.array([0])}).create_node()
+            in_node_port.connect(crop.in_port(0))
+            crop.out_port(0).connect(out_node_port)
diff --git a/model-optimizer/extensions/back/CutMemory_test.py b/model-optimizer/extensions/back/CutMemory_test.py
new file mode 100644 (file)
index 0000000..911acaa
--- /dev/null
@@ -0,0 +1,71 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+from extensions.back.CutMemory import CutMemory
+from mo.utils.unittest.graph import compare_graphs, build_graph
+
+
+class CutMemoryTest(unittest.TestCase):
+    def test_remove_memory(self):
+        """Memory should be replaced by input and output"""
+        graph = build_graph(
+            nodes_attrs={
+                'input': {'kind': 'op'},
+                'data_in': {'kind': 'data', 'shape': None, 'value': None},
+                'memory_in': {'kind': 'op', 'op': 'Memory', 'index': 1, 'id': 'memory_', 'in_ports_count': 1},
+                'data_mem': {'kind': 'data', 'shape': None, 'value': None},
+                'concat': {'kind': 'op', 'op': 'Concat', 'axis': 0},
+                'concat_data': {'kind': 'data', 'shape': None, 'value': None},
+                'some_op': {'kind': 'op'},
+                'some_op_data': {'kind': 'data', 'shape': None, 'value': None},
+                'memory_out': {'kind': 'op', 'op': 'Memory', 'index': 0, 'id': 'memory_'},
+                'data_mem_out': {'kind': 'data', 'shape': None, 'value': None},
+                'mem_out_result': {'kind': 'op', 'op': 'Result'}
+            },
+            edges=[
+                ('input', 'data_in'), ('memory_in', 'data_mem'),
+                ('data_in', 'concat', {'in': 0}), ('data_mem', 'concat', {'in': 1}),
+                ('concat', 'concat_data'), ('concat_data', 'some_op'),
+                ('some_op', 'some_op_data'), ('some_op_data', 'memory_out'),
+                ('memory_out', 'data_mem_out'), ('data_mem_out', 'mem_out_result')
+            ]
+        )
+        graph_ref = build_graph(
+            nodes_attrs={
+                'input': {'kind': 'op'},
+                'data_in': {'kind': 'data', 'shape': None, 'value': None},
+                'new_input': {'kind': 'op', 'op': 'Parameter'},
+                'new_in_data': {'kind': 'data', 'shape': None, 'value': None},
+                'concat': {'kind': 'op', 'op': 'Concat', 'axis': 0},
+                'concat_data': {'kind': 'data', 'shape': None, 'value': None},
+                'some_op': {'kind': 'op'},
+                'some_op_data': {'kind': 'data', 'shape': None, 'value': None},
+                'crop': {'kind': 'op', 'op': 'Crop', 'axis': 0},
+                'crop_data': {'kind': 'data', 'shape': None, 'value': None},
+                'mem_out_result': {'kind': 'op', 'op': 'Result'},
+            },
+            edges=[
+                ('input', 'data_in'), ('new_input', 'new_in_data'),
+                ('data_in', 'concat', {'in': 0}), ('new_in_data', 'concat', {'in': 1}),
+                ('concat', 'concat_data'), ('concat_data', 'some_op'),
+                ('some_op', 'some_op_data'), ('some_op_data', 'crop'),
+                ('crop', 'crop_data'), ('crop_data', 'mem_out_result')
+            ],
+        )
+        CutMemory().find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, last_node='mem_out_result', check_op_attrs=True)
+        self.assertTrue(flag, resp)
index 2f2459f..27ca948 100644 (file)
@@ -46,6 +46,11 @@ class FuseReshapesSequence(BackReplacementPattern):
                 next_op = get_next_operation(node)[0]
                 log.debug('second node: id={}, type={}'.format(next_op.soft_get('id'), next_op.soft_get('type')))
                 if next_op.has_valid('type') and next_op.type == 'Reshape':
+                    dim_value = next_op.in_port(1).data.get_value()
+                    if dim_value is None or 0 in dim_value or -1 in dim_value:
+                        # we do not fuse reshape sequences with special symbols: 0, -1
+                        continue
+
                     # Detected Reshape1 --> data --> Reshape2 pattern without side edges. Remove Reshape1
                     log.debug('Second phase for Reshape: {}'.format(node.soft_get('name')))
                     remove_op_node_with_data_node(graph, node)
diff --git a/model-optimizer/extensions/back/Gather0D.py b/model-optimizer/extensions/back/Gather0D.py
new file mode 100644 (file)
index 0000000..e896bcd
--- /dev/null
@@ -0,0 +1,61 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import logging as log
+
+import numpy as np
+
+from mo.back.replacement import BackReplacementPattern
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
+from mo.ops.const import Const
+from mo.ops.squeeze import Squeeze
+
+
+class Gather0D(BackReplacementPattern):
+    """
+        This is a workaround until InferenceEngine starts support 0D.
+        The pass finds Gather with 0D constant input with indices to gather and converts it to 1D with 1 element and
+        then add Squeeze to restore initial number of dimension.
+    """
+
+    enabled = True
+    force_shape_inference = True
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for gather in graph.get_op_nodes(type='Gather'):
+            indices = gather.in_port(1).get_source().node
+            indices_value = gather.in_port(1).data.get_value()
+            if indices.op == 'Const' and indices_value is not None and indices_value.ndim == 0:
+                log.debug('The Gather node {} has constant 0D input with indices'.format(gather.id))
+
+                new_indices = Const(graph, {'value': np.array([indices_value.item()])}).create_node()
+
+                # the input shape is changed so need to disconnect port first
+                gather.in_port(1).disconnect()
+                gather.in_port(1).connect(new_indices.out_port(0))
+
+                # the output of Gather is changed so need to run shape inference for it and override the existing shape
+                gather['override_output_shape'] = True
+                gather['need_shape_inference'] = True
+
+                # insert Squeeze to remove the dimension 'axis' which become equal to 1 after change of the Gather
+                # indices constant
+                squeeze = Squeeze(graph, {'name': gather.id + '/Squeeze'}).create_node()
+                squeeze_axis = Const(graph, {'name': squeeze.id + '/axis',
+                                             'value': int64_array([gather.axis])}).create_node()
+
+                gather.out_port(0).get_connection().insert_node(squeeze)
+                squeeze.in_port(1).connect(squeeze_axis.out_port(0))
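
The shape effect that Gather0D compensates for can be reproduced with plain numpy; this is an illustration only, with np.take standing in for the Gather operation:

    import numpy as np

    data = np.arange(12).reshape(3, 4)

    # Gather with a 0D index drops the gathered axis ...
    direct = np.take(data, np.array(2), axis=0)       # shape (4,)

    # ... while a one-element 1D index keeps it, and squeezing that axis
    # afterwards restores the original rank, which is what the pass builds.
    kept = np.take(data, np.array([2]), axis=0)       # shape (1, 4)
    restored = np.squeeze(kept, axis=0)               # shape (4,)

    assert np.array_equal(direct, restored)
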
index 9646ade..2ed9022 100644 (file)
@@ -78,6 +78,8 @@ class ReduceReplacer(BackReplacementPattern):
         axis_data_value = node.in_port(1).data.get_value()
         axis = int64_array([axis_data_value.item()]) if axis_data_value.size == 1 else axis_data_value
         axis = [get_canonical_axis_index(input_shape, a) for a in axis]
+        assert 0 not in axis, 'The node "{}" is a Reduce operation over the batch dimension, which is not supported'.format(
+            node.name)
 
         # Check that values in axis list are consecutive
         for idx in range(1, len(axis)):
@@ -94,10 +96,11 @@ class ReduceReplacer(BackReplacementPattern):
 
         # 2. Create reshape with appropriate shape
         if len(begin_dims) > 2:
-            begin_dims = np.array([np.prod(begin_dims[0:-1]), begin_dims[-1]], dtype=np.int64)
+            begin_dims = int64_array([begin_dims[0], np.prod(begin_dims[1:])])
         else:
             # Expand begin_dims to 2
-            begin_dims = np.array(np.append(begin_dims, [1] * (2 - len(begin_dims))), dtype=np.int64)
+            begin_dims = int64_array(np.append(begin_dims, [1] * (2 - len(begin_dims))))
+
         reshape_shape = np.array([*begin_dims, reduction_dim, end_dim], dtype=np.int64)
         pool_window = np.array([1, 1, reduction_dim, 1], dtype=np.int64)
 
@@ -105,7 +108,8 @@ class ReduceReplacer(BackReplacementPattern):
         reshape_op = Reshape(graph, {'name': node.id + '/Reshape'})
         reshape_dim_const_data = Const(graph, {'name': node.id + '/Reshape/Dim',
                                                'value': reshape_shape}).create_node_with_data()
-        final_reshape_op = Reshape(graph, {'name': node.id + '/FinalReshape', 'dim': output_shape})
+
+        final_reshape_op = Reshape(graph, {'name': node.id + '/FinalReshape'})
         final_reshape_dim_const_data = Const(graph, {'name': node.id + '/FinalReshape/Dim',
                                                      'value': output_shape}).create_node_with_data()
         pooling_op = Pooling(graph,
@@ -127,6 +131,10 @@ class ReduceReplacer(BackReplacementPattern):
             ), final_reshape_dim_const_data],
             data_nodes=output_data)
 
+        # convert batch dimension to 0 to produce reshape-able IR over the batch dimension
+        reshape_dim_const_data.in_node(0).value[0] = 0
+        final_reshape_dim_const_data.in_node(0).value[0] = 0
+
         # 4. If it is reduction with summation, we need to multiply by size of the reduction slice with Mul op
         if reduce_type == 'ReduceSum':
             output_data.in_node().insert_node_with_data_after(
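
The updated shape computation can be traced in isolation. The begin/reduction/end values below are assumptions chosen to match the ReduceMean-over-axes-(2, 3) test case updated further down; the sketch only mirrors the changed lines above, not the full ReduceReplacer logic:

    import numpy as np

    begin_dims = np.array([1, 3], dtype=np.int64)    # dims before the reduced axes
    reduction_dim = 64 * 64                          # product of the reduced dims
    end_dim = 1                                      # product of the trailing dims

    if len(begin_dims) > 2:
        begin_dims = np.array([begin_dims[0], np.prod(begin_dims[1:])], dtype=np.int64)
    else:
        begin_dims = np.append(begin_dims, [1] * (2 - len(begin_dims))).astype(np.int64)

    reshape_shape = np.array([*begin_dims, reduction_dim, end_dim], dtype=np.int64)
    reshape_shape[0] = 0    # the batch dimension is written as 0 to keep the IR reshape-able
    print(reshape_shape)    # [   0    3 4096    1]
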
index 8861dab..9d7eec9 100644 (file)
@@ -98,14 +98,14 @@ class ReduceReplacerTest(unittest.TestCase):
                                  ('reshape_2_data', 'concat'),
                                  ],
                                 {'placeholder_1_data': {'shape': int64_array([1, 64, 1])},
-                                 'reshape_1_const': {'value': int64_array([1, 1, 64, 1]), 'shape': int64_array([4])},
-                                 'reshape_1_const_data': {'value': int64_array([1, 1, 64, 1]),
+                                 'reshape_1_const': {'value': int64_array([0, 1, 64, 1]), 'shape': int64_array([4])},
+                                 'reshape_1_const_data': {'value': int64_array([0, 1, 64, 1]),
                                                           'shape': int64_array([4])},
                                  'reshape_1_data': {'shape': int64_array([1, 1, 64, 1])},
                                  'pooling': {'window': int64_array([1, 1, 64, 1])},
                                  'pooling_data': {'shape': int64_array([1, 1, 1, 1])},
-                                 'reshape_2_const': {'value': int64_array([1, 1, 1]), 'shape': int64_array([3])},
-                                 'reshape_2_const_data': {'value': int64_array([1, 1, 1]), 'shape': int64_array([3])},
+                                 'reshape_2_const': {'value': int64_array([0, 1, 1]), 'shape': int64_array([3])},
+                                 'reshape_2_const_data': {'value': int64_array([0, 1, 1]), 'shape': int64_array([3])},
                                  'reshape_2_data': {'shape': int64_array([1, 1, 1])},
                                  }, nodes_with_edges_only=True)
 
@@ -155,14 +155,14 @@ class ReduceReplacerTest(unittest.TestCase):
                                  ],
                                 {'placeholder_1': {'shape': int64_array([1, 3, 64, 64])},
                                  'placeholder_1_data': {'shape': int64_array([1, 3, 64, 64])},
-                                 'reshape_1_const': {'value': int64_array([1, 3, 64, 64]), 'shape': int64_array([4])},
-                                 'reshape_1_const_data': {'value': int64_array([1, 3, 64, 64]),
+                                 'reshape_1_const': {'value': int64_array([0, 3, 64, 64]), 'shape': int64_array([4])},
+                                 'reshape_1_const_data': {'value': int64_array([0, 3, 64, 64]),
                                                           'shape': int64_array([4])},
                                  'reshape_1_data': {'shape': int64_array([1, 3, 64, 64])},
                                  'pooling': {'window': int64_array([1, 1, 64, 1])},
                                  'pooling_data': {'shape': int64_array([1, 3, 1, 64])},
-                                 'reshape_2_const': {'value': int64_array([1, 3, 1, 64]), 'shape': int64_array([4])},
-                                 'reshape_2_const_data': {'value': int64_array([1, 3, 1, 64]),
+                                 'reshape_2_const': {'value': int64_array([0, 3, 1, 64]), 'shape': int64_array([4])},
+                                 'reshape_2_const_data': {'value': int64_array([0, 3, 1, 64]),
                                                           'shape': int64_array([4])},
                                  'reshape_2_data': {'shape': int64_array([1, 3, 1, 64])},
                                  }, nodes_with_edges_only=True)
@@ -213,15 +213,15 @@ class ReduceReplacerTest(unittest.TestCase):
                                  ],
                                 {'placeholder_1': {'shape': int64_array([1, 3, 64, 64])},
                                  'placeholder_1_data': {'shape': int64_array([1, 3, 64, 64])},
-                                 'reshape_1_const': {'value': int64_array([1, 3, 64 * 64, 1]),
+                                 'reshape_1_const': {'value': int64_array([0, 3, 64 * 64, 1]),
                                                      'shape': int64_array([4])},
-                                 'reshape_1_const_data': {'value': int64_array([1, 3, 64 * 64, 1]),
+                                 'reshape_1_const_data': {'value': int64_array([0, 3, 64 * 64, 1]),
                                                           'shape': int64_array([4])},
                                  'reshape_1_data': {'shape': int64_array([1, 3, 64 * 64, 1])},
                                  'pooling': {'window': int64_array([1, 1, 64 * 64, 1])},
                                  'pooling_data': {'shape': int64_array([1, 3, 1, 1])},
-                                 'reshape_2_const': {'value': int64_array([1, 3, 1, 1]), 'shape': int64_array([4])},
-                                 'reshape_2_const_data': {'value': int64_array([1, 3, 1, 1]),
+                                 'reshape_2_const': {'value': int64_array([0, 3, 1, 1]), 'shape': int64_array([4])},
+                                 'reshape_2_const_data': {'value': int64_array([0, 3, 1, 1]),
                                                           'shape': int64_array([4])},
                                  'reshape_2_data': {'shape': int64_array([1, 3, 1, 1])},
                                  }, nodes_with_edges_only=True)
@@ -272,15 +272,15 @@ class ReduceReplacerTest(unittest.TestCase):
                                  ],
                                 {'placeholder_1': {'shape': int64_array([2, 3, 64, 64])},
                                  'placeholder_1_data': {'shape': int64_array([2, 3, 64, 64])},
-                                 'reshape_1_const': {'value': int64_array([2, 1, 3 * 64 * 64, 1]),
+                                 'reshape_1_const': {'value': int64_array([0, 1, 3 * 64 * 64, 1]),
                                                      'shape': int64_array([4])},
-                                 'reshape_1_const_data': {'value': int64_array([2, 1, 3 * 64 * 64, 1]),
+                                 'reshape_1_const_data': {'value': int64_array([0, 1, 3 * 64 * 64, 1]),
                                                           'shape': int64_array([4])},
                                  'reshape_1_data': {'shape': int64_array([2, 1, 3 * 64 * 64, 1])},
                                  'pooling': {'window': int64_array([1, 1, 3 * 64 * 64, 1])},
                                  'pooling_data': {'shape': int64_array([2, 1, 1, 1])},
-                                 'reshape_2_const': {'value': int64_array([2]), 'shape': int64_array([1])},
-                                 'reshape_2_const_data': {'value': int64_array([2]), 'shape': int64_array([1])},
+                                 'reshape_2_const': {'value': int64_array([0]), 'shape': int64_array([1])},
+                                 'reshape_2_const_data': {'value': int64_array([0]), 'shape': int64_array([1])},
                                  'reshape_2_data': {'shape': int64_array([2])},
                                  }, nodes_with_edges_only=True)
 
@@ -330,16 +330,16 @@ class ReduceReplacerTest(unittest.TestCase):
                                  ],
                                 {'placeholder_1': {'shape': int64_array([1, 16, 64, 64, 64, 4])},
                                  'placeholder_1_data': {'shape': int64_array([1, 16, 64, 64, 64, 4])},
-                                 'reshape_1_const': {'value': int64_array([65536, 64, 4, 1]),
+                                 'reshape_1_const': {'value': int64_array([0, 4194304, 4, 1]),
                                                      'shape': int64_array([4])},
-                                 'reshape_1_const_data': {'value': int64_array([65536, 64, 4, 1]),
+                                 'reshape_1_const_data': {'value': int64_array([0, 4194304, 4, 1]),
                                                           'shape': int64_array([4])},
-                                 'reshape_1_data': {'shape': int64_array([65536, 64, 4, 1])},
+                                 'reshape_1_data': {'shape': int64_array([1, 4194304, 4, 1])},
                                  'pooling': {'window': int64_array([1, 1, 4, 1])},
-                                 'pooling_data': {'shape': int64_array([65536, 64, 1, 1])},
-                                 'reshape_2_const': {'value': int64_array([1, 16, 64, 64, 64]),
+                                 'pooling_data': {'shape': int64_array([1, 4194304, 1, 1])},
+                                 'reshape_2_const': {'value': int64_array([0, 16, 64, 64, 64]),
                                                      'shape': int64_array([5])},
-                                 'reshape_2_const_data': {'value': int64_array([1, 16, 64, 64, 64]),
+                                 'reshape_2_const_data': {'value': int64_array([0, 16, 64, 64, 64]),
                                                           'shape': int64_array([5])},
                                  'reshape_2_data': {'shape': int64_array([1, 16, 64, 64, 64])},
                                  }, nodes_with_edges_only=True)
@@ -392,14 +392,14 @@ class ReduceReplacerTest(unittest.TestCase):
                                  ],
                                 {'placeholder_1': {'shape': int64_array([1, 64, 1])},
                                  'placeholder_1_data': {'shape': int64_array([1, 64, 1])},
-                                 'reshape_1_const': {'value': int64_array([1, 1, 64, 1]), 'shape': int64_array([4])},
-                                 'reshape_1_const_data': {'value': int64_array([1, 1, 64, 1]),
+                                 'reshape_1_const': {'value': int64_array([0, 1, 64, 1]), 'shape': int64_array([4])},
+                                 'reshape_1_const_data': {'value': int64_array([0, 1, 64, 1]),
                                                           'shape': int64_array([4])},
                                  'reshape_1_data': {'shape': int64_array([1, 1, 64, 1])},
                                  'pooling': {'window': int64_array([1, 1, 64, 1])},
                                  'pooling_data': {'shape': int64_array([1, 1, 1, 1])},
-                                 'reshape_2_const': {'value': int64_array([1, 1, 1]), 'shape': int64_array([3])},
-                                 'reshape_2_const_data': {'value': int64_array([1, 1, 1]), 'shape': int64_array([3])},
+                                 'reshape_2_const': {'value': int64_array([0, 1, 1]), 'shape': int64_array([3])},
+                                 'reshape_2_const_data': {'value': int64_array([0, 1, 1]), 'shape': int64_array([3])},
                                  'reshape_2_data': {'shape': int64_array([1, 1, 1])},
                                  'power': {'scale': 64.0},
                                  'power_data': {'shape': int64_array([1, 1, 1])},
index e0a0856..add5033 100644 (file)
@@ -27,6 +27,9 @@ from mo.ops.reshape import Reshape
 
 
 # Temporary nGraph workaround. TODO: REMOVE
+from mo.ops.unsqueeze import Unsqueeze
+
+
 class ScalarNormalize(BackReplacementPattern):
     enabled = True
     graph_condition = [lambda graph: graph.graph['cmd_params'].generate_experimental_IR_V10]
@@ -73,6 +76,7 @@ class ScalarNormalizeForSpecificOps(BackReplacementPattern):
                  'Unsqueeze': [1],
                  'Squeeze': [1],
                  'Eltwise': [1],
+                 'Range': [0, 1, 2],
                  }
         for node in graph.get_op_nodes():
             if node.has_and_set('type') and node.type in rules:
@@ -88,3 +92,33 @@ class ScalarNormalizeForSpecificOps(BackReplacementPattern):
                             src_node.out_port(0).connect(reshape.in_port(0))
                             reshape.infer(reshape)
         graph.strict_mode = True
+
+
+class RangeInputNormalize(BackReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: not graph.graph['cmd_params'].generate_experimental_IR_V10]
+    force_clean_up = True
+
+    def run_after(self):
+        return [ScalarNormalizeForSpecificOps]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        graph.strict_mode = False
+        # key is the type of the operation. The value is list of ports to convert from 0D to 1D
+        rules = {
+                 'Range': [0, 1, 2],
+                 }
+        for node in graph.get_op_nodes():
+            if node.has_and_set('type') and node.type in rules:
+                for port in rules[node.type]:
+                    if port in node.in_ports() and not node.in_port(port).disconnected():
+                        src_node = node.in_port(port).get_connection().get_source().node
+                        shape = node.in_port(port).data.get_shape()
+                        assert shape is not None
+                        if shape.size == 0:
+                            reshape = create_op_node_with_second_input(graph, Unsqueeze, int64_array([0]),
+                                                                       {'name': src_node.id + '/Dims'})
+                            src_node.out_port(0).get_connection().set_source(reshape.out_port(0))
+                            src_node.out_port(0).connect(reshape.in_port(0))
+                            reshape.infer(reshape)
+        graph.strict_mode = True
\ No newline at end of file
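
The Unsqueeze inserted by RangeInputNormalize simply promotes a scalar Range input to a one-element 1D tensor; in numpy terms (illustration only):

    import numpy as np

    start = np.array(0)                  # 0D tensor, shape ()
    as_1d = np.expand_dims(start, 0)     # shape (1,), what Unsqueeze with axis 0 produces
    assert as_1d.shape == (1,) and as_1d[0] == start
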
index 1792ff8..f871d1e 100644 (file)
@@ -124,6 +124,10 @@ class DeconvFrontExtractor(FrontExtractorOp):
             'get_pad': DeconvFrontExtractor.get_pad,
         }
 
+        output_padding = attr.tuple("adj", int, None)
+        if target_shape is None and output_padding:
+            node_attrs["output_padding"] = np.array([0, 0, *output_padding], dtype=np.int64)
+
         # update the attributes of the node
         Convolution.update_node_stat(node, node_attrs)
         return __class__.enabled
index 2a75fce..35eb0fe 100644 (file)
@@ -150,3 +150,70 @@ class TestDeconvShapesParsing(unittest.TestCase):
                 np.testing.assert_equal(node[key], exp_res[key])
             else:
                 self.assertEqual(node[key], exp_res[key])
+
+    def test_deconv_ext_output_pad(self):
+        params = {'attrs': {
+            "kernel": "(4, 4)",
+            "no_bias": "True",
+            "num_filter": "21",
+            "num_group": "14",
+            "pad": "(4, 4)",
+            "stride": "(2, 2)",
+            "dilate": "(3, 3)",
+            "workspace": "1536",
+            "adj": "(1, 1)"
+        }}
+        node = PB({'symbol_dict': params})
+        DeconvFrontExtractor.extract(node)
+        exp_res = {
+            'op': 'Deconvolution',
+            'pad': np.array([[0, 0], [0, 0], [4, 4], [4, 4]]),
+            'pad_spatial_shape': np.array([[4, 4], [4, 4]]),
+            'stride': np.array([1, 1, 2, 2]),
+            'kernel_spatial': np.array([4, 4]),
+            'dilation': np.array([1, 1, 3, 3]),
+            'group': 14,
+            'output': 21,
+            'bias_addable': True,
+            'bias_term': False,
+            'output_padding': np.array([0, 0, 1, 1]),
+        }
+        for key in exp_res.keys():
+            if key in ('pad', 'pad_spatial_shape', 'stride', 'kernel_spatial', 'dilation', 'output_spatial_shape', 'output_padding'):
+                np.testing.assert_equal(node[key], exp_res[key])
+            else:
+                self.assertEqual(node[key], exp_res[key])
+
+    def test_deconv_ext_target_shape_with_output_pad(self):
+        params = {'attrs': {
+            "kernel": "(4, 4)",
+            "no_bias": "True",
+            "num_filter": "21",
+            "num_group": "14",
+            "pad": "(4, 4)",
+            "stride": "(2, 2)",
+            "dilate": "(3, 3)",
+            "workspace": "1536",
+            "target_shape": "(120, 120)",
+            "adj": "(1, 1)"
+        }}
+        node = PB({'symbol_dict': params})
+        DeconvFrontExtractor.extract(node)
+        exp_res = {
+            'op': 'Deconvolution',
+            'pad': np.array([[0, 0], [0, 0], [4, 4], [4, 4]]),
+            'pad_spatial_shape': np.array([[4, 4], [4, 4]]),
+            'stride': np.array([1, 1, 2, 2]),
+            'kernel_spatial': np.array([4, 4]),
+            'dilation': np.array([1, 1, 3, 3]),
+            'group': 14,
+            'output': 21,
+            'bias_addable': True,
+            'bias_term': False,
+            'output_spatial_shape': np.array([120, 120]),
+        }
+        for key in exp_res.keys():
+            if key in ('pad', 'pad_spatial_shape', 'stride', 'kernel_spatial', 'dilation', 'output_spatial_shape'):
+                np.testing.assert_equal(node[key], exp_res[key])
+            else:
+                self.assertEqual(node[key], exp_res[key])
index 3a68da7..03475c6 100644 (file)
@@ -15,7 +15,7 @@
 """
 import numpy as np
 
-from extensions.ops.elementwise import Mul, Sub, Add, Maximum, Minimum
+from extensions.ops.elementwise import Mul, Sub, Add, Maximum, Minimum, Div, Greater, GreaterEqual, Equal, Less, LessEqual, Pow, NotEqual, LogicalAnd, LogicalOr
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.graph.graph import Node
@@ -43,6 +43,26 @@ class BroadcastAddFrontExtractor(FrontExtractorOp):
         return __class__.enabled
 
 
+class BroadcastDivFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_div'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Div.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastSubFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_sub'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Sub.update_node_stat(node)
+        return __class__.enabled
+
+
 class ElementwiseAddExtractor(FrontExtractorOp):
     op = 'elemwise_add'
     enabled = True
@@ -103,6 +123,126 @@ class ElemwiseSubFrontExtractor(FrontExtractorOp):
         return __class__.enabled
 
 
+class ElemwiseDivFrontExtractor(FrontExtractorOp):
+    op = 'elemwise_div'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Div.update_node_stat(node, {})
+        return __class__.enabled
+
+
+class BroadcastMaximumFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_maximum'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Maximum.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastMinimumFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_minimum'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Minimum.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastGreaterFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_greater'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Greater.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastGreaterEqualFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_greater_equal'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        GreaterEqual.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastEqualFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_equal'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Equal.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastNotEqualFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_not_equal'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        NotEqual.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastLesserFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_lesser'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Less.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastLesserEqualFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_lesser_equal'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        LessEqual.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastPowerFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_power'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Pow.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastLogicalAndFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_logical_and'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        LogicalAnd.update_node_stat(node)
+        return __class__.enabled
+
+
+class BroadcastLogicalOrFrontExtractor(FrontExtractorOp):
+    op = 'broadcast_logical_or'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        LogicalOr.update_node_stat(node)
+        return __class__.enabled
+
+
 class MaximumFrontExtractor(FrontExtractorOp):
     op = '_maximum'
     enabled = True
@@ -178,6 +318,83 @@ class GreaterScalarFrontExtractor(FrontExtractorOp):
         return __class__.enabled
 
 
+class GreaterEqualScalarFrontExtractor(FrontExtractorOp):
+    op = '_greater_equal_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = np.array([attrs.float('scalar', 1.0)])
+        return __class__.enabled
+
+
+class EqualScalarFrontExtractor(FrontExtractorOp):
+    op = '_equal_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = np.array([attrs.float('scalar', 1.0)])
+        return __class__.enabled
+
+
+class NotEqualScalarFrontExtractor(FrontExtractorOp):
+    op = '_not_equal_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = np.array([attrs.float('scalar', 1.0)])
+        return __class__.enabled
+
+
+class LesserScalarFrontExtractor(FrontExtractorOp):
+    op = '_lesser_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = np.array([attrs.float('scalar', 1.0)])
+        return __class__.enabled
+
+
+class LesserEqualScalarFrontExtractor(FrontExtractorOp):
+    op = '_lesser_equal_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = np.array([attrs.float('scalar', 1.0)])
+        return __class__.enabled
+
+
+class MinimumScalarFrontExtractor(FrontExtractorOp):
+    op = '_minimum_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = attrs.float('scalar', 1.0)
+        return __class__.enabled
+
+
+class MaximumScalarFrontExtractor(FrontExtractorOp):
+    op = '_maximum_scalar'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        node['scalar'] = attrs.float('scalar', 1.0)
+        return __class__.enabled
+
+
 class ZerosFrontExtractor(FrontExtractorOp):
     op = 'zeros_like'
     enabled = True
index 983845c..6f44354 100644 (file)
@@ -14,7 +14,7 @@
  limitations under the License.
 """
 
-from extensions.ops.elementwise import Div, Greater, Sub, Mul, Add
+from extensions.ops.elementwise import Div, Greater, GreaterEqual, Equal, NotEqual, Sub, Mul, Add, Less, LessEqual, Minimum, Maximum
 from mo.front.common.replacement import FrontReplacementOp
 from mo.front.mxnet.extractors.utils import scalar_ops_replacer
 from mo.graph.graph import Node, Graph
@@ -42,6 +42,51 @@ class GreaterScalarFrontReplacer(FrontReplacementOp):
         return [greater_node.id]
 
 
+class GreaterEqualScalarFrontReplacer(FrontReplacementOp):
+    op = '_greater_equal_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        greater_node = scalar_ops_replacer(graph, node, GreaterEqual)
+        return [greater_node.id]
+
+
+class EqualScalarFrontReplacer(FrontReplacementOp):
+    op = '_equal_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        equal_scalar_node = scalar_ops_replacer(graph, node, Equal)
+        return [equal_scalar_node.id]
+
+
+class NotEqualScalarFrontReplacer(FrontReplacementOp):
+    op = '_not_equal_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        not_equal_scalar_node = scalar_ops_replacer(graph, node, NotEqual)
+        return [not_equal_scalar_node.id]
+
+
+class LesserScalarFrontReplacer(FrontReplacementOp):
+    op = '_lesser_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        lesser_scalar_node = scalar_ops_replacer(graph, node, Less)
+        return [lesser_scalar_node.id]
+
+
+class LesserEqualScalarFrontReplacer(FrontReplacementOp):
+    op = '_lesser_equal_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        lesser_equal_scalar_node = scalar_ops_replacer(graph, node, LessEqual)
+        return [lesser_equal_scalar_node.id]
+
+
 class MinusScalarFrontReplacer(FrontReplacementOp):
     op = '_minus_scalar'
     enabled = True
@@ -71,3 +116,21 @@ class PlusScalarFrontReplacer(FrontReplacementOp):
     def replace_op(self, graph: Graph, node: Node):
         add_node = scalar_ops_replacer(graph, node, Add)
         return [add_node.id]
+
+
+class MinimumScalarFrontReplacer(FrontReplacementOp):
+    op = '_minimum_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        minimum_scalar_node = scalar_ops_replacer(graph, node, Minimum)
+        return [minimum_scalar_node.id]
+
+
+class MaximumScalarFrontReplacer(FrontReplacementOp):
+    op = '_maximum_scalar'
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        maximum_scalar_node = scalar_ops_replacer(graph, node, Maximum)
+        return [maximum_scalar_node.id]
diff --git a/model-optimizer/extensions/front/mxnet/expand_dims_ext.py b/model-optimizer/extensions/front/mxnet/expand_dims_ext.py
new file mode 100644 (file)
index 0000000..ae86c2d
--- /dev/null
@@ -0,0 +1,31 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
+from mo.ops.expand_dims import ExpandDims
+
+
+class ExpandDimsExtractor(FrontExtractorOp):
+    op = 'expand_dims'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = get_mxnet_layer_attrs(node.symbol_dict)
+        expand_axis = attrs.int('axis', None)
+        ExpandDims.update_node_stat(node, {'expand_axis': expand_axis})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/constant_of_shape_ext.py b/model-optimizer/extensions/front/onnx/constant_of_shape_ext.py
new file mode 100644 (file)
index 0000000..62ced0a
--- /dev/null
@@ -0,0 +1,33 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+from onnx import numpy_helper
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+from mo.ops.constant_of_shape import ConstantOfShape
+
+
+class ConstantOfShapeExtractor(FrontExtractorOp):
+    op = 'ConstantOfShape'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        fill_value = onnx_attr(node, 'value', 't', default=np.array([0.0]), dst_type=lambda x: numpy_helper.to_array(x))
+
+        ConstantOfShape.update_node_stat(node, {'fill_value': fill_value})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/constant_of_shape_to_broadcast.py b/model-optimizer/extensions/front/onnx/constant_of_shape_to_broadcast.py
new file mode 100644 (file)
index 0000000..81e607e
--- /dev/null
@@ -0,0 +1,41 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
+from mo.ops.broadcast import Broadcast
+from mo.ops.const import Const
+
+
+class ConstantOfShapeToBroadcast(FrontReplacementPattern):
+    """
+    Converts the 'ConstantOfShape' layer to 'Broadcast'.
+
+    The 'ConstantOfShape' has a single 1D input defining the output constant shape. The value to fill it with is
+    defined by the 'value' attribute. The transformation creates a constant node with a value equal to the 'value'
+    attribute and connects it to the first input of a newly created 'Broadcast' node, which defines the value to
+    broadcast. Then the input of the 'ConstantOfShape' is connected to the second input of the 'Broadcast'.
+    """
+    enabled = True
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for const_of_shape_node in graph.get_op_nodes(op='ConstantOfShape'):
+            broadcast_node = Broadcast(graph, {'name': const_of_shape_node.name + '/Broadcast'}).create_node()
+            const_of_shape_node.in_port(0).get_connection().set_destination(broadcast_node.in_port(1))
+            broadcast_node.in_port(0).connect(Const(graph, {'name': broadcast_node.name + '/FillValue',
+                                                            'value': const_of_shape_node.fill_value}
+                                                    ).create_node().out_port(0))
+            const_of_shape_node.out_port(0).get_connection().set_source(broadcast_node.out_port(0))
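
The replacement relies on ConstantOfShape over a shape input being equivalent to broadcasting the fill value to that shape; a quick numpy check of the equivalence (values are illustrative):

    import numpy as np

    shape = np.array([2, 3], dtype=np.int64)
    fill_value = np.array([1.5])

    constant_of_shape = np.full(shape, fill_value)    # what ConstantOfShape produces
    broadcast = np.broadcast_to(fill_value, shape)    # what the new Broadcast node produces

    assert np.array_equal(constant_of_shape, broadcast)
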
index d33e4a5..09fad5a 100644 (file)
@@ -15,7 +15,7 @@
 """
 import numpy as np
 
-from extensions.ops.elementwise import Add, Mul, Pow
+from extensions.ops.elementwise import Add, Mul, Pow, Less, Equal, Greater, LogicalAnd, LogicalOr
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
 from mo.graph.graph import Node
@@ -76,6 +76,16 @@ class NegFrontExtractor(FrontExtractorOp):
         return __class__.enabled
 
 
+class SqrtExtractor(FrontExtractorOp):
+    op = 'Sqrt'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Power.update_node_stat(node, {'power': 0.5})
+        return __class__.enabled
+
+
 class ScaleFrontExtractor(FrontExtractorOp):
     op = 'Scale'
     enabled = True
@@ -95,3 +105,53 @@ class MaxExtractor(FrontExtractorOp):
     def extract(node: Node):
         EltwiseNMax.update_node_stat(node)
         return __class__.enabled
+
+
+class EqualExtractor(FrontExtractorOp):
+    op = 'Equal'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Equal.update_node_stat(node)
+        return __class__.enabled
+
+
+class LessExtractor(FrontExtractorOp):
+    op = 'Less'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Less.update_node_stat(node)
+        return __class__.enabled
+
+
+class GreaterExtractor(FrontExtractorOp):
+    op = 'Greater'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Greater.update_node_stat(node)
+        return __class__.enabled
+
+
+class AndExtractor(FrontExtractorOp):
+    op = 'And'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        LogicalAnd.update_node_stat(node)
+        return __class__.enabled
+
+
+class OrExtractor(FrontExtractorOp):
+    op = 'Or'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        LogicalOr.update_node_stat(node)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/expand_ext.py b/model-optimizer/extensions/front/onnx/expand_ext.py
new file mode 100644 (file)
index 0000000..e0db7aa
--- /dev/null
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.broadcast import Broadcast
+
+
+class ExpandExtractor(FrontExtractorOp):
+    op = 'Expand'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Broadcast.update_node_stat(node)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/floor_ext.py b/model-optimizer/extensions/front/onnx/floor_ext.py
new file mode 100644 (file)
index 0000000..5ce07de
--- /dev/null
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.activation_ops import Floor
+from mo.front.extractor import FrontExtractorOp
+
+
+class FloorExtractor(FrontExtractorOp):
+    op = 'Floor'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Floor.update_node_stat(node)
+        return __class__.enabled
@@ -1,5 +1,5 @@
 """
- Copyright (C) 2018-2019 Intel Corporation
+ Copyright (c) 2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  limitations under the License.
 """
 
-from .benchmark import main
+from extensions.ops.activation_ops import Not
+from mo.front.extractor import FrontExtractorOp
+
+
+class NotExtractor(FrontExtractorOp):
+    op = 'Not'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Not.update_node_stat(node)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/reduce_min_ext.py b/model-optimizer/extensions/front/onnx/reduce_min_ext.py
new file mode 100644 (file)
index 0000000..bf5bbe9
--- /dev/null
@@ -0,0 +1,33 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.ReduceOps import ReduceMin
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+from mo.graph.graph import Node
+
+
+class ReduceMinFrontExtractor(FrontExtractorOp):
+    op = 'ReduceMin'
+    enabled = True
+
+    @staticmethod
+    def extract(node: Node):
+        axis = onnx_attr(node, 'axes', 'ints', default=None, dst_type=lambda x: int64_array(x))
+        keep_dims = onnx_attr(node, 'keepdims', 'i', default=True)
+        ReduceMin.update_node_stat(node, {'axis': axis, 'keep_dims': keep_dims})
+        return __class__.enabled
index 93affa0..8d67c1b 100644 (file)
 """
 
 import numpy as np
-import logging as log
 
-from mo.ops.op import Op
-from mo.graph.graph import Node
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
 from mo.ops.slice import Slice
@@ -38,6 +35,7 @@ class SliceFrontExtractor(FrontExtractorOp):
             'axis': axis if len(axis) != 0 else None,
             'start': start if len(start) != 0 else None,
             'end': end if len(end) != 0 else None,
+            'format': 'onnx'
         }
 
         # update the attributes of the node
diff --git a/model-optimizer/extensions/front/onnx/top_k_ext.py b/model-optimizer/extensions/front/onnx/top_k_ext.py
new file mode 100644 (file)
index 0000000..20688b7
--- /dev/null
@@ -0,0 +1,30 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.topk import TopK
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class TopKExtractor(FrontExtractorOp):
+    op = 'TopK'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        axis = onnx_attr(node, 'axis', 'i', default=-1)
+        TopK.update_node_stat(node, {'axis': axis, 'sort': 'value'})
+        return __class__.enabled
index 739f9e0..0ad8b57 100644 (file)
@@ -45,6 +45,8 @@ class ReduceAxisNormalizer(FrontReplacementSubgraph):
         node = match['reduce']
         connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()]
         if len(connected_in_ports) == 1:
+            # if 'axis' is None we still add a second input to the layer: a 1D array with a single element equal
+            # to None. The infer function handles this case because the input shape is only known at that stage
             if node.has('axis'):
                 const = Const(graph, {'value': node.axis}).create_node()
                 node.add_input_port(1, skip_if_exist=True)
diff --git a/model-optimizer/extensions/front/tf/BatchToSpaceNDToUpsample.py b/model-optimizer/extensions/front/tf/BatchToSpaceNDToUpsample.py
new file mode 100644 (file)
index 0000000..4a96c57
--- /dev/null
@@ -0,0 +1,108 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import logging as log
+
+import numpy as np
+
+from extensions.ops.upsample import UpsampleOp
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph, Node
+
+
+class BatchToSpaceNDToUpsample(FrontReplacementSubgraph):
+    """
+    The transformation looks for a pattern that performs an NX upscale of an input image specified in the NHWC layout.
+    """
+    enabled = True
+
+    @staticmethod
+    def pattern(**kwargs):
+        return dict(
+            nodes=[
+                ('transpose', dict(op='Transpose')),
+                ('expand_dims', dict(op='Unsqueeze')),
+                ('tile', dict(op='Tile')),
+                ('batch_to_space_nd', dict(op='BatchToSpaceND')),
+                ('strided_slice', dict(op='StridedSlice')),
+                ('transpose_back', dict(op='Transpose')),
+            ],
+            edges=[
+                ('transpose', 'expand_dims', {'out': 0}),
+                ('expand_dims', 'tile', {'out': 0}),
+                ('tile', 'batch_to_space_nd', {'out': 0}),
+                ('batch_to_space_nd', 'strided_slice', {'out': 0}),
+                ('strided_slice', 'transpose_back', {'out': 0})
+            ]
+        )
+
+    @staticmethod
+    def replace_sub_graph(graph: Graph, match: dict, **kwargs):
+        def _input_node_value(node: Node, port_ind: int):
+            input_node = node.in_port(port_ind).get_source().node
+            return input_node.value if input_node.op == 'Const' else None
+
+        transpose = match['transpose']
+        transpose_order = _input_node_value(transpose, 1)
+        if transpose_order is None or not np.all(np.equal(transpose_order, int64_array([1, 2, 3, 0]))):
+            log.debug('The transpose order {} for node {} is not equal to [1, 2, 3, 0]. Cannot apply '
+                      'BatchToSpaceNDToUpsample transformation.'.format(transpose_order, transpose.name))
+            return
+
+        expand_axis = match['expand_dims']
+        expand_axis_value = _input_node_value(expand_axis, 1)
+        if expand_axis_value != 0:
+            log.debug('The expand axis {} for node {} is not equal to 0. Cannot apply BatchToSpaceNDToUpsample '
+                      'transformation.'.format(expand_axis_value, expand_axis.name))
+            return
+
+        tile = match['tile']
+        tile_value = _input_node_value(tile, 1)
+        if tile_value is None:
+            log.debug('The tile value is not defined for node {}. Cannot apply BatchToSpaceNDToUpsample '
+                      'transformation.'.format(tile.name))
+            return
+
+        if np.count_nonzero(tile_value != 1) != 1:
+            log.debug('Expected exactly one tile value not equal to 1. Cannot apply BatchToSpaceNDToUpsample '
+                      'transformation.')
+            return
+        tile_batch = tile_value[0]
+
+        batch_to_space_nd = match['batch_to_space_nd']
+        block_shape = _input_node_value(batch_to_space_nd, 1)
+        if block_shape is None or tile_batch != np.prod(block_shape):
+            log.debug('The block shape {} for node {} is not defined or inconsistent with the tile size. Cannot apply '
+                      'BatchToSpaceNDToUpsample transformation.'.format(block_shape, batch_to_space_nd.name))
+            return
+        if len(block_shape) != 2:
+            log.debug('The block shape length is not equal to 2 for node {}. Cannot apply BatchToSpaceNDToUpsample '
+                      'transformation.'.format(batch_to_space_nd.name))
+            return
+
+        transpose_back = match['transpose_back']
+        transpose_back_order = _input_node_value(transpose_back, 1)
+        if transpose_back_order is None or not np.all(np.equal(transpose_back_order, int64_array([3, 0, 1, 2]))):
+            log.debug('The transpose order {} for node {} is not equal to [3, 0, 1, 2]. Cannot apply '
+                      'BatchToSpaceNDToUpsample transformation.'.format(transpose_back_order, transpose_back.name))
+            return
+
+        upsample_node = UpsampleOp(graph, {'height_scale': block_shape[0], 'width_scale': block_shape[1],
+                                           'mode': 'nearest',
+                                           'name': transpose.name + '/upsample'}).create_node()
+
+        match['transpose'].in_port(0).get_connection().set_destination(upsample_node.in_port(0))
+        match['transpose_back'].out_port(0).get_connection().set_source(upsample_node.out_port(0))
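
The Upsample node created above performs the NX nearest-neighbor upscale that the matched subgraph encodes; for an NHWC image this amounts to repeating rows and columns. A sketch under the assumption that height_scale and width_scale both equal the block size:

    import numpy as np

    image = np.arange(4).reshape(1, 2, 2, 1)    # NHWC input
    block = 2                                   # upscale factor per spatial dimension
    upsampled = image.repeat(block, axis=1).repeat(block, axis=2)
    print(upsampled.shape)                      # (1, 4, 4, 1)
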
diff --git a/model-optimizer/extensions/front/tf/InterpolateTransposes.py b/model-optimizer/extensions/front/tf/InterpolateTransposes.py
new file mode 100644 (file)
index 0000000..b764eac
--- /dev/null
@@ -0,0 +1,58 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
+from mo.graph.graph import Graph, Node
+from mo.middle.pattern_match import find_pattern_matches, inverse_dict
+
+
+class InterpolateTranspose(FrontReplacementFromConfigFileGeneral):
+    """
+    Delete useless transposes around the ResizeNearestNeighbor op. In TF this op works in the NHWC layout, while
+    Resample in OpenVINO works in the NCHW layout. If the whole graph has the NCHW layout, the transposes around
+    Resample ((NCHW->NHWC) -> Resample -> (NHWC->NCHW)) should be deleted so the op runs in NCHW without layout changes.
+    """
+    enabled = True
+    replacement_id = 'InterpolateTranspose'
+
+    pattern_nodes = [
+        ('interpolate', {'kind': 'op', 'op': 'Interpolate'}),
+        ('transpose_1', {'kind': 'op', 'op': 'Transpose'}),
+        ('transpose_2', {'kind': 'op', 'op': 'Transpose'}),
+    ]
+    pattern_edges = [
+        ('transpose_1', 'interpolate'),
+        ('interpolate', 'transpose_2'),
+    ]
+
+    def transform_graph(self, graph: Graph, replacement_descriptions: dict):
+        matches = find_pattern_matches(graph, self.pattern_nodes, self.pattern_edges)
+        for match in list(matches):
+            inverse_match = inverse_dict(match)
+            interpolate = Node(graph, inverse_match['interpolate'])
+            transpose_1 = Node(graph, inverse_match['transpose_1'])
+            transpose_2 = Node(graph, inverse_match['transpose_2'])
+
+            # Check for data layout and transposes orders
+            if graph.graph['layout'] != 'NCHW' or not np.array_equal(transpose_1.in_port(1).data.get_value(), [0, 2, 3, 1]) \
+                    or not np.array_equal(transpose_2.in_port(1).data.get_value(), [0, 3, 1, 2]):
+                return
+
+            transpose_1.in_port(0).get_connection().set_destination(interpolate.in_port(0))
+            transpose_2.out_port(0).get_connection().set_source(interpolate.out_port(0))
+
+            graph.remove_nodes_from([transpose_1.id, transpose_2.id])
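
The two transposes removed by this pass are mutually inverse NCHW<->NHWC permutations, which is why dropping both keeps the graph numerically identical once Interpolate runs in NCHW; a quick numpy check:

    import numpy as np

    x = np.random.rand(1, 3, 8, 8)                       # NCHW tensor
    to_nhwc = np.transpose(x, (0, 2, 3, 1))              # Transpose with order [0, 2, 3, 1]
    back_to_nchw = np.transpose(to_nhwc, (0, 3, 1, 2))   # Transpose with order [0, 3, 1, 2]
    assert np.array_equal(x, back_to_nchw)
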
index c225868..b8916e9 100644 (file)
@@ -160,7 +160,7 @@ def _relax_reshape_nodes(graph: Graph, pipeline_config: PipelineConfig):
     for ssd_head_ind in range(num_layers):
         input_node = _find_ssd_head_node(graph, ssd_head_ind, 'box')
         assert (input_node is not None)
-        old_reshape_node = _skip_node_of_type(input_node.out_node(), ['Identity'])
+        old_reshape_node = _skip_node_of_type(input_node.out_node(), ['Identity', 'FakeQuantWithMinMaxVars'])
         assert old_reshape_node.op == 'Reshape'
         reshape_size_node = Const(graph, {'value': int64_array([0, -1, 1, 4])}).create_node([])
         new_reshape_op = Reshape(graph, {'name': input_node.id + '/Reshape'})
@@ -170,7 +170,7 @@ def _relax_reshape_nodes(graph: Graph, pipeline_config: PipelineConfig):
         # fix hard-coded value for the number of items in tensor produced by the convolution to make topology reshapable
         input_node = _find_ssd_head_node(graph, ssd_head_ind, 'class')
         assert (input_node is not None)
-        old_reshape_node = _skip_node_of_type(input_node.out_node(), ['Identity'])
+        old_reshape_node = _skip_node_of_type(input_node.out_node(), ['Identity', 'FakeQuantWithMinMaxVars'])
         assert old_reshape_node.op == 'Reshape'
         reshape_size_node_2 = Const(graph, {'value': int64_array([0, -1, num_classes + 1])}).create_node([])
         new_reshape_op_2 = Reshape(graph, {'name': input_node.id + '/Reshape'})
@@ -191,6 +191,9 @@ def _create_prior_boxes_node(graph: Graph, pipeline_config: PipelineConfig):
     max_scale = pipeline_config.get_param('ssd_anchor_generator_max_scale')
     num_layers = pipeline_config.get_param('ssd_anchor_generator_num_layers')
     aspect_ratios = pipeline_config.get_param('ssd_anchor_generator_aspect_ratios')
+    if not isinstance(aspect_ratios, list):
+        aspect_ratios = [aspect_ratios]
+
     # prior boxes have to be generated using the image size used for training
     image_height = pipeline_config.get_param('resizer_image_height')
     image_width = pipeline_config.get_param('resizer_image_width')
@@ -203,7 +206,11 @@ def _create_prior_boxes_node(graph: Graph, pipeline_config: PipelineConfig):
     if pipeline_config.get_param('ssd_anchor_generator_reduce_lowest') is not None:
         reduce_boxes_in_lowest_layer = pipeline_config.get_param('ssd_anchor_generator_reduce_lowest')
 
-    scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) for i in range(num_layers)] + [1.0]
+    if pipeline_config.get_param('ssd_anchor_generator_scales') is not None:
+        scales = pipeline_config.get_param('ssd_anchor_generator_scales') + [1.0]
+    else:
+        scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) for i in range(num_layers)] + [1.0]
+
     prior_box_nodes = []
     for ssd_head_ind in range(num_layers):
         ssd_head_node = _find_ssd_head_node(graph, ssd_head_ind, 'box')
@@ -216,8 +223,10 @@ def _create_prior_boxes_node(graph: Graph, pipeline_config: PipelineConfig):
             widths = [scales[ssd_head_ind] * sqrt(ar) for ar in aspect_ratios]
             heights = [scales[ssd_head_ind] / sqrt(ar) for ar in aspect_ratios]
 
-            widths += [sqrt(scales[ssd_head_ind] * scales[ssd_head_ind + 1])]
-            heights += [sqrt(scales[ssd_head_ind] * scales[ssd_head_ind + 1])]
+            interpolated_scale_ar = pipeline_config.get_param('ssd_anchor_generator_interpolated_scale_aspect_ratio')
+            if interpolated_scale_ar > 0.0:
+                widths += [sqrt(scales[ssd_head_ind] * scales[ssd_head_ind + 1]) * interpolated_scale_ar]
+                heights += [sqrt(scales[ssd_head_ind] * scales[ssd_head_ind + 1]) / interpolated_scale_ar]
         widths = [w * image_width * base_anchor_size[1] for w in widths]
         heights = [h * image_height * base_anchor_size[0] for h in heights]
 
@@ -944,8 +953,11 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi
                                                              {'name': 'do_reshape_conf'}, activation_conf_node)
         mark_as_correct_data_layout(reshape_conf_node)
 
-        if pipeline_config.get_param('ssd_anchor_generator_num_layers') is not None or \
-                pipeline_config.get_param('multiscale_anchor_generator_min_level') is not None:
+        custom_attributes = match.custom_replacement_desc.custom_attributes
+        if ('disable_prior_boxes_layers_generator' not in custom_attributes or
+            not custom_attributes['disable_prior_boxes_layers_generator']) and \
+            (pipeline_config.get_param('ssd_anchor_generator_num_layers') is not None or
+                pipeline_config.get_param('multiscale_anchor_generator_min_level') is not None):
             # change the Reshape operations with hardcoded number of output elements of the convolution nodes to be
             # reshapable
             _relax_reshape_nodes(graph, pipeline_config)
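
The changed anchor generation can be traced in isolation. The parameter values below are assumptions picked only to show how explicit 'ssd_anchor_generator_scales' and 'interpolated_scale_aspect_ratio' feed into the box widths and heights; the sketch mirrors the branches added above:

    from math import sqrt

    # Assumed pipeline parameters, for illustration only.
    min_scale, max_scale, num_layers = 0.2, 0.95, 6
    aspect_ratios = [1.0, 2.0, 0.5]
    interpolated_scale_ar = 1.0      # 'ssd_anchor_generator_interpolated_scale_aspect_ratio'
    explicit_scales = None           # 'ssd_anchor_generator_scales', None when absent

    if explicit_scales is not None:
        scales = explicit_scales + [1.0]
    else:
        scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
                  for i in range(num_layers)] + [1.0]

    head = 1  # SSD head index
    widths = [scales[head] * sqrt(ar) for ar in aspect_ratios]
    heights = [scales[head] / sqrt(ar) for ar in aspect_ratios]
    if interpolated_scale_ar > 0.0:
        widths += [sqrt(scales[head] * scales[head + 1]) * interpolated_scale_ar]
        heights += [sqrt(scales[head] * scales[head + 1]) / interpolated_scale_ar]
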
index e9f42ce..04a618d 100644 (file)
@@ -16,7 +16,7 @@
 import logging as log
 
 from extensions.ops.elementwise import Add, Mul, Sub, Div, Maximum, Minimum, Pow, LogicalAnd, LogicalOr, Equal, \
-    GreaterEqual, Greater, Less, LessEqual, NotEqual
+    GreaterEqual, Greater, Less, LessEqual, NotEqual, BiasAdd
 from mo.front.extractor import FrontExtractorOp
 from mo.front.tf.extractors.utils import tf_dtype_extractor
 from mo.ops.eltwise_n import EltwiseNAdd
@@ -33,6 +33,16 @@ class AddExtractor(FrontExtractorOp):
         return __class__.enabled
 
 
+class AddV2Extractor(FrontExtractorOp):
+    op = 'AddV2'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Add.update_node_stat(node, {'data_type': tf_dtype_extractor(node.pb.attr["T"].type)})
+        return __class__.enabled
+
+
 class AddNExtractor(FrontExtractorOp):
     op = 'AddN'
     enabled = True
@@ -49,13 +59,9 @@ class BiasAddExtractor(FrontExtractorOp):
 
     @staticmethod
     def extract(node):
-        data_format = node.pb.attr['data_format'].s.decode("utf-8")
-        if data_format == "NHWC":
-            Add.update_node_stat(node, {'data_type': tf_dtype_extractor(node.pb.attr["T"].type)})
-            return __class__.enabled
-        else:
-            log.error('BiasAdd operation has unsupported `data_format`={}'.format(data_format))
-            return False
+        BiasAdd.update_node_stat(node, {'data_type': tf_dtype_extractor(node.pb.attr["T"].type),
+                                        'data_format': node.pb.attr["data_format"].s.decode()})
+        return __class__.enabled
 
 
 class MulExtractor(FrontExtractorOp):
diff --git a/model-optimizer/extensions/front/tf/sparse_fill_empty_rows_ext.py b/model-optimizer/extensions/front/tf/sparse_fill_empty_rows_ext.py
new file mode 100644 (file)
index 0000000..c0038c2
--- /dev/null
@@ -0,0 +1,33 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from extensions.ops.sparse_fill_empty_rows import SparseFillEmptyRows
+from mo.front.extractor import FrontExtractorOp
+
+
+class SparseFillEmptyRowsFrontExtractor(FrontExtractorOp):
+    op = 'SparseFillEmptyRows'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = {}
+
+        SparseFillEmptyRows.update_node_stat(node, attrs)
+
+        return __class__.enabled
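
For context, TensorFlow's SparseFillEmptyRows fills rows of a sparse tensor that have no entries with a default value and reports which rows were empty. A rough numpy sketch of that behaviour under assumed inputs; it ignores the row-major re-ordering the real op performs and is not part of this extractor.

import numpy as np

# COO sparse tensor with dense_shape (4, 3); rows 0, 1 and 3 have entries, row 2 is empty
indices = np.array([[0, 1], [1, 0], [3, 2]])
values = np.array([7.0, 5.0, 9.0])
dense_shape = (4, 3)
default_value = 0.0

present_rows = set(indices[:, 0].tolist())
empty_row_indicator = np.array([r not in present_rows for r in range(dense_shape[0])])

# append one (row, 0) entry with the default value for every empty row
fill_indices = np.array([[r, 0] for r in range(dense_shape[0]) if r not in present_rows])
out_indices = np.concatenate([indices, fill_indices])
out_values = np.concatenate([values, np.full(len(fill_indices), default_value)])

print(empty_row_indicator)                       # [False False  True False]
print(out_indices.tolist(), out_values.tolist())
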
diff --git a/model-optimizer/extensions/front/tf/swish.py b/model-optimizer/extensions/front/tf/swish.py
new file mode 100644 (file)
index 0000000..eb27db6
--- /dev/null
@@ -0,0 +1,37 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.activation_ops import Sigmoid
+from extensions.ops.elementwise import Mul
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Node, Graph
+
+
+class Swish(FrontReplacementOp):
+    op = "swish_f32"
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        mul_node = Mul(graph, {'name': node.name + '/mul_'}).create_node()
+        sigmoid_node = Sigmoid(graph, {'name': node.name + '/sigmoid_'}).create_node()
+
+        # Connect nodes
+        node.in_port(0).get_connection().get_source().connect(mul_node.in_port(0))
+        node.in_port(0).get_connection().get_source().connect(sigmoid_node.in_port(0))
+        sigmoid_node.out_port(0).connect(mul_node.in_port(1))
+
+        # The "explicit" version of the return value is: [(out_node.id, 0)])
+        return [mul_node.id]
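
The replacement above relies on the identity swish(x) = x * sigmoid(x); the Mul/Sigmoid pair reproduces the fused swish_f32 node. A small numpy check of that identity, illustrative only:

import numpy as np

def swish(x):
    # swish_f32 semantics: x multiplied by its own sigmoid
    return x * (1.0 / (1.0 + np.exp(-x)))

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=np.float32)
sigmoid = 1.0 / (1.0 + np.exp(-x))
assert np.allclose(swish(x), x * sigmoid)
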
diff --git a/model-optimizer/extensions/front/tf/swish_test.py b/model-optimizer/extensions/front/tf/swish_test.py
new file mode 100644 (file)
index 0000000..bd52635
--- /dev/null
@@ -0,0 +1,56 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.front.tf.swish import Swish
+from mo.utils.unittest.graph import build_graph, compare_graphs
+
+nodes_attributes = {
+    'placeholder_1': {'shape': np.array([1, 227, 227, 3]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
+    'placeholder_2': {'shape': np.array([1, 227, 227, 3]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
+    # swish operation
+    'swish': {'kind': 'op', 'op': 'swish_f32'},
+    # Test operation
+    'last': {'type': None, 'value': None, 'kind': 'op', 'op': None},
+    # Mul and Sigmoid operations
+    'mul': {'type': 'Multiply', 'kind': 'op', 'op': 'Mul'},
+    'sigmoid': {'value': None, 'type': 'Sigmoid', 'kind': 'op', 'op': 'Sigmoid'},
+}
+
+
+class TestSwish(unittest.TestCase):
+    def test_swish_test_1(self):
+        # Check that swish_f32 is replaced with a Sigmoid/Mul pair fed from the same input
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'swish'),
+                             ('swish', 'last')
+                             ], nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'sigmoid', {'out': 0}),
+                                 ('placeholder_1', 'mul', {'in': 0, 'out': 0}),
+                                 ('sigmoid', 'mul', {'in': 1}),
+                                 ('mul', 'last'),
+                                 ], nodes_with_edges_only=True)
+
+        graph.stage = 'front'
+        Swish().find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last', check_op_attrs=True)
+        self.assertTrue(flag, resp)
diff --git a/model-optimizer/extensions/front/tf/unique_ext.py b/model-optimizer/extensions/front/tf/unique_ext.py
new file mode 100644 (file)
index 0000000..0e56287
--- /dev/null
@@ -0,0 +1,39 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from extensions.ops.unique import Unique
+from mo.front.extractor import FrontExtractorOp
+
+
+class UniqueFrontExtractor(FrontExtractorOp):
+    op = 'Unique'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        # TensorFlow Unique operation always returns two outputs: unique elements and indices
+        # The unique elements in the output are not sorted
+        attrs = {
+            'sorted': 'false',
+            'return_inverse': 'true',
+            'return_counts': 'false'
+        }
+
+        Unique.update_node_stat(node, attrs)
+
+        return __class__.enabled
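
The attribute values above mirror TensorFlow's Unique contract: unique elements are returned in order of first appearance (unsorted) together with, for every input element, the index of its unique value. A hedged numpy sketch that reproduces this contract; numpy's own unique sorts, so the first-appearance order is restored manually:

import numpy as np

x = np.array([9, 3, 9, 1, 3, 7])

# np.unique sorts by default; return_inverse gives the per-element indices
sorted_unique, inverse = np.unique(x, return_inverse=True)

# preserve first-appearance order, as TF Unique does
first_pos = [np.flatnonzero(x == v)[0] for v in sorted_unique]
order = np.argsort(first_pos)
unsorted_unique = sorted_unique[order]
remap = np.argsort(order)          # old (sorted) index -> new (first-appearance) index
tf_like_inverse = remap[inverse]

print(unsorted_unique)   # [9 3 1 7]
print(tf_like_inverse)   # [0 1 0 2 1 3]
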
diff --git a/model-optimizer/extensions/middle/BiasAddBroadcasting.py b/model-optimizer/extensions/middle/BiasAddBroadcasting.py
new file mode 100644 (file)
index 0000000..8353cd9
--- /dev/null
@@ -0,0 +1,75 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.middle.EltwiseChecker import EltwiseChecker
+from extensions.ops.elementwise import Add
+from mo.front.common.layout import get_features_dim
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.const import Const
+from mo.ops.unsqueeze import Unsqueeze
+import numpy as np
+
+
+class BiasAddInputBroadcasting(MiddleReplacementPattern):
+    """
+    In TF, the BiasAdd op has 2 inputs: a data tensor and a bias tensor. The bias always has a 1D shape and must be
+    broadcast to the data tensor along the features dimension.
+
+    The pass also replaces BiasAdd with a regular Add op once the broadcasting is made explicit.
+    """
+    enabled = True
+    force_shape_inference = True
+
+    def run_before(self):
+        return [EltwiseChecker]
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[
+                ('BiasAdd', dict(kind='op', op='Add', type='BiasAdd'))
+            ],
+            edges=[])
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        bias_add = match['BiasAdd']
+
+        # Replace BiasAdd by Add operation
+        new_add = Add(graph, {'name': bias_add.id + '/Add'}).create_node()
+
+        bias_add.in_port(0).get_connection().set_destination(new_add.in_port(0))
+        bias_add.in_port(1).get_connection().set_destination(new_add.in_port(1))
+        bias_add.out_port(0).get_connection().set_source(new_add.out_port(0))
+
+        if bias_add.data_format != 'NCHW':
+            return
+
+        input_shape = new_add.in_port(0).data.get_shape()
+        bias_shape = new_add.in_port(1).data.get_shape()
+        assert len(bias_shape) == 1
+
+        unsqueeze_dims = np.arange(len(input_shape))
+        channel_dim = get_features_dim('NCHW', len(input_shape))
+        unsqueeze_dims = np.delete(unsqueeze_dims, channel_dim, 0)
+
+        unsqueeze_node = Unsqueeze(graph, {'name': new_add.id + '/BiasUnsqueeze'}).create_node()
+        unsqueeze_dims_node = Const(graph, {'name': new_add.id + '/Dims',
+                                            'value': unsqueeze_dims}).create_node()
+        # Reconnecting nodes
+        unsqueeze_node.in_port(1).connect(unsqueeze_dims_node.out_port(0))
+        unsqueeze_node['override_output_shape'] = True
+
+        new_add.in_port(1).get_connection().insert_node(unsqueeze_node)
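
The pass above makes the implicit BiasAdd broadcast explicit for NCHW by unsqueezing every dimension of the 1-D bias except the channel one, so a plain Add can be used. A minimal numpy illustration of the intended reshape, with ad hoc shapes and not the MO graph API:

import numpy as np

data = np.zeros((2, 8, 4, 4), dtype=np.float32)   # NCHW activation
bias = np.arange(8, dtype=np.float32)             # 1-D bias, one value per channel

# unsqueeze all dims except the channel dim (axis 1 for NCHW),
# i.e. reshape (8,) -> (1, 8, 1, 1) so a plain Add broadcasts correctly
channel_dim = 1
shape = [1] * data.ndim
shape[channel_dim] = bias.shape[0]
out = data + bias.reshape(shape)

assert out.shape == data.shape
assert np.allclose(out[:, 3], 3.0)   # every element of channel 3 received bias[3]
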
index ba9f2ed..19a115c 100644 (file)
@@ -32,7 +32,7 @@ class CastToFloatMark(MiddleReplacementPattern):
         from extensions.middle.pass_separator import PreMiddleStart
         return [PreMiddleStart]
 
-    identity_list = [np.float32, np.double, np.int32, np.int64]
+    identity_list = [np.float32, np.double, np.int32, np.int64, np.uint8, np.bool]
 
     def pattern(self):
         return dict(
@@ -41,8 +41,13 @@ class CastToFloatMark(MiddleReplacementPattern):
 
     def replace_pattern(self, graph: Graph, match: dict):
         # resulting network is fully floating point, so casts to float are useless
-        if match['op'].dst_type in [np.int32, np.int64]:
-            log.warning('Deleting Cast node {} to {} from network since Cast operation isn\'t supported yet. Inference results can be'
-                        ' incorrect'.format(match['op'].name, match['op'].dst_type))
+        node = match['op']
+        name = node.soft_get('name', node.id)
+        dst_type = node.dst_type
 
-        match['op']['identity'] = True
+        if node.out_port(0).data.get_value() is None:
+            if dst_type in [np.int32, np.int64]:
+                log.warning('Deleting Cast node {} to {} from network since Cast operation isn\'t supported yet. '
+                            'Inference results can be incorrect'.format(name, dst_type))
+
+            match['op']['identity'] = True
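
The rewritten transform only marks a Cast as an identity when its output has no precomputed value, and it still warns about casts to int32/int64 because removing them can change results in a fully floating-point IR. A toy numpy example of why a dropped cast-to-int is lossy, unrelated to the MO graph API:

import numpy as np

x = np.array([0.2, 1.7, 2.5], dtype=np.float32)
kept = x.astype(np.int64).astype(np.float32)   # what the original model computes
dropped = x                                    # what remains once the Cast is removed

print(kept)     # [0. 1. 2.]
print(dropped)  # [0.2 1.7 2.5] -> results can differ, hence the warning
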
index 520621d..23ce86e 100644 (file)
  limitations under the License.
 """
 
-from copy import deepcopy
-
 import numpy as np
 
 from mo.front.common.layout import get_features_dim, shape_for_layout
-from mo.graph.graph import Node, Graph
-from mo.middle.passes.eliminate import graph_clean_up, graph_clean_up_tf, graph_clean_up_onnx
-from mo.middle.passes.fusing.helpers import get_value_id
-from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.op import Op
@@ -45,7 +40,7 @@ class Eltwise1DInputReshape(MiddleReplacementPattern):
     change of graph.graph['layout'] may cause an issue
     change in re-layout function: convert_nhwc_to_nchw(graph) may cause an issue
     """
-    enabled = True
+    enabled = False
 
     def run_after(self):
         return [EltwiseInputReshape]
@@ -53,19 +48,20 @@ class Eltwise1DInputReshape(MiddleReplacementPattern):
     def find_and_replace_pattern(self, graph: Graph):
         layout = graph.graph['layout']
         for eltwise_op_node in graph.get_op_nodes(is_eltwise=True):
-            if get_value_id(eltwise_op_node) is None:
-                out_shape = eltwise_op_node.out_node().shape
+                out_shape = eltwise_op_node.out_port().data.get_shape()
                 if 4 <= len(out_shape) <= 5:
                     out_features = out_shape[get_features_dim(layout, len(out_shape))]
                     for port, node in eltwise_op_node.in_nodes().items():
                         if len(node.shape) != len(out_shape) and len(node.shape) == 1 and out_features == node.shape[0]:
-                            in_atts = deepcopy(graph.get_edge_data(node.id, eltwise_op_node.id)[0])
-                            graph.remove_edge(node.id, eltwise_op_node.id)
                             new_shape = shape_for_layout(layout, batch=1, features=out_features, height=1, width=1,
                                                          depth=1 if len(out_shape) == 5 else None)
-                            reshape_data_op = Reshape(graph, attrs={'dim': new_shape, 'name': node.id + '/Broadcast'})
-                            reshape_data_node = reshape_data_op.create_node_with_data([node])
-                            graph.add_edge(reshape_data_node.id, eltwise_op_node.id, **in_atts)
+                            dim_const = Const(graph, {'value': new_shape, 'name': node.id + '/Dim'}).create_node()
+                            reshape_op = Reshape(graph, attrs={'dim': new_shape, 'name': node.id + '/Broadcast'}).create_node()
+
+                            eltwise_op_node.in_port(port).get_source().node.out_port(0).get_connection().set_destination(reshape_op.in_port(0))
+                            reshape_op.in_port(1).connect(dim_const.out_port(0))
+
+                            reshape_op.out_port(0).connect(eltwise_op_node.in_port(port))
 
 
 class EltwiseInputReshape(MiddleReplacementPattern):
diff --git a/model-optimizer/extensions/middle/InsertSelect.py b/model-optimizer/extensions/middle/InsertSelect.py
new file mode 100644 (file)
index 0000000..ece6e98
--- /dev/null
@@ -0,0 +1,146 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.select import Select
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph, Node
+from mo.middle.pattern_match import find_pattern_matches, inverse_dict
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.concat import Concat
+from mo.ops.const import Const
+from mo.ops.crop import Crop
+from mo.ops.memory import Memory
+from mo.ops.result import Result
+from mo.utils.error import Error
+from mo.utils.graph import invert_sub_graph_between_nodes
+
+
+class AddSelectBeforeMemoryNodePattern(MiddleReplacementPattern):
+    """
+    Add Select before saving state with Memory to avoid garbage saving
+    """
+    enabled = False
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[('op', dict(op='Memory', index=0))],
+            edges=[])
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        node = match['op']
+
+        if node.name == 'iteration_number_out':
+            return
+
+        # calculate the context length after which the inference state becomes meaningful
+        inputs = []
+        for n in graph.get_op_nodes(**{'op': 'Parameter'}):
+            inputs.append(n)
+
+        in_nodes = []
+        for inp in inputs:
+            for ins in inp.out_port(0).get_destinations():
+                in_nodes.append(ins.node.name)
+
+        context_len = 1
+        try:
+            subgraph = invert_sub_graph_between_nodes(graph, [node.in_port(0).get_source().node.name], in_nodes)
+        except Error:
+            return
+
+        for n in subgraph:
+            n_node = Node(graph, n)
+            if n_node.kind == 'op' and n_node.op == 'Splice':
+                context_len += len(n_node.context) - 1
+
+        if context_len == 1:
+            return
+
+        in_node_port = node.in_port(0).get_source()
+        in_node_shape = node.in_port(0).data.get_shape()
+        node.in_port(0).disconnect()
+
+        # add Select before saving state to avoid saving garbage
+        select_node = Select(graph, {'name': 'select_' + node.name}).create_node()
+        zero_else = Const(graph, {'name': 'zero_else', 'value': np.zeros(in_node_shape)}).create_node()
+        select_node.in_port(1).connect(in_node_port)
+        select_node.in_port(2).connect(zero_else.out_port(0))
+
+        # check whether an appropriate iteration counter already exists
+        existing_counters = find_pattern_matches(graph, nodes=[('mem_in', dict(op='Memory', index=1,
+                                                                               shape=int64_array([context_len]))),
+                                                               ('mem_in_data', dict()),
+                                                               ('crop_mem_in', dict(op='Crop', axis=int64_array([1]),
+                                                                                    offset=int64_array([1]),
+                                                                                    dim=int64_array([context_len-1]))),
+                                                               ('crop_mem_in_data', dict()),
+                                                               ('concat', dict(op='Concat', axis=1)),
+                                                               ('concat_data', dict()),
+                                                               ('const_1', dict(op='Const')),
+                                                               ('const_1_data', dict()),
+                                                               ('mem_out', dict(op='Memory', index=0,
+                                                                                shape=int64_array([context_len]))),
+                                                               ('crop_out', dict(op='Crop', axis=int64_array([1]),
+                                                                                 offset=int64_array([0]),
+                                                                                 dim=int64_array([1]))),
+                                                               ('crop_out_data', dict()),
+                                                               ('select', dict(op='Select'))
+                                                               ],
+                                                 edges=[('mem_in', 'mem_in_data'), ('mem_in_data', 'crop_mem_in'),
+                                                        ('crop_mem_in', 'crop_mem_in_data'),
+                                                        ('crop_mem_in_data', 'concat', {'in': 0}),
+                                                        ('const_1', 'const_1_data'),
+                                                        ('const_1_data', 'concat', {'in': 1}),
+                                                        ('concat', 'concat_data'), ('concat_data', 'mem_out'),
+                                                        ('concat_data', 'crop_out'), ('crop_out', 'crop_out_data'),
+                                                        ('crop_out_data', 'select')])
+        counter_match = next(existing_counters, None)
+        if counter_match is not None:
+            input_port = Node(graph, inverse_dict(counter_match)['crop_out']).out_port(0)
+        else:
+            mem_out = Memory(graph, {'name': 'iteration_number', 'size': 2,
+                                     'index': 1, 'id': 'iteration_'+node.name,
+                                     'shape': int64_array([context_len]),
+                                     'force_precision': 'I32'}).create_node()
+            cut_first = Crop(graph, {'name': 'cut_first', 'axis': int64_array([1]),
+                                     'offset': int64_array([1]), 'dim': int64_array([context_len-1]),
+                                     'force_precision': 'I32'}).create_node()
+            cut_first.in_port(0).connect(mem_out.out_port(0))
+            ones = Const(graph, {'name': 'ones', 'value': np.ones([1, 1], dtype=np.int64),
+                                 'force_precision': 'I32'}).create_node()
+            concat = Concat(graph, {'name': 'concat_ones', 'in_ports_count': 2, 'axis': 1,
+                                    'force_precision': 'I32'}).create_node()
+            concat.in_port(0).connect(cut_first.out_port(0))
+            concat.in_port(1).connect(ones.out_port(0))
+            mem_in = Memory(graph, {'name': 'iteration_number_out', 'size': 2,
+                                    'index': 0, 'id': 'iteration_'+node.name,
+                                    'shape': int64_array([context_len]),
+                                    'force_precision': 'I32'}).create_node()
+            mem_in.in_port(0).connect(concat.out_port(0))
+            res = Result(graph, {}).create_node()
+            mem_in.out_port(0).connect(res.in_port(0))
+            cut_last = Crop(graph, {'name': 'cut_last', 'axis': int64_array([1]),
+                                    'offset': int64_array([0]), 'dim': int64_array([1]),
+                                    'force_precision': 'I32'}).create_node()
+            cut_last.in_port(0).connect(concat.out_port(0))
+            input_port = cut_last.out_port(0)
+
+        select_node.in_port(0).connect(input_port)
+        select_node.out_port(0).connect(node.in_port(0))
+        select_node.out_port(0).data.set_shape(in_node_shape)
diff --git a/model-optimizer/extensions/middle/InsertSelect_test.py b/model-optimizer/extensions/middle/InsertSelect_test.py
new file mode 100644 (file)
index 0000000..2b0e972
--- /dev/null
@@ -0,0 +1,271 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+import unittest
+
+from extensions.middle.InsertSelect import AddSelectBeforeMemoryNodePattern
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.unittest.graph import build_graph, compare_graphs
+
+
+class InsertSelectTests(unittest.TestCase):
+
+    # graph has no Splice nodes, so no Select should be inserted
+    def test_insert_select_0(self):
+        graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                             'placeholder_1': {'kind': 'op', 'op': None},
+                             'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                             'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                             },
+                            [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                             ('placeholder_data_1', 'memory')
+                             ],
+                            nodes_with_edges_only=True)
+        AddSelectBeforeMemoryNodePattern().find_and_replace_pattern(graph)
+        ref_graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'placeholder_1': {'kind': 'op', 'op': None},
+                                 'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                                 'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                                 },
+                                [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                                 ('placeholder_data_1', 'memory')
+                                 ],
+                                nodes_with_edges_only=True
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'memory')
+        self.assertTrue(flag, resp)
+
+    # graph contains 1 Splice with context length 5; a Select with a Memory-based counter of length 5 should be inserted
+    def test_insert_select_1(self):
+        graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                             'placeholder_1': {'kind': 'op', 'op': None},
+                             'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                             'splice_1': {'kind': 'op', 'op': 'Splice', 'context': np.array([-2, -1, 0, 1, 2])},
+                             'splice_data_1': {'kind': 'data', 'shape': [1, 13]},
+                             'placeholder_2': {'kind': 'op', 'op': None},
+                             'placeholder_data_2': {'kind': 'data', 'shape': [1, 26]},
+                             'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                             },
+                            [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                             ('placeholder_data_1', 'splice_1'), ('splice_1', 'splice_data_1'),
+                             ('splice_data_1', 'placeholder_2'), ('placeholder_2', 'placeholder_data_2'),
+                             ('placeholder_data_2', 'memory')
+                             ],
+                            nodes_with_edges_only=True)
+        AddSelectBeforeMemoryNodePattern().find_and_replace_pattern(graph)
+        ref_graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'placeholder_1': {'kind': 'op', 'op': None},
+                                 'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                                 'splice_1': {'kind': 'op', 'op': 'Splice', 'context': np.array([-2, -1, 0, 1, 2])},
+                                 'splice_data_1': {'kind': 'data', 'shape': [1, 13]},
+                                 'placeholder_2': {'kind': 'op', 'op': None},
+
+                                 'memory_in': {'kind': 'op', 'op': 'Memory', 'shape': int64_array([5])},
+                                 'memory_in_data': {'kind': 'data'},
+                                 'memory_out': {'kind': 'op', 'op': 'Memory', 'shape': int64_array([5])},
+                                 'memory_out_data': {'kind': 'data'},
+                                 'result': {'kind': 'op', 'op': 'Result'},
+                                 'crop_in': {'kind': 'op', 'op': 'Crop', 'axis': 1, 'offset': 1, 'dim': 4},
+                                 'crop_in_data': {'kind': 'data'},
+                                 'crop_out': {'kind': 'op', 'op': 'Crop', 'axis': 1, 'offset': 0, 'dim': 1},
+                                 'crop_out_data': {'kind': 'data'},
+                                 'select': {'kind': 'op', 'op': 'Select'},
+                                 'select_out_data': {'kind': 'data', 'shape': [1, 26]},
+                                 'const_0': {'kind': 'op', 'op': 'Const'},
+                                 'const_0_data': {'kind': 'data'},
+                                 'const_1': {'kind': 'op', 'op': 'Const'},
+                                 'const_1_data': {'kind': 'data'},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data'},
+
+                                 'placeholder_data_2': {'kind': 'data', 'shape': [1, 26]},
+                                 'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                                 },
+                                [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                                 ('placeholder_data_1', 'splice_1'), ('splice_1', 'splice_data_1'),
+                                 ('splice_data_1', 'placeholder_2'), ('placeholder_2', 'placeholder_data_2'),
+                                 ('placeholder_data_2', 'select', {'in': 1}),
+
+                                 ('memory_in', 'memory_in_data'), ('memory_in_data', 'crop_in'),
+                                 ('crop_in', 'crop_in_data'), ('crop_in_data', 'concat', {'in': 0}),
+                                 ('const_1', 'const_1_data'), ('const_1_data', 'concat', {'in': 1}),
+                                 ('concat', 'concat_data'), ('concat_data', 'memory_out'),
+                                 ('memory_out', 'memory_out_data'), ('memory_out_data', 'result'),
+                                 ('concat_data', 'crop_out'), ('crop_out', 'crop_out_data'),
+                                 ('crop_out_data', 'select', {'in': 0}),
+                                 ('const_0', 'const_0_data'), ('const_0_data', 'select', {'in': 2}),
+
+                                 ('select', 'select_out_data'),
+                                 ('select_out_data', 'memory')
+                                 ],
+                                nodes_with_edges_only=True
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'memory')
+        self.assertTrue(flag, resp)
+
+    # graph contains 1 Splice with context length 5 on the path to Memory and 1 Splice off that path;
+    # a Select with a Memory-based counter of length 5 should be inserted
+    def test_insert_select_2(self):
+        graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                             'placeholder_1': {'kind': 'op', 'op': None},
+                             'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                             'splice_1': {'kind': 'op', 'op': 'Splice', 'context': np.array([-2, -1, 0, 1, 2])},
+                             'splice_data_1': {'kind': 'data', 'shape': [1, 65]},
+                             'splice_2': {'kind': 'op', 'op': 'Splice', 'context': np.array([-1, 0, 1])},
+                             'splice_data_2': {'kind': 'data', 'shape': [1, 39]},
+                             'placeholder_2': {'kind': 'op', 'op': None},
+                             'placeholder_data_2': {'kind': 'data', 'shape': [1, 26]},
+                             'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                             },
+                            [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                             ('placeholder_data_1', 'splice_1'), ('splice_1', 'splice_data_1'),
+                             ('placeholder_data_1', 'splice_2'), ('splice_2', 'splice_data_2'),
+                             ('splice_data_1', 'placeholder_2'), ('placeholder_2', 'placeholder_data_2'),
+                             ('placeholder_data_2', 'memory')
+                             ],
+                            nodes_with_edges_only=True)
+        AddSelectBeforeMemoryNodePattern().find_and_replace_pattern(graph)
+        ref_graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'placeholder_1': {'kind': 'op', 'op': None},
+                                 'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                                 'splice_1': {'kind': 'op', 'op': 'Splice', 'context': np.array([-2, -1, 0, 1, 2])},
+                                 'splice_data_1': {'kind': 'data', 'shape': [1, 65]},
+                                 'splice_2': {'kind': 'op', 'op': 'Splice', 'context': np.array([-1, 0, 1])},
+                                 'splice_data_2': {'kind': 'data', 'shape': [1, 39]},
+                                 'placeholder_2': {'kind': 'op', 'op': None},
+
+                                 'memory_in': {'kind': 'op', 'op': 'Memory', 'shape': int64_array([5])},
+                                 'memory_in_data': {'kind': 'data'},
+                                 'memory_out': {'kind': 'op', 'op': 'Memory', 'shape': int64_array([5])},
+                                 'memory_out_data': {'kind': 'data'},
+                                 'result': {'kind': 'op', 'op': 'Result'},
+                                 'crop_in': {'kind': 'op', 'op': 'Crop', 'axis': 1, 'offset': 1, 'dim': 4},
+                                 'crop_in_data': {'kind': 'data'},
+                                 'crop_out': {'kind': 'op', 'op': 'Crop', 'axis': 1, 'offset': 0, 'dim': 1},
+                                 'crop_out_data': {'kind': 'data'},
+                                 'select': {'kind': 'op', 'op': 'Select'},
+                                 'select_out_data': {'kind': 'data', 'shape': [1, 26]},
+                                 'const_0': {'kind': 'op', 'op': 'Const'},
+                                 'const_0_data': {'kind': 'data'},
+                                 'const_1': {'kind': 'op', 'op': 'Const'},
+                                 'const_1_data': {'kind': 'data'},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data'},
+
+                                 'placeholder_data_2': {'kind': 'data', 'shape': [1, 26]},
+                                 'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                                 },
+                                [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                                 ('placeholder_data_1', 'splice_1'), ('splice_1', 'splice_data_1'),
+                                 ('placeholder_data_1', 'splice_2'), ('splice_2', 'splice_data_2'),
+                                 ('splice_data_1', 'placeholder_2'), ('placeholder_2', 'placeholder_data_2'),
+                                 ('placeholder_data_2', 'select', {'in': 1}),
+
+                                 ('memory_in', 'memory_in_data'), ('memory_in_data', 'crop_in'),
+                                 ('crop_in', 'crop_in_data'), ('crop_in_data', 'concat', {'in': 0}),
+                                 ('const_1', 'const_1_data'), ('const_1_data', 'concat', {'in': 1}),
+                                 ('concat', 'concat_data'), ('concat_data', 'memory_out'),
+                                 ('memory_out', 'memory_out_data'), ('memory_out_data', 'result'),
+                                 ('concat_data', 'crop_out'), ('crop_out', 'crop_out_data'),
+                                 ('crop_out_data', 'select', {'in': 0}),
+                                 ('const_0', 'const_0_data'), ('const_0_data', 'select', {'in': 2}),
+
+                                 ('select', 'select_out_data'),
+                                 ('select_out_data', 'memory')
+                                 ],
+                                nodes_with_edges_only=True
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'memory')
+        self.assertTrue(flag, resp)
+
+    # graph contains 2 Splices with a summed context length of 8 on the path to Memory;
+    # a Select with a Memory-based counter of length 7 should be inserted
+    def test_insert_select_3(self):
+        graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                             'placeholder_1': {'kind': 'op', 'op': None},
+                             'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                             'splice_1': {'kind': 'op', 'op': 'Splice', 'context': np.array([-2, -1, 0, 1, 2])},
+                             'splice_data_1': {'kind': 'data', 'shape': [1, 65]},
+                             'splice_2': {'kind': 'op', 'op': 'Splice', 'context': np.array([-1, 0, 1])},
+                             'splice_data_2': {'kind': 'data', 'shape': [1, 39]},
+                             'placeholder_2': {'kind': 'op', 'op': None},
+                             'placeholder_data_2': {'kind': 'data', 'shape': [1, 26]},
+                             'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                             },
+                            [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                             ('placeholder_data_1', 'splice_1'), ('splice_1', 'splice_data_1'),
+                             ('splice_data_1', 'splice_2'), ('splice_2', 'splice_data_2'),
+                             ('splice_data_2', 'placeholder_2'), ('placeholder_2', 'placeholder_data_2'),
+                             ('placeholder_data_2', 'memory')
+                             ],
+                            nodes_with_edges_only=True)
+        AddSelectBeforeMemoryNodePattern().find_and_replace_pattern(graph)
+        ref_graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'placeholder_1': {'kind': 'op', 'op': None},
+                                 'placeholder_data_1': {'kind': 'data', 'shape': [1, 13]},
+                                 'splice_1': {'kind': 'op', 'op': 'Splice', 'context': np.array([-2, -1, 0, 1, 2])},
+                                 'splice_data_1': {'kind': 'data', 'shape': [1, 65]},
+                                 'splice_2': {'kind': 'op', 'op': 'Splice', 'context': np.array([-1, 0, 1])},
+                                 'splice_data_2': {'kind': 'data', 'shape': [1, 39]},
+                                 'placeholder_2': {'kind': 'op', 'op': None},
+
+                                 'memory_in': {'kind': 'op', 'op': 'Memory', 'shape': int64_array([7])},
+                                 'memory_in_data': {'kind': 'data'},
+                                 'memory_out': {'kind': 'op', 'op': 'Memory', 'shape': int64_array([7])},
+                                 'memory_out_data': {'kind': 'data'},
+                                 'result': {'kind': 'op', 'op': 'Result'},
+                                 'crop_in': {'kind': 'op', 'op': 'Crop', 'axis': 1, 'offset': 1, 'dim': 6},
+                                 'crop_in_data': {'kind': 'data'},
+                                 'crop_out': {'kind': 'op', 'op': 'Crop', 'axis': 1, 'offset': 0, 'dim': 1},
+                                 'crop_out_data': {'kind': 'data'},
+                                 'select': {'kind': 'op', 'op': 'Select'},
+                                 'select_out_data': {'kind': 'data', 'shape': [1, 26]},
+                                 'const_0': {'kind': 'op', 'op': 'Const'},
+                                 'const_0_data': {'kind': 'data'},
+                                 'const_1': {'kind': 'op', 'op': 'Const'},
+                                 'const_1_data': {'kind': 'data'},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data'},
+
+                                 'placeholder_data_2': {'kind': 'data', 'shape': [1, 26]},
+                                 'memory': {'kind': 'op', 'op': 'Memory', 'index': 0},
+                                 },
+                                [('in_node', 'placeholder_1'), ('placeholder_1', 'placeholder_data_1'),
+                                 ('placeholder_data_1', 'splice_1'), ('splice_1', 'splice_data_1'),
+                                 ('splice_data_1', 'splice_2'), ('splice_2', 'splice_data_2'),
+                                 ('splice_data_2', 'placeholder_2'), ('placeholder_2', 'placeholder_data_2'),
+                                 ('placeholder_data_2', 'select', {'in': 1}),
+
+                                 ('memory_in', 'memory_in_data'), ('memory_in_data', 'crop_in'),
+                                 ('crop_in', 'crop_in_data'), ('crop_in_data', 'concat', {'in': 0}),
+                                 ('const_1', 'const_1_data'), ('const_1_data', 'concat', {'in': 1}),
+                                 ('concat', 'concat_data'), ('concat_data', 'memory_out'),
+                                 ('memory_out', 'memory_out_data'), ('memory_out_data', 'result'),
+                                 ('concat_data', 'crop_out'), ('crop_out', 'crop_out_data'),
+                                 ('crop_out_data', 'select', {'in': 0}),
+                                 ('const_0', 'const_0_data'), ('const_0_data', 'select', {'in': 2}),
+
+                                 ('select', 'select_out_data'),
+                                 ('select_out_data', 'memory')
+                                 ],
+                                nodes_with_edges_only=True
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'memory')
+        self.assertTrue(flag, resp)
index 1562360..a182c13 100644 (file)
@@ -15,7 +15,7 @@
 """
 import numpy as np
 
-from mo.graph.graph import Graph, Node
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.crop import Crop
 
@@ -34,41 +34,118 @@ class RemoveMemoryDuplicationPattern(MiddleReplacementPattern):
 
     @staticmethod
     def replace_pattern(graph: Graph, match: dict):
-        if len(match['op'].in_nodes()) == 0:
-            return
-
         mem = match['op']
-        in_mem = mem.in_node(0)
+        mem_shape = mem.in_port(0).data.get_shape()
+        mem_parent = mem.in_port(0).get_source()
         context = mem['context']
-        outs = in_mem.out_nodes()
 
-        for out in outs:
-            if out['op'] == 'Splice' and out.id != mem.id and set(out['context']).issubset(set(context)):
-                left_cont_out = out['context'][0]
+        for child_port in mem_parent.get_destinations():
+            child = child_port.node
+            # check whether this consumer is another Splice whose context is a subset of 'context'
+            if child['op'] == 'Splice' and child.id != mem.id and set(child['context']).issubset(set(context)):
+                left_cont_out = child['context'][0]
                 left_cont = context[0]
 
-                out_node = out.out_node()
-                for out_name, out_edge in out_node.get_outputs():
-                    out_transfer = Node(graph, out_name)
+                for child_of_child in child.out_port(0).get_destinations():
+                    out_transfer = child_of_child.node
+                    out_transfer_port = child_of_child
                     if out_transfer['op'] == 'Crop':
                         # modify existing Crop to get right data from larger Splice
-                        out_transfer['offset'] = out_transfer['offset'] + (left_cont_out - left_cont) * in_mem.shape[-1]
+                        out_transfer['offset'] = out_transfer['offset'] + (left_cont_out - left_cont) * mem_shape[-1]
                     else:
                         # insert Crop if we have not one
-                        out_transfer.in_port(out_edge['in']).disconnect()
+                        child_of_child.disconnect()
                         crop_node = Crop(graph, {'name': graph.unique_id(prefix='Splice_crop_'),
-                                                 'offset': (left_cont_out - left_cont) * in_mem.shape[-1],
-                                                 'dim': np.array([len(out['context']) * in_mem.shape[-1]]),
+                                                 'offset': (left_cont_out - left_cont) * mem_shape[-1],
+                                                 'dim': np.array([len(child['context']) * mem_shape[-1]]),
                                                  'axis': np.array([-1])}).create_node()
-                        out.out_port(0).connect(crop_node.in_port(0))
-                        crop_node.out_port(0).connect(out_transfer.in_port(out_edge['in']))
-                        crop_node.out_node(0).shape = out_node.shape
+                        child.out_port(0).connect(crop_node.in_port(0))
+                        crop_node.out_port(0).connect(child_of_child)
+                        crop_node.out_port(0).data.set_shape(child.out_port(0).data.get_shape())
+
+                        out_transfer_port = crop_node.in_port(0)
+
+                    # re-route the consumer edge from the old (smaller) Splice to the larger one
+                    out_transfer_port.disconnect()
+                    mem.out_port(0).connect(out_transfer_port)
+
+                graph.remove_node(child.id)
+
+
+class MergeNeighborSplicePattern(MiddleReplacementPattern):
+    """
+    Merge Splices with neighboring contexts, for example [-5, 0] and [0, 3], into a single Splice with context [-5, 3]
+    """
+    enabled = False
 
-                        out_transfer = crop_node
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[('op', dict(op='Splice'))],
+            edges=[])
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        mem = match['op']
+        mem_shape = mem.in_port(0).data.get_shape()
+        mem_parent = mem.in_port(0).get_source()
+        context = mem['context']
+
+        for child_port in mem_parent.get_destinations():
+            child = child_port.node
+            if child['op'] == 'Splice' and child.id != mem.id and \
+               (child['context'][0] == context[-1] or child['context'][-1] == context[0]):
+
+                new_context = list(context)
+                new_context.extend(list(child['context']))
+                new_context = list(set(new_context))
+                new_context.sort()
+                if child['context'][0] == context[-1]:
+                    new_node = mem
+                    rem_node = child
+                else:
+                    new_node = child
+                    rem_node = mem
+
+                # reset edges from rem_node to new_node
+                for out_port_rem in rem_node.out_port(0).get_destinations():
+                    out_transfer = out_port_rem.node
+                    out_transfer_shape = out_port_rem.data.get_shape().copy()
+
+                    out_port_rem.disconnect()
+
+                    if out_transfer['op'] == 'Crop':
+                        # modify existing Crop to get right data from larger Splice
+                        out_transfer['offset'] = out_transfer['offset'] + (len(new_context) - len(rem_node.context)) * mem_shape[-1]
+                        out_port_rem.connect(new_node.out_port(0))
+                    else:
+                        # insert a Crop if there is not one already
+                        crop_node = Crop(graph, {'name': graph.unique_id(prefix='Splice_crop_'),
+                                                 'offset': (len(new_context) - len(rem_node.context)) * mem_shape[-1],
+                                                 'dim': np.array([len(rem_node['context']) * mem_shape[-1]]),
+                                                 'axis': np.array([-1])}).create_node()
+                        new_node.out_port(0).connect(crop_node.in_port(0))
+                        crop_node.out_port(0).connect(out_port_rem)
+                        crop_node.out_port(0).data.set_shape(out_transfer_shape)
+
+                for out_port_rem in new_node.out_port(0).get_destinations():
+                    out_transfer = out_port_rem.node
+                    out_transfer_shape = out_port_rem.data.get_shape().copy()
+
+                    if out_transfer['op'] != 'Crop':
+                        # insert a Crop if there is not one already
+                        crop_node = Crop(graph, {'name': graph.unique_id(prefix='Splice_crop_'),
+                                                 'offset': np.array([0]),
+                                                 'dim': np.array([len(new_node['context']) * mem_shape[-1]]),
+                                                 'axis': np.array([-1])}).create_node()
+                        new_node.out_port(0).connect(crop_node.in_port(0))
+                        out_port_rem.disconnect()
+                        crop_node.out_port(0).connect(out_port_rem)
+                        crop_node.out_port(0).data.set_shape(out_transfer_shape)
 
-                    # move edge from old Splice to larger
-                    in_port = graph.get_edge_data(out_node.id, out_transfer.id)[0]['in']
-                    out_transfer.in_port(0).disconnect()
-                    mem.out_port(0).connect(out_transfer.in_port(in_port))
+                new_shape = new_node.out_port(0).data.get_shape()
+                new_shape[1] += rem_node.out_port(0).data.get_shape()[1] - rem_node.in_port(0).data.get_shape()[1]
+                new_node.out_port(0).data.set_shape(new_shape)
+                new_node.context = new_context
 
-                graph.remove_node(out.id)
+                graph.remove_node(rem_node.id)
index bbef79f..db31f6a 100644 (file)
 """
 import unittest
 
-from extensions.middle.RemoveDuplicationMemory import RemoveMemoryDuplicationPattern
+from extensions.middle.RemoveDuplicationMemory import RemoveMemoryDuplicationPattern, MergeNeighborSplicePattern
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
 
 class RemoveMemoryDuplicationPatternTests(unittest.TestCase):
 
     def test_remove_duplication(self):
-        graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+        graph = build_graph({'input': {'kind': 'op', 'op': 'Parameter'},
+                             'in_node': {'kind': 'data', 'shape': [1, 13]},
                              'splice_1': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
                              'splice_data_1': {'kind': 'data', 'shape': [1, 143]},
                              'placeholder_1': {'kind': 'op', 'op': None},
@@ -30,12 +31,14 @@ class RemoveMemoryDuplicationPatternTests(unittest.TestCase):
                              'splice_data_2': {'kind': 'data', 'shape': [1, 39]},
                              'placeholder_2': {'kind': 'op', 'op': None},
                              },
-                            [('in_node', 'splice_1'), ('splice_1', 'splice_data_1'), ('splice_data_1', 'placeholder_1'),
+                            [('input', 'in_node'), ('in_node', 'splice_1'),
+                             ('splice_1', 'splice_data_1'), ('splice_data_1', 'placeholder_1'),
                              ('in_node', 'splice_2'), ('splice_2', 'splice_data_2'), ('splice_data_2', 'placeholder_2'),
                              ],
                             nodes_with_edges_only=True)
         RemoveMemoryDuplicationPattern().find_and_replace_pattern(graph)
-        ref_graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+        ref_graph = build_graph({'input': {'kind': 'op', 'op': 'Parameter'},
+                                 'in_node': {'kind': 'data', 'shape': [1, 13]},
                                  'splice_1': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
                                  'splice_data_1': {'kind': 'data', 'shape': [1, 143]},
                                  'placeholder_1': {'kind': 'op'},
@@ -44,8 +47,8 @@ class RemoveMemoryDuplicationPatternTests(unittest.TestCase):
                                  'placeholder_2': {'kind': 'op'},
                                  },
                                 [
-                                    ('in_node', 'splice_1'), ('splice_1', 'splice_data_1'),
-                                    ('splice_data_1', 'placeholder_1'),
+                                    ('input', 'in_node'), ('in_node', 'splice_1'),
+                                    ('splice_1', 'splice_data_1'), ('splice_data_1', 'placeholder_1'),
                                     ('splice_data_1', 'crop_2'), ('crop_2', 'splice_data_2'),
                                     ('splice_data_2', 'placeholder_2'),
                                 ],
@@ -56,7 +59,8 @@ class RemoveMemoryDuplicationPatternTests(unittest.TestCase):
         self.assertTrue(flag, resp)
 
     def test_remove_duplication_with_crops(self):
-        graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+        graph = build_graph({'input': {'kind': 'op', 'op': 'Parameter'},
+                             'in_node': {'kind': 'data', 'shape': [1, 13]},
                              'splice_1': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
                              'splice_data_1': {'kind': 'data', 'shape': [1, 143]},
                              'crop_1': {'kind': 'op', 'op': 'Crop', 'offset': 13, 'dim': 13, 'axis': -1},
@@ -64,24 +68,66 @@ class RemoveMemoryDuplicationPatternTests(unittest.TestCase):
                              'splice_data_2': {'kind': 'data', 'shape': [1, 39]},
                              'crop_2': {'kind': 'op', 'op': 'Crop', 'offset': 13, 'dim': 13, 'axis': -1},
                              },
-                            [('in_node', 'splice_1'), ('splice_1', 'splice_data_1'), ('splice_data_1', 'crop_1'),
+                            [('input', 'in_node'), ('in_node', 'splice_1'),
+                             ('splice_1', 'splice_data_1'), ('splice_data_1', 'crop_1'),
                              ('in_node', 'splice_2'), ('splice_2', 'splice_data_2'), ('splice_data_2', 'crop_2'),
                              ],
                             nodes_with_edges_only=True)
         RemoveMemoryDuplicationPattern().find_and_replace_pattern(graph)
-        ref_graph = build_graph({'in_node': {'kind': 'data', 'shape': [1, 13]},
+        ref_graph = build_graph({'input': {'kind': 'op', 'op': 'Parameter'},
+                                 'in_node': {'kind': 'data', 'shape': [1, 13]},
                                  'splice_1': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
                                  'splice_data_1': {'kind': 'data', 'shape': [1, 143]},
                                  'crop_1': {'kind': 'op', 'op': 'Crop', 'offset': 13, 'dim': 13},
                                  'crop_2': {'kind': 'op', 'op': 'Crop', 'offset': 65, 'dim': 13, 'axis': -1},
                                  },
                                 [
-                                    ('in_node', 'splice_1'), ('splice_1', 'splice_data_1'),
-                                    ('splice_data_1', 'crop_1'),
-                                    ('splice_data_1', 'crop_2'),
+                                    ('input', 'in_node'), ('in_node', 'splice_1'),
+                                    ('splice_1', 'splice_data_1'),
+                                    ('splice_data_1', 'crop_1'), ('splice_data_1', 'crop_2'),
                                 ],
                                 nodes_with_edges_only=True
                                 )
 
         (flag, resp) = compare_graphs(graph, ref_graph, 'crop_2')
         self.assertTrue(flag, resp)
+
+    def test_remove_duplication_neibor(self):
+        graph = build_graph({'input': {'kind': 'op', 'op': 'Parameter'},
+                             'in_node': {'kind': 'data', 'shape': [1, 13]},
+                             'splice_1': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 1)},
+                             'splice_data_1': {'kind': 'data', 'shape': [1, 78], 'value': None},
+                             'placeholder_1': {'kind': 'op', 'op': None},
+                             'splice_2': {'kind': 'op', 'op': 'Splice', 'context': range(0, 2)},
+                             'splice_data_2': {'kind': 'data', 'shape': [1, 26], 'value': None},
+                             'placeholder_2': {'kind': 'op', 'op': None},
+                             },
+                            [('input', 'in_node'), ('in_node', 'splice_1'),
+                             ('splice_1', 'splice_data_1'), ('splice_data_1', 'placeholder_1'),
+                             ('in_node', 'splice_2'), ('splice_2', 'splice_data_2'), ('splice_data_2', 'placeholder_2'),
+                             ],
+                            nodes_with_edges_only=True)
+        MergeNeighborSplicePattern().find_and_replace_pattern(graph)
+        ref_graph = build_graph({'input': {'kind': 'op', 'op': 'Parameter'},
+                                 'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'splice_1': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 2)},
+                                 'splice_data_1': {'kind': 'data', 'shape': [1, 91], 'value': None},
+                                 'crop_1': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 78, 'axis': -1},
+                                 'crop_1_data': {'kind': 'data', 'shape': [1, 78]},
+                                 'placeholder_1': {'kind': 'op'},
+                                 'crop_2': {'kind': 'op', 'op': 'Crop', 'offset': 65, 'dim': 26, 'axis': -1},
+                                 'splice_data_2': {'kind': 'data', 'shape': [1, 26], 'value': None},
+                                 'placeholder_2': {'kind': 'op'},
+                                 },
+                                [
+                                    ('input', 'in_node'), ('in_node', 'splice_1'),
+                                    ('splice_1', 'splice_data_1'), ('splice_data_1', 'crop_1'),
+                                    ('crop_1', 'crop_1_data'), ('crop_1_data', 'placeholder_1'),
+                                    ('splice_data_1', 'crop_2'), ('crop_2', 'splice_data_2'),
+                                    ('splice_data_2', 'placeholder_2'),
+                                ],
+                                nodes_with_edges_only=True
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_2')
+        self.assertTrue(flag, resp)
index 5bee3c3..4333046 100644 (file)
@@ -13,7 +13,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-from mo.graph.graph import Graph, Node
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 
 
@@ -40,22 +40,23 @@ class RemoveUselessCropsPattern(MiddleReplacementPattern):
     @staticmethod
     def replace_pattern(graph: Graph, match: dict):
         crop_node = match['crop']
-        in_crop_node = crop_node.in_node(0)
+        crop_node_parent_port = crop_node.in_port(0).get_source()
         concat_node = match['concat']
-        data = match['data']
 
-        if len(data.out_nodes()) != 1:
+        if len(crop_node.out_port(0).get_destinations()) != 1:
             return
 
-        outs = in_crop_node.out_nodes()
+        outs = crop_node_parent_port.get_destinations()
         offsets_dims = list([])
         crop_list = list([])
         axis = crop_node['axis']
-        for out in outs:
+        for in_port in outs:
+            out = in_port.node
             if out['op'] == 'Crop' and out['axis'] == axis and \
-               len(out.out_node().out_nodes()) == 1 and out.out_node().out_node(0).id == concat_node.id:
+               len(out.out_port(0).get_destinations()) == 1 and \
+               out.out_port(0).get_destination().node == concat_node:
                 offsets_dims.append((out['offset'], out['dim']))
-                crop_list.append(out.id)
+                crop_list.append(out)
 
         offsets_dims.sort(key=lambda off_dim: off_dim[0])
         size = 0
@@ -64,21 +65,24 @@ class RemoveUselessCropsPattern(MiddleReplacementPattern):
                 return
             size = size + off_d[1]
 
-        if size != in_crop_node.shape[axis]:
+        if size != crop_node_parent_port.data.get_shape()[axis]:
             return
 
         remove_concat = True
-        for inp, attrs in concat_node.get_inputs():
-            in_node_id, a = Node(graph, inp).get_inputs()[0]
-            if in_node_id not in crop_list:
-                remove_concat = False
-            else:
-                Node(graph, in_node_id).out_port(0).disconnect()
+        free_port = None
+        for inp in concat_node.in_ports():
+            if not concat_node.in_port(inp).disconnected():
+                in_node = concat_node.in_port(inp).get_source().node
+                if in_node not in crop_list:
+                    remove_concat = False
+                else:
+                    in_node.out_port(0).disconnect()
+                    free_port = inp
 
         if remove_concat:
-            for crop in crop_list:
-                Node(graph, crop).in_port(0).disconnect()
-
-            concat_out = concat_node.out_node(0).out_node(0)
-            concat_out.in_port(0).disconnect()
-            in_crop_node.in_node(0).out_port(0).connect(concat_out.in_port(0))
+            concat_outs = concat_node.out_port(0).get_destinations()
+            for out in concat_outs:
+                out.disconnect()
+                crop_node_parent_port.connect(out)
+        else:
+            crop_node_parent_port.connect(concat_node.in_port(free_port))
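
The rewritten pass hinges on one check: the sibling Crop nodes must tile the parent tensor along the crop axis with no gaps or overlaps, which is what the sort-and-accumulate loop above verifies before touching the Concat. A minimal standalone sketch of that check, with a hypothetical crops_cover_axis helper operating on plain (offset, dim) tuples instead of graph ports:

    def crops_cover_axis(offset_dim_pairs, axis_size):
        """True if the (offset, dim) crop windows tile [0, axis_size) contiguously."""
        size = 0
        for offset, dim in sorted(offset_dim_pairs, key=lambda od: od[0]):
            if offset != size:  # gap or overlap between consecutive crops
                return False
            size += dim
        return size == axis_size

    # Five crops of 26 elements fully cover an axis of length 130, as in the unit tests below.
    assert crops_cover_axis([(0, 26), (26, 26), (52, 26), (78, 26), (104, 26)], 130)
    assert not crops_cover_axis([(0, 26), (52, 26)], 130)

Only when this holds and every connected Concat input comes from one of those crops does the pass drop the Concat and rewire its consumers to the crops' common source; otherwise the source is attached to the Concat port freed by the removed crop.
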
index 976ad13..c3f9e0d 100644 (file)
@@ -54,10 +54,28 @@ class RemoveUselessCropsPatternTests(unittest.TestCase):
         RemoveUselessCropsPattern().find_and_replace_pattern(graph)
         ref_graph = build_graph({'placeholder_in': {'kind': 'op', 'op': 'Parameter'},
                                  'in_node': {'kind': 'data', 'shape': [1, 130]},
+                                 'crop1': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 26, 'axis': -1},
+                                 'crop_data_1': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop2': {'kind': 'op', 'op': 'Crop', 'offset': 26, 'dim': 26, 'axis': -1},
+                                 'crop_data_2': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop3': {'kind': 'op', 'op': 'Crop', 'offset': 52, 'dim': 26, 'axis': -1},
+                                 'crop_data_3': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop4': {'kind': 'op', 'op': 'Crop', 'offset': 78, 'dim': 26, 'axis': -1},
+                                 'crop_data_4': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop5': {'kind': 'op', 'op': 'Crop', 'offset': 104, 'dim': 26, 'axis': -1},
+                                 'crop_data_5': {'kind': 'data', 'shape': [1, 26]},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data', 'shape': [1, 130]},
                                  'placeholder': {'kind': 'op', 'op': 'Parameter'},
                                  },
                                 [
                                     ('placeholder_in', 'in_node'),
+                                    ('in_node', 'crop1'), ('crop1', 'crop_data_1'),
+                                    ('in_node', 'crop2'), ('crop2', 'crop_data_2'),
+                                    ('in_node', 'crop3'), ('crop3', 'crop_data_3'),
+                                    ('in_node', 'crop4'), ('crop4', 'crop_data_4'),
+                                    ('in_node', 'crop5'), ('crop5', 'crop_data_5'),
+                                    ('concat', 'concat_data'),
                                     ('in_node', 'placeholder')
                                 ]
                                 )
@@ -121,3 +139,72 @@ class RemoveUselessCropsPatternTests(unittest.TestCase):
                                 )
         (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder')
         self.assertTrue(flag, resp)
+
+    def test_useless_crops_without_concat(self):
+        graph = build_graph({'placeholder_in': {'kind': 'op', 'op': 'Parameter'},
+                             'in_node': {'kind': 'data', 'shape': [1, 130]},
+                             'crop1': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 26, 'axis': -1},
+                             'crop_data_1': {'kind': 'data', 'shape': [1, 26]},
+                             'crop2': {'kind': 'op', 'op': 'Crop', 'offset': 26, 'dim': 26, 'axis': -1},
+                             'crop_data_2': {'kind': 'data', 'shape': [1, 26]},
+                             'crop3': {'kind': 'op', 'op': 'Crop', 'offset': 52, 'dim': 26, 'axis': -1},
+                             'crop_data_3': {'kind': 'data', 'shape': [1, 26]},
+                             'crop4': {'kind': 'op', 'op': 'Crop', 'offset': 78, 'dim': 26, 'axis': -1},
+                             'crop_data_4': {'kind': 'data', 'shape': [1, 26]},
+                             'crop5': {'kind': 'op', 'op': 'Crop', 'offset': 104, 'dim': 26, 'axis': -1},
+                             'crop_data_5': {'kind': 'data', 'shape': [1, 26]},
+                             'placeholder_concat': {'kind': 'op', 'op': None},
+                             'placeholder_concat_data': {'kind': 'data', 'shape': [1, 100]},
+                             'concat': {'kind': 'op', 'op': 'Concat'},
+                             'concat_data': {'kind': 'data', 'shape': [1, 230]},
+                             'placeholder': {'kind': 'op', 'op': None},
+                             },
+                            [('placeholder_in', 'in_node'),
+                             ('in_node', 'crop1'), ('crop1', 'crop_data_1'),
+                             ('in_node', 'crop2'), ('crop2', 'crop_data_2'),
+                             ('in_node', 'crop3'), ('crop3', 'crop_data_3'),
+                             ('in_node', 'crop4'), ('crop4', 'crop_data_4'),
+                             ('in_node', 'crop5'), ('crop5', 'crop_data_5'),
+                             ('placeholder_concat', 'placeholder_concat_data'),
+                             ('crop_data_1', 'concat', {'in': 0}),
+                             ('crop_data_2', 'concat', {'in': 1}),
+                             ('crop_data_3', 'concat', {'in': 2}),
+                             ('crop_data_4', 'concat', {'in': 3}),
+                             ('crop_data_5', 'concat', {'in': 4}),
+                             ('placeholder_concat_data', 'concat', {'in': 5}),
+                             ('concat', 'concat_data'),
+                             ('concat_data', 'placeholder')])
+        RemoveUselessCropsPattern().find_and_replace_pattern(graph)
+        ref_graph = build_graph({'placeholder_in': {'kind': 'op', 'op': 'Parameter'},
+                                 'in_node': {'kind': 'data', 'shape': [1, 130]},
+                                 'crop1': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 26, 'axis': -1},
+                                 'crop_data_1': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop2': {'kind': 'op', 'op': 'Crop', 'offset': 26, 'dim': 26, 'axis': -1},
+                                 'crop_data_2': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop3': {'kind': 'op', 'op': 'Crop', 'offset': 52, 'dim': 26, 'axis': -1},
+                                 'crop_data_3': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop4': {'kind': 'op', 'op': 'Crop', 'offset': 78, 'dim': 26, 'axis': -1},
+                                 'crop_data_4': {'kind': 'data', 'shape': [1, 26]},
+                                 'crop5': {'kind': 'op', 'op': 'Crop', 'offset': 104, 'dim': 26, 'axis': -1},
+                                 'crop_data_5': {'kind': 'data', 'shape': [1, 26]},
+                                 'placeholder_concat': {'kind': 'op', 'op': None},
+                                 'placeholder_concat_data': {'kind': 'data', 'shape': [1, 100]},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data', 'shape': [1, 230]},
+                                 'placeholder': {'kind': 'op', 'op': 'Parameter'},
+                                 },
+                                [
+                                    ('placeholder_in', 'in_node'),
+                                    ('in_node', 'crop1'), ('crop1', 'crop_data_1'),
+                                    ('in_node', 'crop2'), ('crop2', 'crop_data_2'),
+                                    ('in_node', 'crop3'), ('crop3', 'crop_data_3'),
+                                    ('in_node', 'crop4'), ('crop4', 'crop_data_4'),
+                                    ('in_node', 'crop5'), ('crop5', 'crop_data_5'),
+                                    ('placeholder_concat', 'placeholder_concat_data'),
+                                    ('in_node', 'concat', {'in': 4}),
+                                    ('placeholder_concat_data', 'concat', {'in': 5}),
+                                    ('concat', 'concat_data'),
+                                    ('concat_data', 'placeholder')])
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder')
+        self.assertTrue(flag, resp)
index 043a2e6..dfddc33 100644 (file)
@@ -46,53 +46,55 @@ class ReplaceMemoryOffsetNodePattern(MiddleReplacementPattern):
         if pair_node.has_default:
             return
 
-        if len(node.in_nodes()) != 0:
-            input_node = node.in_node(0)
-            op_output = node.out_node().out_node()
-            out_node = pair_node.out_node(0)
+        if node.in_port(0).get_source() is not None:
+            input_node_out_port = node.in_port(0).get_source()
+            op_output_id = node.out_port(0).get_destination().node.id
+            out_node_in_ports = pair_node.out_port(0).get_destinations()
         else:
-            input_node = pair_node.in_node(0)
-            op_output = pair_node.out_node().out_node()
-            out_node = node.out_node(0)
+            input_node_out_port = pair_node.in_port(0).get_source()
+            op_output_id = pair_node.out_port(0).get_destination().node.id
+            out_node_in_ports = node.out_port(0).get_destinations()
 
-        in_shape = input_node.shape
+        in_shape = input_node_out_port.data.get_shape().copy()
 
         node_id = node.id
         node_name = node.name
         node_t = node.t
 
-        graph.remove_node(op_output.id)
-        graph.remove_node(node.id)
-        graph.remove_node(pair_node.id)
-
         splice = Splice(graph, {'name': node_name,
                                 'id': node_id,
-                                'context': int64_array(range(-abs(node_t), abs(node_t) + 1))}).create_node([input_node])
+                                'context': int64_array(range(node_t, 1)) if node_t < 0 else int64_array(range(0, node_t+1))}).create_node()
+        splice.in_port(0).connect(input_node_out_port)
 
-        # offset of Crop will be 0 (first element) if node_t < 0 and in_shape[1]*2*node_t (last element) if node_t > 0
+        # offset of Crop will be 0 (first element) if node_t < 0 and in_shape[1]*node_t (last element) if node_t > 0
         crop = Crop(graph, {'name': 'Splice_Crop',
                             'axis': int64_array([1]),
-                            'offset': int64_array([max(0, in_shape[1] * 2 * node_t)]),
+                            'offset': int64_array([max(0, in_shape[1] * node_t)]),
                             'dim': int64_array([in_shape[1]])}).create_node()
 
         splice.out_port(0).connect(crop.in_port(0))
-        splice.out_node(0).shape = int64_array([in_shape[0], (2 * abs(node_t) + 1) * in_shape[1]])
+        splice.out_port(0).data.set_shape(int64_array([in_shape[0], (abs(node_t) + 1) * in_shape[1]]))
 
-        outs = input_node.out_nodes()
-        for out_ in outs:
+        outs = input_node_out_port.get_destinations()
+        for in_port in outs:
+            out_ = in_port.node
             if out_['op'] != 'MemoryOffset' and out_['op'] != 'Splice':
                 crop_input = Crop(graph, {'name': 'Splice_Crop',
                                           'axis': int64_array([1]),
-                                          'offset': int64_array([input_node.shape[1] * abs(node_t)]),
-                                          'dim': int64_array([input_node.shape[1]])}).create_node()
+                                          'offset': int64_array([-min(0, in_shape[1] * node_t)]),
+                                          'dim': int64_array([in_shape[1]])}).create_node()
                 splice.out_port(0).connect(crop_input.in_port(0))
 
-                in_port = graph.get_edge_data(input_node.id, out_.id)[0]['in']
-                graph.remove_edge(input_node.id, out_.id)
-                crop_input.out_port(0).connect(out_.in_port(in_port))
-                crop_input.out_node(0).shape = input_node.shape
+                in_port.disconnect()
+                crop_input.out_port(0).connect(in_port)
+                crop_input.out_port(0).data.set_shape(in_shape)
 
-        graph.add_edge(crop.id, out_node.id, **{'in': 0, 'out': 0})
+        for dest_port in out_node_in_ports:
+            dest_port.connect(crop.out_port(0))
+
+        graph.remove_node(op_output_id)
+        graph.remove_node(node.id)
+        graph.remove_node(pair_node.id)
 
 
 class ReplaceMemoryOffsetWithMemoryNodePattern(MiddleReplacementPattern):
@@ -115,15 +117,15 @@ class ReplaceMemoryOffsetWithMemoryNodePattern(MiddleReplacementPattern):
         if node.t >= 0:
             raise Error('Does not support IfDefined with t > 0')
 
-        if len(node.in_nodes()) != 0:
+        if node.in_port(0).get_source() is not None:
             input_port = node.in_port(0).get_source()
-            op_output = node.out_node().out_node()
+            op_output_id = node.out_port(0).get_destination().node.id
             out_port = pair_node.out_port(0)
             node_name = node.name
             pair_name = pair_node.name
         else:
             input_port = pair_node.in_port(0).get_source()
-            op_output = pair_node.out_node().out_node()
+            op_output_id = pair_node.out_port(0).get_destination().node.id
             out_port = node.out_port(0)
             node_name = pair_node.name
             pair_name = node.name
@@ -169,6 +171,6 @@ class ReplaceMemoryOffsetWithMemoryNodePattern(MiddleReplacementPattern):
             out_port.get_connection().set_source(memory_out.out_port(0))
             memory_out.out_port(0).data.set_shape(np.array([in_shape[0], memory_out.shape[0]]))
 
-        graph.remove_node(op_output.id)
+        graph.remove_node(op_output_id)
         graph.remove_node(node.id)
         graph.remove_node(pair_node.id)
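
To see why the new Splice context and Crop offset take these values: a MemoryOffset with shift t is emulated by splicing a one-sided context (range(t, 1) for t < 0, range(0, t+1) for t > 0) and cropping a single frame out of it, at offset 0 for t < 0 or in_shape[1]*t for t > 0. A rough numpy sketch of that equivalence, assuming a simple frame-stacking semantics for Splice with edge frames replicated (the splice() helper below is illustrative, not the Model Optimizer op):

    import numpy as np

    def splice(frames, context):
        """Stack each frame with its temporal context; edge frames are replicated."""
        t_len, dim = frames.shape
        rows = []
        for t in range(t_len):
            rows.append(np.concatenate([frames[min(max(t + c, 0), t_len - 1)] for c in context]))
        return np.stack(rows)

    frames = np.arange(12, dtype=np.float32).reshape(6, 2)  # 6 frames, feature dim 2
    node_t = -3                                             # MemoryOffset with t = -3
    context = list(range(node_t, 1))                        # [-3, -2, -1, 0], as in the pass
    spliced = splice(frames, context)                       # shape (6, (|t| + 1) * dim)

    dim = frames.shape[1]
    offset = max(0, dim * node_t)                           # 0 for t < 0, dim * t for t > 0
    delayed = spliced[:, offset:offset + dim]               # the Crop output
    # Each row now holds the frame from 3 steps earlier (clamped at the sequence start).
    assert np.array_equal(delayed[3:], frames[:3])
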
index 57d34b6..9d3c406 100644 (file)
@@ -30,8 +30,10 @@ class ReplaceMemoryOffsetNodePatternTests(unittest.TestCase):
                              'pair_name': 'memoryoffset_2', 'has_default': False},
             'memoryoffset_data': {'kind': 'data', 'shape': [1, 13]},
             'memoryoffset_2': {'kind': 'op', 'op': 'MemoryOffset', 't': -5,
-                               'pair_name': 'memoryoffset', 'has_default': False},
+                               'pair_name': 'memoryoffset', 'has_default': False,
+                               'in_ports_count': 1},
             'memoryoffset_2_data': {'kind': 'data', 'shape': [1, 13]},
+            'crop_data': {'kind': 'data', 'shape': [1, 13]},
             'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
             'opoutput': {'kind': 'op', 'op': 'OpOutput'},
         }
@@ -49,10 +51,10 @@ class ReplaceMemoryOffsetNodePatternTests(unittest.TestCase):
         ReplaceMemoryOffsetNodePattern().find_and_replace_pattern(graph)
         ref_graph = build_graph({'in_placeholder': {'kind': 'op', 'op': 'placeholder'},
                                  'in_node': {'kind': 'data', 'shape': [1, 13]},
-                                 'splice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
-                                 'splice_data': {'kind': 'data', 'shape': [1, 143]},
+                                 'splice': {'kind': 'op', 'op': 'Splice', 'context': range(0, 6)},
+                                 'splice_data': {'kind': 'data', 'shape': [1, 78]},
                                  'crop': {'kind': 'op', 'op': 'Crop', 'offset': 130, 'dim': 13},
-                                 'memoryoffset_2_data': {'kind': 'data', 'shape': [1, 13]},
+                                 'crop_data': {'kind': 'data', 'shape': [1, 13]},
                                  'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
                                  },
                                 [
@@ -60,12 +62,12 @@ class ReplaceMemoryOffsetNodePatternTests(unittest.TestCase):
                                     ('in_node', 'splice'),
                                     ('splice', 'splice_data'),
                                     ('splice_data', 'crop'),
-                                    ('crop', 'memoryoffset_2_data'),
-                                    ('memoryoffset_2_data', 'out_placeholder')
+                                    ('crop', 'crop_data'),
+                                    ('crop_data', 'out_placeholder')
                                 ]
                                 )
 
-        (flag, resp) = compare_graphs(graph, ref_graph, 'memoryoffset_2_data')
+        (flag, resp) = compare_graphs(graph, ref_graph, 'out_placeholder')
         self.assertTrue(flag, resp)
 
     def test_memoryoffset_neg(self):
@@ -81,8 +83,8 @@ class ReplaceMemoryOffsetNodePatternTests(unittest.TestCase):
         ReplaceMemoryOffsetNodePattern().find_and_replace_pattern(graph)
         ref_graph = build_graph({'in_placeholder': {'kind': 'op', 'op': 'placeholder'},
                                  'in_node': {'kind': 'data', 'shape': [1, 13]},
-                                 'splice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
-                                 'splice_data': {'kind': 'data', 'shape': [1, 143]},
+                                 'splice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 1)},
+                                 'splice_data': {'kind': 'data', 'shape': [1, 78]},
                                  'crop': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 13},
                                  'memoryoffset_2_data': {'kind': 'data', 'shape': [1, 13]},
                                  'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
@@ -113,8 +115,8 @@ class ReplaceMemoryOffsetNodePatternTests(unittest.TestCase):
         ReplaceMemoryOffsetNodePattern().find_and_replace_pattern(graph)
         ref_graph = build_graph({'in_placeholder': {'kind': 'op', 'op': 'placeholder'},
                                  'in_node': {'kind': 'data', 'shape': [1, 13]},
-                                 'splice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6)},
-                                 'splice_data': {'kind': 'data', 'shape': [1, 143]},
+                                 'splice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 1)},
+                                 'splice_data': {'kind': 'data', 'shape': [1, 78]},
                                  'crop': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 13},
                                  'crop_input': {'kind': 'op', 'op': 'Crop', 'offset': 65, 'dim': 13},
                                  'crop_input_data': {'kind': 'data', 'shape': [1, 13]},
diff --git a/model-optimizer/extensions/middle/ReplacePNorm.py b/model-optimizer/extensions/middle/ReplacePNorm.py
new file mode 100644 (file)
index 0000000..065f085
--- /dev/null
@@ -0,0 +1,62 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.ReduceOps import ReduceSum
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.power import Power
+from mo.ops.reshape import Reshape
+
+
+class ReplacePNormNodePattern(MiddleReplacementPattern):
+    """
+    PNorm operation should be replaced by operations: Power(p) -> Reshape(n, c -> n, c/group, group) -> ReduceSum(axis=2) -> Power(1/p)
+    """
+    enabled = False
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[('op', dict(op='pnorm'))],
+            edges=[])
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        node = match['op']
+        shape = node.in_port(0).data.get_shape().copy()
+
+        assert shape[1] % node.group == 0
+
+        power_node = Power(graph, attrs={'name': node.id + '_power',
+                                         'power': node.p}).create_node()
+
+        reshape_node = create_op_node_with_second_input(graph, Reshape,
+                                                        int64_array([shape[0], shape[1] / node.group, node.group]),
+                                                        {'name': node.id + '_reshape'})
+        reshape_node.in_port(0).connect(power_node.out_port(0))
+
+        reducesum_node = create_op_node_with_second_input(graph, ReduceSum,
+                                                          int64_array([2]),
+                                                          {'name': node.id + '_sum', 'keep_dims': False})
+        reducesum_node.in_port(0).connect(reshape_node.out_port(0))
+
+        invpower_node = Power(graph, attrs={'name': node.id + '_invpower',
+                                            'power': 1.0 / node.p}).create_node()
+        invpower_node.in_port(0).connect(reducesum_node.out_port(0))
+
+        node.in_port(0).get_connection().set_destination(power_node.in_port(0))
+        node.out_port(0).get_connection().set_source(invpower_node.out_port(0))
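
The decomposition documented in the class docstring can be checked numerically. A small sketch, assuming non-negative inputs (the sub-graph applies Power(p) directly, without an abs); the helper names and the group size of 10 are illustrative only:

    import numpy as np

    def pnorm_reference(x, group, p=2.0):
        """Direct p-norm over consecutive groups of `group` channels."""
        n, c = x.shape
        return np.power(np.power(np.abs(x), p).reshape(n, c // group, group).sum(axis=2), 1.0 / p)

    def pnorm_decomposed(x, group, p=2.0):
        """Power(p) -> Reshape(n, c -> n, c/group, group) -> ReduceSum(axis=2) -> Power(1/p)."""
        n, c = x.shape
        powered = np.power(x, p)
        reduced = powered.reshape(n, c // group, group).sum(axis=2)
        return np.power(reduced, 1.0 / p)

    x = np.random.rand(1, 3500).astype(np.float32)  # non-negative, so the missing abs() does not matter
    np.testing.assert_allclose(pnorm_decomposed(x, group=10), pnorm_reference(x, group=10), rtol=1e-5)
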
diff --git a/model-optimizer/extensions/middle/ReplacePNormNodePattern_test.py b/model-optimizer/extensions/middle/ReplacePNormNodePattern_test.py
new file mode 100644 (file)
index 0000000..c5d1a45
--- /dev/null
@@ -0,0 +1,76 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+from extensions.middle.ReplacePNorm import ReplacePNormNodePattern
+from mo.utils.unittest.graph import build_graph, compare_graphs
+
+
+class ReplacePNormNodePatternTests(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.nodes_attributes = {
+            'placeholder': {'kind': 'op', 'op': None},
+            'in_node': {'kind': 'data', 'shape': [1, 3500]},
+            'pnorm': {'kind': 'op', 'op': 'pnorm', 'group': 10, 'p': 2.0},
+            'pnorm_data': {'kind': 'data', 'shape': [1, 350]},
+            'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
+        }
+
+    def test_pnorm(self):
+        graph = build_graph(self.nodes_attributes,
+                            [('placeholder', 'in_node'),
+                             ('in_node', 'pnorm'),
+                             ('pnorm', 'pnorm_data'),
+                             ('pnorm_data', 'out_placeholder')])
+        ReplacePNormNodePattern().find_and_replace_pattern(graph)
+
+        ref_graph = build_graph({'in_placeholder': {'kind': 'op', 'op': None},
+                                 'in_node': {'kind': 'data', 'shape': [1, 3500]},
+                                 'power': {'kind': 'op', 'op': 'Power', 'power': 2.0},
+                                 'power_data': {'kind': 'data'},
+                                 'reshape':  {'kind': 'op', 'op': 'Reshape'},
+                                 'reshape_data': {'kind': 'data'},
+                                 'const': {'kind': 'op', 'op': 'Const', 'value': [1, 350, 10]},
+                                 'const_data': {'kind': 'data'},
+                                 'reduce': {'kind': 'op', 'op': 'ReduceSum'},
+                                 'reduce_data': {'kind': 'data'},
+                                 'const_1': {'kind': 'op', 'op': 'Const', 'value': 2},
+                                 'const_data_1': {'kind': 'data'},
+                                 'invpower': {'kind': 'op', 'op': 'Power', 'power': 0.5},
+                                 'invpower_data': {'kind': 'data'},
+                                 'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
+                                 },
+                                [
+                                    ('in_placeholder', 'in_node'),
+                                    ('in_node', 'power'),
+                                    ('power', 'power_data'),
+                                    ('power_data', 'reshape', {'in': 0}),
+                                    ('reshape', 'reshape_data'),
+                                    ('const', 'const_data'),
+                                    ('const_data', 'reshape', {'in': 1}),
+                                    ('reshape_data', 'reduce', {'in': 0}),
+                                    ('const_1', 'const_data_1'),
+                                    ('const_data_1', 'reduce', {'in': 1}),
+                                    ('reduce', 'reduce_data'),
+                                    ('reduce_data', 'invpower'),
+                                    ('invpower', 'invpower_data'),
+                                    ('invpower_data', 'out_placeholder'),
+                                ]
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'out_placeholder')
+        self.assertTrue(flag, resp)
index 935174a..39dbe4a 100644 (file)
@@ -13,9 +13,9 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
-
 from extensions.front.kaldi.replace_lstm_node_pattern import unique_id
+from extensions.ops.splitv import SplitV
+from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.concat import Concat
@@ -55,11 +55,9 @@ class ReplaceSpliceNodePattern(MiddleReplacementPattern):
     @staticmethod
     def replace_pattern(graph: Graph, match: dict):
         node = match['op']
-        input_node = node.in_nodes()[0]
-        out_node = node.out_node(0)
-
-        graph.remove_edge(input_node.id, node.id)
-        graph.remove_edge(node.id, out_node.id)
+        in_shape = node.in_port(0).data.get_shape().copy()
+        memory_element = in_shape[1] - node.const_dim
+        memory_size = memory_element * len(node.context)
 
         memory_pair_id = unique_id('id')
         # Memory(in)
@@ -67,33 +65,81 @@ class ReplaceSpliceNodePattern(MiddleReplacementPattern):
                                       'id': memory_pair_id,
                                       'index': 1,
                                       'size': 2,
-                                      'shape': np.array(([input_node.shape[1] * len(node.context)]),
-                                                        dtype=np.int64)}).create_node_with_data()
+                                      'shape': int64_array([memory_size])}).create_node()
         # Memory(in)  \
         #             Crop
         # Input(temp) /
         crop = Crop(graph, {'name': 'Splice_Crop',
-                            'axis': np.array([1], dtype=np.int64),
-                            'offset': np.array([input_node.shape[1]], dtype=np.int64),
-                            'dim': np.array([input_node.shape[1] * (len(node.context) - 1)],
-                                            dtype=np.int64)}).create_node_with_data([input_memory])
+                            'axis': int64_array([1]),
+                            'offset': int64_array([memory_element]),
+                            'dim': int64_array([memory_size - memory_element])}).create_node()
+        crop.in_port(0).connect(input_memory.out_port(0))
 
         # Crop   \
         #         Concat
         # Input  /
         concat_node = Concat(graph, {'name': 'Splice_Concat',
                                      'in_ports_count': 2,
-                                     'axis': 1}).create_node([crop, input_node])
+                                     'axis': 1}).create_node()
+        concat_node.in_port(0).connect(crop.out_port(0))
 
         # Concat -> Memory(out)
         mem_out = Memory(graph, {'name': 'out_splice_memory',
                                  'id': memory_pair_id,
                                  'index': 0,
                                  'size': 2,
-                                 'shape': np.array([input_node.shape[1] * len(node.context)], dtype=np.int64)}).create_node_with_data()
-
-        Result(graph).create_node([mem_out])
-
-        graph.add_edge(concat_node.id, out_node.id, **{'in': 0, 'out': 0})
-        out_node.add_output_port(1)
-        graph.add_edge(out_node.id, mem_out.in_node(0).id, **{'in': 0, 'out': 1})
+                                 'shape': int64_array([memory_size])}).create_node()
+        mem_out.in_port(0).connect(concat_node.out_port(0))
+        Result(graph).create_node().in_port(0).connect(mem_out.out_port(0))
+
+        if node.const_dim != 0:
+            memory_element_constdim = node.const_dim
+            memory_size_constdim = memory_element_constdim * len(node.context)
+            split = SplitV(graph, {'name': node.id + '_split_const', 'axis': 1, 'out_ports_count': 2,
+                                   'size_splits': int64_array([memory_element, memory_element_constdim])}).create_node()
+            split.out_port(0).connect(concat_node.in_port(1))
+
+            # create a separate splice construction for const_dim
+            memory_pair_id = unique_id('memory_for_const_dim')
+            input_memory_const_dim = Memory(graph, {'name': 'const_dim_in_memory',
+                                                    'id': memory_pair_id,
+                                                    'index': 1,
+                                                    'size': 2,
+                                                    'shape': int64_array([memory_size_constdim])}).create_node()
+            crop_const_dim = Crop(graph, {'name': 'const_dim_crop',
+                                          'axis': int64_array([1]),
+                                          'offset': int64_array([memory_element_constdim]),
+                                          'dim': int64_array([memory_size_constdim - memory_element_constdim])}).create_node()
+            crop_const_dim.in_port(0).connect(input_memory_const_dim.out_port(0))
+
+            concat_node_const_dim = Concat(graph, {'name': 'const_dim_concat',
+                                                   'in_ports_count': 2,
+                                                   'axis': 1}).create_node()
+            concat_node_const_dim.in_port(0).connect(crop_const_dim.out_port(0))
+
+            mem_out_const_dim = Memory(graph, {'name': 'const_dim_out_memory',
+                                               'id': memory_pair_id,
+                                               'index': 0,
+                                               'size': 2,
+                                               'shape': int64_array([memory_size_constdim])}).create_node()
+            mem_out_const_dim.in_port(0).connect(concat_node_const_dim.out_port(0))
+            Result(graph).create_node().in_port(0).connect(mem_out_const_dim.out_port(0))
+
+            # wire the replacement together: the Splice input goes through Split at the start and the output comes from the final Concat
+            split.out_port(1).connect(concat_node_const_dim.in_port(1))
+            crop_first = Crop(graph, {'name': 'const_dim_crop_first',
+                                      'axis': int64_array([1]),
+                                      'offset': int64_array([0]),
+                                      'dim': int64_array([memory_element_constdim])}).create_node()
+            crop_first.in_port(0).connect(concat_node_const_dim.out_port(0))
+
+            concat_const = Concat(graph, {'name': node.id+'_concat_const', 'axis': 1,
+                                          'in_ports_count': 2}).create_node()
+            concat_const.in_port(1).connect(crop_first.out_port(0))
+            concat_const.in_port(0).connect(concat_node.out_port(0))
+
+            node.in_port(0).get_connection().set_destination(split.in_port(0))
+            node.out_port(0).get_connection().set_source(concat_const.out_port(0))
+        else:
+            node.in_port(0).get_connection().set_destination(concat_node.in_port(1))
+            node.out_port(0).get_connection().set_source(concat_node.out_port(0))
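
The sub-graph built above for const_dim == 0 is effectively a sliding-window buffer: Memory(in) holds the previous window, Crop drops the oldest frame, Concat appends the new one, and Memory(out) stores the result while the same tensor serves as the Splice output. A stateful sketch of that behaviour (the SpliceBuffer class is illustrative only; the const_dim branch builds a second, analogous buffer that is not modelled here):

    import numpy as np

    class SpliceBuffer:
        """Emulates the Memory(in) -> Crop -> Concat -> Memory(out) chain for one Splice."""

        def __init__(self, feature_dim, context_len):
            self.feature_dim = feature_dim
            # Memory(in): the previously stored window of context_len frames, initially zeros.
            self.state = np.zeros(feature_dim * context_len, dtype=np.float32)

        def step(self, frame):
            kept = self.state[self.feature_dim:]        # Crop: drop the oldest frame
            window = np.concatenate([kept, frame])      # Concat: append the new input frame
            self.state = window                         # Memory(out): becomes the next state
            return window                               # ...and the same tensor is the Splice output

    buf = SpliceBuffer(feature_dim=13, context_len=11)  # matches context range(-5, 6)
    out = buf.step(np.ones(13, dtype=np.float32))
    assert out.shape == (143,)                          # 13 * 11, as in the unit tests below
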
index ca40336..4689784 100644 (file)
@@ -17,43 +17,129 @@ import unittest
 
 from extensions.middle.ReplaceSpliceNodePattern import ReplaceSpliceNodePattern
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph
+from mo.utils.unittest.graph import build_graph, compare_graphs
 
 
 class ReplaceSpliceNodePatternTests(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.nodes_attributes = {
+            'placeholder': {'kind': 'op', 'op': None},
             'in_node': {'kind': 'data', 'shape': [1, 13]},
-            'slice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 5)},
+            'splice': {'kind': 'op', 'op': 'Splice', 'context': range(-5, 6), 'const_dim': 0},
             'splice_data': {'kind': 'data', 'shape': [1, 143]},
+            'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
         }
-        cls.graph = build_graph(cls.nodes_attributes,
-                                [('in_node', 'slice'),
-                                 ('slice', 'splice_data')])
-
-        ReplaceSpliceNodePattern().find_and_replace_pattern(cls.graph)
-
-    def test_memory(self):
-        memory_nodes = [node for node in self.graph.nodes(data=True) if node[1]['kind'] == 'op' and node[1]['op'] == 'Memory']
-        self.assertEqual(len(memory_nodes), 2)
-        for memory_node in memory_nodes:
-            node = Node(self.graph, memory_node[0])
-            if len(node.in_nodes()):
-                self.assertEqual(node.index, 0)
-            elif len(node.out_nodes()):
-                self.assertEqual(node.index, 1)
-        self.assertEqual(memory_nodes[0][1]['id'], memory_nodes[1][1]['id'])
-
-    def test_crop(self):
-        crop_node = [node for node in self.graph.nodes(data=True) if node[1]['kind'] == 'op' and node[1]['op'] == 'Crop']
-        self.assertEqual(len(crop_node), 1)
-        crop_node = Node(self.graph, crop_node[0][0])
-        self.assertEqual(crop_node.offset, [13])
-        self.assertEqual(crop_node.dim, [13 * 9])
-
-    def test_concat(self):
-        concat_node = [node for node in self.graph.nodes(data=True) if node[1]['kind'] == 'op' and node[1]['op'] == 'Concat']
-        self.assertEqual(len(concat_node), 1)
-        crop_node = Node(self.graph, concat_node[0][0])
-        self.assertEqual(crop_node.axis, 1)
+
+    def test_splice(self):
+        graph = build_graph(self.nodes_attributes,
+                            [('placeholder', 'in_node'),
+                             ('in_node', 'splice'),
+                             ('splice', 'splice_data'),
+                             ('splice_data', 'out_placeholder')])
+        ReplaceSpliceNodePattern().find_and_replace_pattern(graph)
+
+        ref_graph = build_graph({'in_placeholder': {'kind': 'op', 'op': None},
+                                 'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'memory_in': {'kind': 'op', 'op': 'Memory'},
+                                 'memory_in_data': {'kind': 'data'},
+                                 'crop_mem':  {'kind': 'op', 'op': 'Crop', 'offset': 13, 'dim': 130},
+                                 'crop_mem_data': {'kind': 'data'},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data', 'shape': [1, 143]},
+                                 'memory_out': {'kind': 'op', 'op': 'Memory'},
+                                 'memory_out_data': {'kind': 'data'},
+                                 'result': {'kind': 'op', 'op': 'Result'},
+                                 'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
+                                 },
+                                [
+                                    ('in_placeholder', 'in_node'),
+                                    ('memory_in', 'memory_in_data'),
+                                    ('memory_in_data', 'crop_mem'),
+                                    ('crop_mem', 'crop_mem_data'),
+                                    ('crop_mem_data', 'concat', {'in': 0}),
+                                    ('in_node', 'concat', {'in': 1}),
+                                    ('concat', 'concat_data'),
+                                    ('concat_data', 'memory_out'),
+                                    ('memory_out', 'memory_out_data'),
+                                    ('memory_out_data', 'result'),
+                                    ('concat_data', 'out_placeholder'),
+                                ]
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'out_placeholder')
+        self.assertTrue(flag, resp)
+
+    def test_splice_with_constdim(self):
+        graph = build_graph(self.nodes_attributes,
+                            [('placeholder', 'in_node'),
+                             ('in_node', 'splice'),
+                             ('splice', 'splice_data'),
+                             ('splice_data', 'out_placeholder')])
+        Node(graph, 'splice')['const_dim'] = 10
+        Node(graph, 'splice_data')['shape'] = [1, 43]
+        ReplaceSpliceNodePattern().find_and_replace_pattern(graph)
+
+        ref_graph = build_graph({'in_placeholder': {'kind': 'op', 'op': None},
+                                 'in_node': {'kind': 'data', 'shape': [1, 13]},
+                                 'split': {'kind': 'op', 'op': 'Split'},
+                                 'split_data_0': {'kind': 'data'},
+                                 'split_data_1': {'kind': 'data'},
+                                 'memory_in': {'kind': 'op', 'op': 'Memory'},
+                                 'memory_in_data': {'kind': 'data'},
+                                 'crop_mem': {'kind': 'op', 'op': 'Crop', 'offset': 3, 'dim': 30},
+                                 'crop_mem_data': {'kind': 'data'},
+                                 'concat': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_data': {'kind': 'data'},
+                                 'memory_out': {'kind': 'op', 'op': 'Memory'},
+                                 'memory_out_data': {'kind': 'data'},
+                                 'result': {'kind': 'op', 'op': 'Result'},
+                                 'memory_in_constdims': {'kind': 'op', 'op': 'Memory'},
+                                 'memory_in_constdims_data': {'kind': 'data'},
+                                 'crop_mem_constdims': {'kind': 'op', 'op': 'Crop', 'offset': 10, 'dim': 100},
+                                 'crop_mem_constdims_data': {'kind': 'data'},
+                                 'concat_constdims': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_constdims_data': {'kind': 'data'},
+                                 'memory_out_constdims': {'kind': 'op', 'op': 'Memory'},
+                                 'memory_out_constdims_data': {'kind': 'data'},
+                                 'result_constdims': {'kind': 'op', 'op': 'Result'},
+                                 'crop_first_constdims': {'kind': 'op', 'op': 'Crop', 'offset': 0, 'dim': 10},
+                                 'crop_first_constdims_data': {'kind': 'data'},
+                                 'concat_all': {'kind': 'op', 'op': 'Concat'},
+                                 'concat_all_data': {'kind': 'data', 'shape': [1, 43]},
+                                 'out_placeholder': {'kind': 'op', 'op': 'placeholder'},
+                                 },
+                                [
+                                    ('in_placeholder', 'in_node'),
+                                    ('in_node', 'split'),
+                                    ('split', 'split_data_0', {'out': 0}),
+                                    ('split', 'split_data_1', {'out': 1}),
+                                    ('memory_in', 'memory_in_data'),
+                                    ('memory_in_data', 'crop_mem'),
+                                    ('crop_mem', 'crop_mem_data'),
+                                    ('crop_mem_data', 'concat', {'in': 0}),
+                                    ('split_data_0', 'concat', {'in': 1}),
+                                    ('concat', 'concat_data'),
+                                    ('concat_data', 'memory_out'),
+                                    ('memory_out', 'memory_out_data'),
+                                    ('memory_out_data', 'result'),
+                                    ('memory_in_constdims', 'memory_in_constdims_data'),
+                                    ('memory_in_constdims_data', 'crop_mem_constdims'),
+                                    ('crop_mem_constdims', 'crop_mem_constdims_data'),
+                                    ('crop_mem_constdims_data', 'concat_constdims', {'in': 0}),
+                                    ('split_data_1', 'concat_constdims', {'in': 1}),
+                                    ('concat_constdims', 'concat_constdims_data'),
+                                    ('concat_constdims_data', 'memory_out_constdims'),
+                                    ('memory_out_constdims', 'memory_out_constdims_data'),
+                                    ('memory_out_constdims_data', 'result_constdims'),
+                                    ('concat_constdims_data', 'crop_first_constdims'),
+                                    ('crop_first_constdims', 'crop_first_constdims_data'),
+                                    ('crop_first_constdims_data', 'concat_all', {'in': 1}),
+                                    ('concat_data', 'concat_all', {'in': 0}),
+                                    ('concat_all', 'concat_all_data'),
+                                    ('concat_all_data', 'out_placeholder'),
+                                ]
+                                )
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'out_placeholder')
+        self.assertTrue(flag, resp)
index 7d660f7..112995f 100644 (file)
@@ -18,6 +18,7 @@ import unittest
 import numpy as np
 
 from extensions.middle.SliceConverter import ConvertSlice
+from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph, compare_graphs
 from mo.ops.slice import Slice
@@ -176,3 +177,204 @@ class ConvertSliceTests(unittest.TestCase):
 
         (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
         self.assertTrue(flag, resp)
+
+
+class ConvertSliceONNXOpset10Tests(unittest.TestCase):
+    nodes_attributes = {
+        # input data
+        'placeholder_1': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
+        'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+        # Slice layer inputs
+        'starts': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+        'starts_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+        'ends': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+        'ends_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+        'strides': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+        'strides_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+        'axes': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+        'axes_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+        'steps': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+        'steps_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+        # Slice layer
+        'slice': {'type': 'Slice', 'kind': 'op', 'op': 'Slice', 'format': 'onnx', 'end': None},
+        'slice_data': {'value': None, 'shape': None, 'kind': 'data'},
+        # Output operation
+        'output_op': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+        'output_data': {'shape': None, 'kind': 'data', 'data_type': None},
+        'op_output': {'kind': 'op', 'op': 'Result'},
+        # StridedSlice layer
+        'strided_slice': {'kind': 'op', 'op': 'StridedSlice', 'slices': None, 'shrink_axis_mask': None}
+    }
+
+    def test_no_steps_no_axes(self):
+        input_shape = int64_array([5, 10, 20])
+        starts_value = int64_array([3, 2, 7])
+        ends_value = int64_array([5, 8, 15])
+        steps_value = int64_array([1, 1, 1])
+        masks_value = np.zeros([len(input_shape)], dtype=np.int64)
+        graph = build_graph(self.nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'slice', {'in': 0}),
+                             ('starts', 'starts_data'),
+                             ('starts_data', 'slice', {'in': 1}),
+                             ('ends', 'ends_data'),
+                             ('ends_data', 'slice', {'in': 2}),
+                             ('slice', 'slice_data'),
+                             ('slice_data', 'output_op'),
+                             ('output_op', 'output_data'),
+                             ('output_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': input_shape},
+                             'starts': {'shape': starts_value.shape, 'value': starts_value},
+                             'starts_data': {'shape': starts_value.shape, 'value': starts_value},
+                             'ends': {'shape': ends_value.shape, 'value': ends_value},
+                             'ends_data': {'shape': ends_value.shape, 'value': ends_value},
+                             }, nodes_with_edges_only=True
+                            )
+        slice_node = Node(graph, 'slice')
+        Slice.infer(slice_node)
+
+        pattern = ConvertSlice()
+        pattern.find_and_replace_pattern(graph)
+
+        graph_ref = build_graph(self.nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'strided_slice', {'in': 0}),
+                                 ('starts', 'starts_data'),
+                                 ('starts_data', 'strided_slice', {'in': 1}),
+                                 ('ends', 'ends_data'),
+                                 ('ends_data', 'strided_slice', {'in': 2}),
+                                 ('strides', 'strides_data'),
+                                 ('strides_data', 'strided_slice', {'in': 3}),
+                                 ('strided_slice', 'slice_data'),
+                                 ('slice_data', 'output_op'),
+                                 ('output_op', 'output_data'),
+                                 ('output_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': input_shape},
+                                 'strided_slice': {'new_axis_mask': masks_value, 'shrink_axis_mask': masks_value,
+                                                   'ellipsis_mask': masks_value, 'begin_mask': np.ones([3]),
+                                                   'end_mask': np.ones([3])},
+                                 'slice_data': {'shape': int64_array([2, 6, 8])}
+                                 }, nodes_with_edges_only=True
+                                )
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_no_axes(self):
+        input_shape = int64_array([5, 10, 20])
+        starts_value = int64_array([3, 2, 7])
+        ends_value = int64_array([5, 8, 15])
+        steps_value = int64_array([2, 3, 1])
+        masks_value = np.zeros([len(input_shape)], dtype=np.int64)
+        graph = build_graph(self.nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'slice', {'in': 0}),
+                             ('starts', 'starts_data'),
+                             ('starts_data', 'slice', {'in': 1}),
+                             ('ends', 'ends_data'),
+                             ('ends_data', 'slice', {'in': 2}),
+                             ('steps', 'steps_data'),
+                             ('steps_data', 'slice', {'in': 4}),
+                             ('slice', 'slice_data'),
+                             ('slice_data', 'output_op'),
+                             ('output_op', 'output_data'),
+                             ('output_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': input_shape},
+                             'starts': {'shape': starts_value.shape, 'value': starts_value},
+                             'starts_data': {'shape': starts_value.shape, 'value': starts_value},
+                             'ends': {'shape': ends_value.shape, 'value': ends_value},
+                             'ends_data': {'shape': ends_value.shape, 'value': ends_value},
+                             'steps': {'shape': steps_value.shape, 'value': steps_value},
+                             'steps_data': {'shape': steps_value.shape, 'value': steps_value},
+                             }, nodes_with_edges_only=True
+                            )
+        slice_node = Node(graph, 'slice')
+        Slice.infer(slice_node)
+
+        pattern = ConvertSlice()
+        pattern.find_and_replace_pattern(graph)
+
+        graph_ref = build_graph(self.nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'strided_slice', {'in': 0}),
+                                 ('starts', 'starts_data'),
+                                 ('starts_data', 'strided_slice', {'in': 1}),
+                                 ('ends', 'ends_data'),
+                                 ('ends_data', 'strided_slice', {'in': 2}),
+                                 ('strides', 'strides_data'),
+                                 ('strides_data', 'strided_slice', {'in': 3}),
+                                 ('strided_slice', 'slice_data'),
+                                 ('slice_data', 'output_op'),
+                                 ('output_op', 'output_data'),
+                                 ('output_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': input_shape},
+                                 'strided_slice': {'new_axis_mask': masks_value, 'shrink_axis_mask': masks_value,
+                                                   'ellipsis_mask': masks_value, 'begin_mask': np.ones([3]),
+                                                   'end_mask': np.ones([3])},
+                                 'slice_data': {'shape': int64_array([1, 2, 8])}
+                                 }, nodes_with_edges_only=True
+                                )
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_no_steps(self):
+        input_shape = int64_array([5, 10, 20])
+        starts_value = int64_array([4, 2])
+        ends_value = int64_array([15, 8])
+        axes_value = int64_array([2, 1])
+        masks_value = np.zeros([len(input_shape)], dtype=np.int64)
+        graph = build_graph(self.nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'slice', {'in': 0}),
+                             ('starts', 'starts_data'),
+                             ('starts_data', 'slice', {'in': 1}),
+                             ('ends', 'ends_data'),
+                             ('ends_data', 'slice', {'in': 2}),
+                             ('axes', 'axes_data'),
+                             ('axes_data', 'slice', {'in': 3}),
+                             ('slice', 'slice_data'),
+                             ('slice_data', 'output_op'),
+                             ('output_op', 'output_data'),
+                             ('output_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': input_shape},
+                             'starts': {'shape': starts_value.shape, 'value': starts_value},
+                             'starts_data': {'shape': starts_value.shape, 'value': starts_value},
+                             'ends': {'shape': ends_value.shape, 'value': ends_value},
+                             'ends_data': {'shape': ends_value.shape, 'value': ends_value},
+                             'axes': {'shape': axes_value.shape, 'value': axes_value},
+                             'axes_data': {'shape': axes_value.shape, 'value': axes_value},
+                             }, nodes_with_edges_only=True
+                            )
+        slice_node = Node(graph, 'slice')
+        Slice.infer(slice_node)
+
+        pattern = ConvertSlice()
+        pattern.find_and_replace_pattern(graph)
+
+        graph_ref = build_graph(self.nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'strided_slice', {'in': 0}),
+                                 ('starts', 'starts_data'),
+                                 ('starts_data', 'strided_slice', {'in': 1}),
+                                 ('ends', 'ends_data'),
+                                 ('ends_data', 'strided_slice', {'in': 2}),
+                                 ('strides', 'strides_data'),
+                                 ('strides_data', 'strided_slice', {'in': 3}),
+                                 ('strided_slice', 'slice_data'),
+                                 ('slice_data', 'output_op'),
+                                 ('output_op', 'output_data'),
+                                 ('output_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': input_shape},
+                                 'strided_slice': {'new_axis_mask': masks_value, 'shrink_axis_mask': masks_value,
+                                                   'ellipsis_mask': masks_value, 'begin_mask': int64_array([0, 1, 1]),
+                                                   'end_mask': int64_array([0, 1, 1])},
+                                 'slice_data': {'shape': int64_array([5, 6, 11])}
+                                 }, nodes_with_edges_only=True
+                                )
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
+        self.assertTrue(flag, resp)
index 241afb0..5eba145 100644 (file)
 
 import numpy as np
 
-from mo.graph.graph import Graph
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph, Node
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.crop import Crop
 from mo.ops.strided_slice import StridedSlice
+from mo.utils.error import Error
 
 
 def convert_negative_indices(indices: np.array, shape: np.array):
@@ -31,11 +33,12 @@ def convert_negative_indices(indices: np.array, shape: np.array):
 
 class ConvertSlice(MiddleReplacementPattern):
     """
-    This class convert Slice operation to Crop or Split depends on parameters
+    This class converts the Slice operation to Crop, Split or StridedSlice depending on its parameters
     """
 
     enabled = True
     op = "Slice"
+    force_clean_up = True
 
     def run_after(self):
         from extensions.middle.pass_separator import MiddleStart
@@ -49,8 +52,83 @@ class ConvertSlice(MiddleReplacementPattern):
             edges=[]
         )
 
+    @staticmethod
+    def convert_onnx_slice_opset10(node: Node):
+        """
+        Converts the Slice node from ONNX opset10 to StridedSlice.
+        :param node: Slice node
+        :return: None
+        """
+        graph = node.graph
+
+        input_shape = node.in_port(0).data.get_shape()
+        output_shape = node.out_port(0).data.get_shape()
+        starts = node.in_port(1).data.get_value()
+        ends = node.in_port(2).data.get_value()
+        if starts is None or ends is None:
+            raise Error('The input with starts or ends is not constant for node {}'.format(node.id))
+
+        # in ONNX the value for 'ends' is often -1, which is translated to the maximum possible int64 value.
+        # Such values must be clamped to the int32 maximum because they do not fit into the int32 type
+        # supported by the StridedSlice layer
+        ends = int64_array([np.iinfo(np.int32).max if item > np.iinfo(np.int32).max else item for item in ends])
+        if node.is_in_port_connected(3):
+            axes = node.in_port(3).data.get_value()
+            if axes is None:
+                raise Error('The input with axes is not constant for node {}'.format(node.id))
+        else:
+            axes = int64_array(list(range(starts.size)))
+
+        if node.is_in_port_connected(4):
+            steps = node.in_port(4).data.get_value()
+            if steps is None:
+                raise Error('The input with steps is not constant for node {}'.format(node.id))
+        else:
+            steps = np.ones([starts.size])
+
+        ss_begin_mask = np.zeros(len(input_shape), dtype=np.int32)
+        ss_end_mask = np.zeros(len(input_shape), dtype=np.int32)
+        ss_begin = np.zeros(len(input_shape), dtype=np.int32)
+        ss_end = np.zeros(len(input_shape), dtype=np.int32)
+        ss_steps = np.ones(len(input_shape), dtype=np.int32)
+
+        # prepare inputs and attributes for the StridedSlice layer
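+        # ss_begin_mask is set only for axes with a non-zero start and ss_end_mask for every sliced axis;
+        # axes that are not listed in 'axes' keep their full extent and a unit stride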
+        for i, axis in enumerate(axes):
+            if starts[i] != 0:
+                ss_begin_mask[axis] = 1
+                ss_begin[axis] = starts[i]
+
+            ss_end_mask[axis] = 1
+            ss_end[axis] = ends[i]
+
+            ss_steps[axis] = steps[i]
+
+        begin_node = Const(graph, {'value': ss_begin, 'force_precision': 'I32'}).create_node()
+        end_node = Const(graph, {'value': ss_end, 'force_precision': 'I32'}).create_node()
+        strides_node = Const(graph, {'value': ss_steps, 'force_precision': 'I32'}).create_node()
+
+        ss = StridedSlice(graph, dict(new_axis_mask=np.zeros(len(output_shape), dtype=np.int32),
+                                      shrink_axis_mask=np.zeros(len(output_shape), dtype=np.int32),
+                                      ellipsis_mask=np.zeros(len(output_shape), dtype=np.int32),
+                                      begin_mask=ss_begin_mask,
+                                      end_mask=ss_end_mask)).create_node()
+        node.in_port(0).get_connection().set_destination(ss.in_port(0))
+        begin_node.out_port(0).connect(ss.in_port(1))
+        end_node.out_port(0).connect(ss.in_port(2))
+        strides_node.out_port(0).connect(ss.in_port(3))
+        node.out_port(0).get_connection().set_source(ss.out_port(0))
+
     def replace_pattern(self, graph: Graph, match: dict):
         node = match['slice']
+
+        input = node.in_node(0)
+        output_data = node.out_node()
+
+        # ONNX 10 opset case
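+        # (in opset 10 the starts/ends and the optional axes/steps come in as inputs rather than attributes,
+        # which is why a Slice node with three or more inputs is routed to the StridedSlice conversion)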
+        if len(node.in_nodes()) >= 3 and node.has_valid('format') and node['format'] == 'onnx':
+            self.convert_onnx_slice_opset10(node)
+            return
+
         # Caffe case
         if not node.has_valid('start') or not node.has_valid('end'):
             return
@@ -58,16 +136,12 @@ class ConvertSlice(MiddleReplacementPattern):
         begin = node.start
         end = node.end
         axis = node.axis if node.has_valid('axis') else np.arange(begin.size)
-        
-
-        input = node.in_node(0)
-        output_data = node.out_node()
 
         # Check whether operation use only one axis or not
         axes_begin = np.zeros(len(input.shape), dtype=np.int32)
         axes_end = np.zeros(len(input.shape), dtype=np.int32)
-        begin_ext = np.zeros(len(input.shape), dtype=np.int32)
-        end_ext = np.zeros(len(input.shape), dtype=np.int32)
+        ss_begin = np.zeros(len(input.shape), dtype=np.int32)
+        ss_end = np.zeros(len(input.shape), dtype=np.int32)
         dims = 0
         axes = np.zeros(begin.size)
         for i in range(len(axis)):
@@ -76,10 +150,10 @@ class ConvertSlice(MiddleReplacementPattern):
                 axes[i] = 1
                 if begin[i] != 0:
                     axes_begin[axis[i]] = 1
-                    begin_ext[axis[i]] = begin[i]
+                    ss_begin[axis[i]] = begin[i]
                 if end[i] < input.shape[axis[i]]:
                     axes_end[axis[i]] = 1
-                    end_ext[axis[i]] = end[i]
+                    ss_end[axis[i]] = end[i]
         axes = np.array(axes, dtype=bool)
 
         if dims == 1 or dims == 0:
@@ -91,11 +165,11 @@ class ConvertSlice(MiddleReplacementPattern):
                                           begin_mask=axes_begin,
                                           end_mask=axes_end))
 
-            convert_negative_indices(begin_ext, input.shape)
-            convert_negative_indices(end_ext, input.shape)
+            convert_negative_indices(ss_begin, input.shape)
+            convert_negative_indices(ss_end, input.shape)
 
-            begin_node = Const(graph, {'name': 'begin', 'value': begin_ext, 'force_precision': 'I32'}).create_node_with_data()
-            end_node = Const(graph, {'name': 'end', 'value': end_ext, 'force_precision': 'I32'}).create_node_with_data()
+            begin_node = Const(graph, {'value': ss_begin, 'force_precision': 'I32'}).create_node_with_data()
+            end_node = Const(graph, {'value': ss_end, 'force_precision': 'I32'}).create_node_with_data()
 
             ss.create_node_with_data(inputs=[input, begin_node, end_node], data_nodes=[output_data])
             # Remove unnecessary edges from and to the Slice vertex
index 7722753..98bbe2d 100644 (file)
@@ -22,6 +22,7 @@ import numpy as np
 
 from extensions.ops.elementwise import Mul
 from extensions.ops.interpolate import Interpolate
+from mo.front.common.layout import get_height_dim, get_width_dim
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
 from mo.middle.replacement import MiddleReplacementPattern
@@ -33,7 +34,6 @@ from mo.ops.strided_slice import StridedSlice
 class UpsampleToResample(MiddleReplacementPattern):
     enabled = True
     force_clean_up = True
-    graph_condition = [lambda graph: graph.graph['fw'] == 'onnx']
 
     def run_after(self):
         from extensions.middle.pass_separator import MiddleStart
@@ -54,6 +54,7 @@ class UpsampleToResample(MiddleReplacementPattern):
     def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
         log.debug('UpsampleToResample is triggered')
         upsample = match['upsample']
+        input_shape = upsample.in_port(0).data.get_shape()
 
         if len(upsample.in_nodes()) == 2:
             if upsample.in_node(1).value is None:
@@ -79,13 +80,15 @@ class UpsampleToResample(MiddleReplacementPattern):
 
         shape = Shape(graph, {'name': upsample.name + '/0_port'}).create_node()
 
-        begin = Const(graph, {'value': np.array([2])}).create_node()
-        end = Const(graph, {'value': np.array([4])}).create_node()
-        stride = Const(graph, {'value': np.array([1])}).create_node()
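+        # the StridedSlice below cuts the spatial (height..width) part out of the input shape, so the
+        # begin/end indices are taken from the graph layout instead of being hard-coded to [2, 4)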
+        begin = Const(graph, {'value': int64_array([get_height_dim(graph.graph['layout'],
+                                                                   len(input_shape))])}).create_node()
+        end = Const(graph, {'value': int64_array([get_width_dim(graph.graph['layout'],
+                                                                len(input_shape)) + 1])}).create_node()
+        stride = Const(graph, {'value': int64_array([1])}).create_node()
         ss = StridedSlice(graph, {'name': upsample.name + '/ss_0_port', 'begin_mask': np.array([1]),
                                   'end_mask': np.array([0]), 'new_axis_mask': np.array([0]),
-                                  'shrink_axis_mask': np.array([0]),
-                                  'ellipsis_mask': np.array([0])}).create_node()
+                                  'shrink_axis_mask': int64_array([0]),
+                                  'ellipsis_mask': int64_array([0])}).create_node()
 
         mul = Mul(graph, {'name': upsample.name + '/factor_mul_'}).create_node()
 
@@ -99,7 +102,8 @@ class UpsampleToResample(MiddleReplacementPattern):
         factor.out_port(0).connect(mul.in_port(1))
 
         # Create Interpolate operation
-        axes = int64_array([2, 3]) if graph.graph['layout'] == 'NCHW' else int64_array([1, 2])
+        axes = int64_array([get_height_dim(graph.graph['layout'], len(input_shape)),
+                            get_width_dim(graph.graph['layout'], len(input_shape))])
         resample_op = Interpolate(graph, dict(name='Interpolate/{}'.format(upsample.name),
                                               factor=factor_value, axes=axes,
                                               mode=upsample.attrs()['mode'],
index 5176994..2c31644 100644 (file)
@@ -18,6 +18,7 @@ import numpy as np
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.graph.graph import Node, Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_precision
 from mo.ops.op import Op
 
 
@@ -27,13 +28,23 @@ class Cast(Op):
     def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
+            'type': 'Convert',
             'infer': __class__.infer,
+            'type_infer': __class__.type_infer,
             'dst_type': None,
             'in_ports_count': 1,
             'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
+    def backend_attrs(self):
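+        # the destination data type is serialized into the generated IR as the layer's 'precision' attribute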
+        return [('precision', lambda node: np_data_type_to_precision(node.dst_type))]
+
+    @staticmethod
+    def type_infer(node: Node):
+        assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
+        node.out_port(0).set_data_type(node.dst_type)
+
     @staticmethod
     def infer(node: Node):
         assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
index 5faadf4..457e7b3 100644 (file)
@@ -25,26 +25,28 @@ reduce_map = {
     'ReduceSum': np.sum,
     'ReduceProd': np.prod,
     'ReduceMax': np.max,
+    'ReduceMin': np.min,
     'ReduceMean': np.mean,
     'ReduceAnd': np.all,
 }
 
 
 def reduce_infer(node: Node):
-    in_ports = node.in_ports()
-    assert len(in_ports) == 2 and 0 in in_ports and 1 in in_ports, \
+    connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()]
+    assert len(connected_in_ports) == 2, \
         "{} node `{}` should have 2 input ports, where 0-input is data input and 1-input represent " \
         "`reduction_indices`".format(node.op, node.id)
 
-    axis = node.in_port(1).data.get_value()
-
     in_data = node.in_port(0).data
     in_shape = in_data.get_shape()
-    assert in_shape is not None, "Can not infer {} node `{}`: shape of 0-input unknown".format(node.op, node.id)
+    axis = node.in_port(1).data.get_value()
 
-    # The default axis == None is to reduce over all the dimensions of the input tensor
-    if axis is None:
+    # If the axis is None then reduce over all the dimensions of the input tensor
+    if axis.size == 1 and axis.item() is None:
         axis = int64_array(list(range(len(in_shape))))
+        node.in_port(1).data.set_value(axis)
+
+    assert in_shape is not None, "Can not infer {} node `{}`: shape of 0-input unknown".format(node.op, node.id)
 
     axis = axis.copy()
     if axis.size == 1:
@@ -102,6 +104,11 @@ class ReduceProd(ReduceOp):
     enabled = True
 
 
+class ReduceMin(ReduceOp):
+    op = 'ReduceMin'
+    enabled = True
+
+
 class ReduceMax(ReduceOp):
     op = 'ReduceMax'
     enabled = True
index 4646b76..6da7eec 100644 (file)
@@ -23,7 +23,7 @@ from mo.graph.graph import Node
 from mo.ops.clamp import Clamp
 from mo.ops.op import Op
 
-activation_ops = ['Sigmoid', 'Tanh', 'ReLU6', 'Exp', 'Elu', 'Not']
+activation_ops = ['Sigmoid', 'Tanh', 'ReLU6', 'Exp', 'Elu', 'Not', 'Floor']
 
 
 class Activation(Op):
@@ -80,6 +80,11 @@ class Erf(Activation):
     operation = None
 
 
+class Floor(Activation):
+    op = 'Floor'
+    operation = staticmethod(lambda x: np.floor(x))
+
+
 class Elu(Activation):
     op = 'Elu'
 
index 8566e8a..5e5cf2c 100644 (file)
@@ -16,6 +16,7 @@
 
 import numpy as np
 
+from mo.front.common.partial_infer.utils import int64_array
 from mo.ops.op import Op
 
 
@@ -27,7 +28,9 @@ class ExperimentalDetectronDetectionOutput(Op):
         mandatory_props = dict(
             type=__class__.op,
             op=__class__.op,
-            infer=__class__.infer
+            infer=__class__.infer,
+            in_ports_count=4,
+            out_ports_count=4,
         )
 
         super().__init__(graph, mandatory_props, attrs)
@@ -48,12 +51,7 @@ class ExperimentalDetectronDetectionOutput(Op):
         rois_num = node.max_detections_per_image
         # boxes
         node.out_node(0).shape = np.array([rois_num, 4], dtype=np.int64)
-        try:
-            # classes
-            node.out_node(1).shape = np.array([rois_num], dtype=np.int64)
-            # scores
-            node.out_node(2).shape = np.array([rois_num], dtype=np.int64)
-            # batch_ids
-            node.out_node(3).shape = np.array([rois_num], dtype=np.int64)
-        except Exception as ex:
-            print(ex)
+        # classes, scores, batch indices
+        for port_ind in range(1, 4):
+            if not node.out_port(port_ind).disconnected():
+                node.out_port(port_ind).data.set_shape(int64_array([rois_num]))
index 758f1e3..234b218 100644 (file)
 
 import numpy as np
 
-from mo.front.common.partial_infer.eltwise import eltwise_infer
+from mo.front.common.partial_infer.eltwise import eltwise_infer, bias_add_infer
 from mo.graph.graph import Graph
+from mo.middle.passes.convert_data_type import data_type_str_to_np
 from mo.ops.op import Op
+from mo.utils.error import Error
 
 
 class Elementwise(Op):
@@ -32,13 +34,23 @@ class Elementwise(Op):
             'op': self.op,
             'type': self.op_type,
             'infer': lambda node: eltwise_infer(node, self.operation),
+            'type_infer': self.type_infer,
             'can_be_bias': True,
             'can_be_fused': True,
             'in_ports_count': 2,
             'out_ports_count': 1,
-            'is_eltwise': True
+            'is_eltwise': True,
         }, attrs)
 
+    @staticmethod
+    def type_infer(node):
+        in_type_0 = node.in_port(0).get_data_type()
+        in_type_1 = node.in_port(1).get_data_type()
+        if in_type_0 != in_type_1:
+            raise Error('Elementwise operation {} has inputs of different data types: {} and {}'.format(
+                        node.soft_get('name'), in_type_0, in_type_1))
+        node.out_port(0).set_data_type(in_type_0)
+
 
 class Add(Elementwise):
     enabled = False
@@ -47,6 +59,14 @@ class Add(Elementwise):
     operation = staticmethod(lambda a, b: a + b)
 
 
+class BiasAdd(Add):
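+    # Add variant that uses bias_add_infer instead of the generic eltwise_infer for shape inference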
+    op_type = 'BiasAdd'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        attrs.update({'infer': lambda node: bias_add_infer(node, self.operation)})
+        super().__init__(graph, attrs)
+
+
 class Sub(Elementwise):
     enabled = False
     op = 'Sub'
@@ -72,7 +92,22 @@ class Pow(Elementwise):
     enabled = False
     op = 'Pow'
     op_type = 'Pow'
-    operation = staticmethod(lambda a, b: a ** b)
+
+    @staticmethod
+    def operation(a, b):
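+        # NumPy does not allow raising integers to negative integer powers, so such cases are
+        # computed and returned in float32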
+        if np.any(b < 0) and np.issubdtype(a.dtype, np.signedinteger):
+            return np.array(a.astype(np.float32) ** b, dtype=np.float32)
+        return a ** b
+
+    @staticmethod
+    def type_infer(node):
+        # the output data type of a dynamic power is complicated to predict, so when the actual value
+        # is not available the default float data type is used
+        value = node.out_port(0).data.get_value()
+        if value is not None:
+            node.out_port(0).set_data_type(value.dtype)
+        else:
+            node.out_port(0).set_data_type(data_type_str_to_np(node.graph.graph['cmd_params'].data_type))
 
 
 class Greater(Elementwise):
index eddbd9d..2622d6a 100644 (file)
@@ -61,7 +61,7 @@ class Gather(Op):
         # both inputs are constant
         if data.value is not None and indices.value is not None:
             indices.value = np.array(indices.value, dtype=np.int64)
-            node.out_node(0).value = np.take(data.value, indices.value, axis)
+            node.out_node(0).value = np.array(np.take(data.value, indices.value, axis), dtype=data.value.dtype)
             node.out_node(0).shape = np.array(node.out_node(0).value.shape, dtype=np.int64)
             return
 
diff --git a/model-optimizer/extensions/ops/non_max_suppression.py b/model-optimizer/extensions/ops/non_max_suppression.py
new file mode 100644 (file)
index 0000000..1ade624
--- /dev/null
@@ -0,0 +1,55 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class NonMaxSuppression(Op):
+    op = 'NonMaxSuppression'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'center_point_box': 0,
+            'in_ports_count': 5,
+            'out_ports_count': 1,
+            'force_precision_in_ports': {2: 'int32'},
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return [
+            'center_point_box',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        boxes_shape = node.in_port(0).data.get_shape()
+        assert boxes_shape is not None, 'The shape of tensor with boxes is not defined'
+        scores_shape = node.in_port(1).data.get_shape()
+        assert scores_shape is not None, 'The shape of tensor with scores is not defined'
+        assert len(boxes_shape) == 3, 'The rank of the tensor with boxes must be equal to 3'
+        assert len(scores_shape) == 3, 'The rank of the tensor with scores must be equal to 3'
+
+        num_classes = scores_shape[1]
+        num_input_boxes = boxes_shape[1]
+        assert scores_shape[2] == num_input_boxes, 'Number of boxes mismatch'
+
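+        # each row of the output is a [batch_index, class_index, box_index] triple; the shape below is
+        # the upper bound of one selection per box per class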
+        node.out_port(0).data.set_shape(int64_array([num_input_boxes * num_classes, 3]))
diff --git a/model-optimizer/extensions/ops/pnorm.py b/model-optimizer/extensions/ops/pnorm.py
new file mode 100644 (file)
index 0000000..8f1a226
--- /dev/null
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph, Node
+from mo.ops.op import Op
+
+
+class PNormOp(Op):
+    """
+     PNorm operation should be replaced by operations:
+     Power(P) -> Reshape(n, c*g -> n, g, c) -> ReduceSum(axis=1) -> Power(1/P)
+    """
+    op = 'pnorm'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': None,
+            'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+            'infer': __class__.infer
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def infer(node: Node):
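+        # every group of 'group' channels is reduced to a single p-norm value, so only the channel
+        # dimension of the input shape changes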
+        shape = node.in_port(0).data.get_shape().copy()
+        shape[1] = shape[1] / node.group
+        node.out_port(0).data.set_shape(shape)
index 2b02ce1..4e23c3b 100644 (file)
@@ -45,7 +45,7 @@ class Range(Op):
         if not start.has_valid('value') or not limit.has_valid('value') or not delta.has_valid('value'):
             log.error("Range operation is supported with constant inputs only")
             return
-        if 'type' in node.pb.attr:
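+        # nodes created internally by transformations have no framework protobuf ('pb') attached,
+        # so check for it before reading the TF data type attribute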
+        if node.has_valid('pb') and 'type' in node.pb.attr:
             from mo.front.tf.extractors.utils import tf_dtype_extractor
             result_data_type = tf_dtype_extractor(node.pb.attr["type"].type)
         else:
index 5477d9b..5a00a05 100644 (file)
@@ -26,7 +26,9 @@ class ExperimentalDetectronROIFeatureExtractor(Op):
         mandatory_props = dict(
             type=__class__.op,
             op=__class__.op,
-            infer=__class__.infer
+            infer=__class__.infer,
+            in_ports_count=5,
+            out_ports_count=2,
         )
 
         super().__init__(graph, mandatory_props, attrs)
@@ -47,7 +49,5 @@ class ExperimentalDetectronROIFeatureExtractor(Op):
         input_features_level_0_shape = node.in_node(1).shape
         channels_num = input_features_level_0_shape[1]
         node.out_node(0).shape = np.array([rois_num, channels_num, node.output_size, node.output_size], dtype=np.int64)
-        try:
+        if not node.out_port(1).disconnected():
             node.out_node(1).shape = np.array([rois_num, 4], dtype=np.int64)
-        except Exception as ex:
-            print(ex)
diff --git a/model-optimizer/extensions/ops/sparse_fill_empty_rows.py b/model-optimizer/extensions/ops/sparse_fill_empty_rows.py
new file mode 100644 (file)
index 0000000..18d076e
--- /dev/null
@@ -0,0 +1,84 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import networkx as nx
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class SparseFillEmptyRows(Op):
+    ''' The operation fills empty rows in the input 2-D sparse tensor with a default value.
+        For more details see https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/sparse-fill-empty-rows
+
+        4 inputs:
+            - [0, required] input indices of the sparse tensor (2D),
+            - [1, required] input values of the sparse tensor (1D),
+            - [2, required] shape of the sparse tensor. Value of this input is required for the Model Optimizer (1D),
+            - [3, required] default value to insert at rows missing from the input sparse tensor (0D),
+        
+        3 outputs:
+            - [0, optional] indices of the filled sparse tensor (2D)
+            - [1, optional] values of the filled sparse tensor (1D)
+            - [2, optional] indicator of whether the dense row was missing in the input sparse tensor (1D)
+    '''
+    op = 'SparseFillEmptyRows'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'in_ports_count': 4,
+            'out_ports_count': 3
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return []
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.in_nodes()) == 4
+
+        # check that shape value is defined that is needed for shape inference
+        shape = node.in_node(2)
+        assert shape.value is not None and shape.value.size == 2, \
+            "SparseFillEmptyRows is supported only with constant shape value"
+
+        shape_value = np.array(shape.value, dtype=np.int64)
+
+        # check that default value is scalar
+        default_value = node.in_node(3)
+        assert default_value.shape is not None and len(default_value.shape) == 0, \
+            "Default value for SparseFillEmptyRows must be scalar"
+
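+        # the shapes of the output indices/values are upper bounds: after filling, the sparse tensor
+        # holds at most np.prod(shape_value) elements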
+        for out_node_ind in node.out_nodes():
+            if out_node_ind == 0: # set a shape for output indices
+                node.out_node(0).shape = np.array([np.prod(shape_value), 2], dtype=np.int64)
+                continue
+            elif out_node_ind == 1: # set a shape for output values
+                node.out_node(1).shape = np.array([np.prod(shape_value)], dtype=np.int64)
+                continue
+            elif out_node_ind == 2: # set a shape for empty row indicator
+                node.out_node(2).shape = np.array([shape_value[0]], dtype=np.int64)
+                continue
+            else:
+                log.error("SparseFillEmptyRows has only three outputs")
+                return
diff --git a/model-optimizer/extensions/ops/sparse_fill_empty_rows_test.py b/model-optimizer/extensions/ops/sparse_fill_empty_rows_test.py
new file mode 100644 (file)
index 0000000..e5cecb0
--- /dev/null
@@ -0,0 +1,128 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.sparse_fill_empty_rows import SparseFillEmptyRows
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+
+nodes_attributes = {'input_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                    'input_values': {'shape': None, 'value': None, 'kind': 'data'},
+                    'dense_shape': {'shape': None, 'value': None, 'kind': 'data'},
+                    'default_value': {'shape': None, 'value': None, 'kind': 'data'},
+                    'sparse_fill_empty_rows_node': {'op': 'SparseFillEmptyRows', 'kind': 'op'},
+                    'output_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                    'output_values': {'shape': None, 'value': None, 'kind': 'data'},
+                    'empty_row_indicator': {'shape': None, 'value': None, 'kind': 'data'},
+                    }
+
+# graph 1
+edges1 = [('input_indices', 'sparse_fill_empty_rows_node', {'in': 0}),
+          ('input_values', 'sparse_fill_empty_rows_node', {'in': 1}),
+          ('dense_shape', 'sparse_fill_empty_rows_node', {'in': 2}),
+          ('default_value', 'sparse_fill_empty_rows_node', {'in': 3}),
+          ('sparse_fill_empty_rows_node', 'output_indices', {'out': 0}),
+          ('sparse_fill_empty_rows_node', 'output_values', {'out': 1}),
+          ('sparse_fill_empty_rows_node', 'empty_row_indicator', {'out': 2})]
+
+inputs1 = {'input_indices': {'shape': int64_array([20, 2]), 'value': None},
+           'input_values': {'shape': int64_array([20]), 'value': None},
+           'dense_shape': {'shape': int64_array([2]), 'value': np.array([4, 5])},
+           'default_value': {'shape': int64_array([]), 'value': None}}
+
+class TestSparseFillEmptyRows(unittest.TestCase):
+    def test_partial_infer(self):
+        graph = build_graph(nodes_attributes, edges1, inputs1)
+
+        sparse_fill_empty_rows_node = Node(graph, 'sparse_fill_empty_rows_node')
+        SparseFillEmptyRows.infer(sparse_fill_empty_rows_node)
+
+        # prepare reference results
+        ref_output_indices_shape = int64_array([20, 2])
+        ref_output_values_shape = int64_array([20])
+        ref_empty_row_indicator_shape = int64_array([4])
+
+        # get resulted shapes
+        res_output_indices_shape = graph.node['output_indices']['shape']
+        res_output_values_shape = graph.node['output_values']['shape']
+        res_empty_row_indicator_shape = graph.node['empty_row_indicator']['shape']
+
+        self.assertTrue(np.array_equal(ref_output_indices_shape, res_output_indices_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_indices_shape, res_output_indices_shape))
+
+        self.assertTrue(np.array_equal(ref_output_values_shape, res_output_values_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_values_shape, res_output_values_shape))
+
+        self.assertTrue(np.array_equal(ref_empty_row_indicator_shape, res_empty_row_indicator_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_empty_row_indicator_shape, res_empty_row_indicator_shape))
+
+    def test_partial_infer_for_some_out_ports(self):
+        edges = [('input_indices', 'sparse_fill_empty_rows_node', {'in': 0}),
+                 ('input_values', 'sparse_fill_empty_rows_node', {'in': 1}),
+                 ('dense_shape', 'sparse_fill_empty_rows_node', {'in': 2}),
+                 ('default_value', 'sparse_fill_empty_rows_node', {'in': 3}),
+                 ('sparse_fill_empty_rows_node', 'output_indices', {'out': 0}),
+                 ('sparse_fill_empty_rows_node', 'empty_row_indicator', {'out': 2})]
+        graph = build_graph(nodes_attributes, edges, inputs1)
+
+        sparse_fill_empty_rows_node = Node(graph, 'sparse_fill_empty_rows_node')
+        SparseFillEmptyRows.infer(sparse_fill_empty_rows_node)
+
+        # prepare reference results
+        ref_output_indices_shape = int64_array([20, 2])
+        ref_empty_row_indicator_shape = int64_array([4])
+
+        # get resulted shapes
+        res_output_indices_shape = graph.node['output_indices']['shape']
+        res_empty_row_indicator_shape = graph.node['empty_row_indicator']['shape']
+
+        self.assertTrue(np.array_equal(ref_output_indices_shape, res_output_indices_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_indices_shape, res_output_indices_shape))
+
+        self.assertTrue(np.array_equal(ref_empty_row_indicator_shape, res_empty_row_indicator_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_empty_row_indicator_shape, res_empty_row_indicator_shape))
+
+    def test_incorrect_shape_of_default_value(self):
+        inputs = {'input_indices': {'shape': int64_array([20, 2]), 'value': None},
+                   'input_values': {'shape': int64_array([20]), 'value': None},
+                   'dense_shape': {'shape': int64_array([2]), 'value': np.array([4, 5])},
+                   'default_value': {'shape': int64_array([3]), 'value': None}}
+        graph = build_graph(nodes_attributes, edges1, inputs)
+        sparse_fill_empty_rows_node = Node(graph, 'sparse_fill_empty_rows_node')
+        self.assertRaises(AssertionError, SparseFillEmptyRows.infer, sparse_fill_empty_rows_node)
+
+    def test_no_value_of_dense_shape(self):
+        inputs = {'input_indices': {'shape': int64_array([20, 2]), 'value': None},
+                   'input_values': {'shape': int64_array([20]), 'value': None},
+                   'dense_shape': {'shape': int64_array([2]), 'value': None},
+                   'default_value': {'shape': int64_array([]), 'value': None}}
+        graph = build_graph(nodes_attributes, edges1, inputs)
+        sparse_fill_empty_rows_node = Node(graph, 'sparse_fill_empty_rows_node')
+        self.assertRaises(AssertionError, SparseFillEmptyRows.infer, sparse_fill_empty_rows_node)
+
+    def test_incorrect_shape_of_dense_shape(self):
+        inputs = {'input_indices': {'shape': int64_array([20, 2]), 'value': None},
+                   'input_values': {'shape': int64_array([20]), 'value': None},
+                   'dense_shape': {'shape': int64_array([2, 2]), 'value': np.array([[4, 5],[1, 2]])},
+                   'default_value': {'shape': int64_array([]), 'value': None}}
+        graph = build_graph(nodes_attributes, edges1, inputs)
+        sparse_fill_empty_rows_node = Node(graph, 'sparse_fill_empty_rows_node')
+        self.assertRaises(AssertionError, SparseFillEmptyRows.infer, sparse_fill_empty_rows_node)
index 330f885..9062feb 100644 (file)
@@ -25,6 +25,7 @@ class Splice(Op):
         mandatory_props = {
             'type': None,
             'op': __class__.op,
+            'const_dim': 0,
             'in_ports_count': 1,
             'out_ports_count': 1,
             'infer': __class__.infer,
@@ -35,4 +36,4 @@ class Splice(Op):
     def infer(node: Node):
         out_node = node.out_node()
         out_node.shape = node.in_node().shape.copy()
-        out_node.shape[1] = node.in_node().shape[1] * len(node.context)
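+        # the first 'const_dim' features are emitted once per frame, while the remaining features are
+        # repeated for every context offset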
+        out_node.shape[1] = node.const_dim + (node.in_node().shape[1] - node.const_dim) * len(node.context)
diff --git a/model-optimizer/extensions/ops/unique.py b/model-optimizer/extensions/ops/unique.py
new file mode 100644 (file)
index 0000000..23b51f2
--- /dev/null
@@ -0,0 +1,171 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import networkx as nx
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class Unique(Op):
+    ''' The operation finds unique elements in a 1-D tensor.
+        For more details see https://www.tensorflow.org/api_docs/python/tf/unique
+
+        attributes:
+            - sorted, indicates whether to sort the unique elements in ascending order or
+                      to return in the same order as they occur in the input
+            - return_inverse, indicates whether to output indices
+            - return_counts, indicates whether to output the counts of each unique element
+
+        1 input:
+            - [0, required] input tensor (1D)
+        
+        3 outputs:
+            - [0, required] tensor containing all of the unique elements of the input,
+                            ordered according to the 'sorted' attribute (1D)
+            - [1, optional] tensor of indices for each value of the input
+                            in the tensor of unique elements (1D)
+            - [2, optional] tensor with the number of occurrences of each unique element
+                            in the input (1D)
+    '''
+    op = 'Unique'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 3
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return [
+            'sorted',
+            'return_inverse',
+            'return_counts',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        # check that all required attributes are set
+        assert node.has('sorted') and node.sorted in ['true', 'false'], \
+            "Unique does not have valid sorted attribute"
+        assert node.has('return_inverse') and node.return_inverse in ['true', 'false'], \
+            "Unique does not have valid return_inverse attribute"
+        assert node.has('return_counts') and node.return_counts in ['true', 'false'], \
+            "Unique does not have valid return_counts attribute"
+
+        # check a number of input and output nodes
+        assert len(node.in_nodes()) == 1, "Unique must have one input"
+        assert len(node.out_nodes()) <= 3, "Unique must have at most 3 outputs"
+
+        # compute maximum number of outputs if no output port is pruned
+        max_num_outputs = 1
+        if node.return_inverse == 'true':
+            max_num_outputs += 1
+        if node.return_counts == 'true':
+            max_num_outputs += 1
+
+        # check a number of outputs
+        assert len(node.out_nodes()) <= max_num_outputs, \
+            "The number of outputs in IR Unique layer must be less or equal to framework graph one"
+        
+        # check that the output with unique elements remains in the graph after pruning
+        # since this is a required output
+        assert 0 in node.out_nodes(), \
+            "The output with unique elements must remain in a graph"
+
+        # check if outputs with indices and counts remain in a graph after pruning
+        # and update attributes
+        if len(node.out_nodes()) == 1:
+            node.return_inverse = 'false'
+            node.return_counts = 'false'
+        if len(node.out_nodes()) == 2 and 1 in node.out_nodes() \
+        and node.return_inverse == 'true' and node.return_counts == 'true':
+            node.return_counts = 'false'
+        if len(node.out_nodes()) == 2 and 2 in node.out_nodes() \
+        and node.return_inverse == 'true' and node.return_counts == 'true':
+            node.return_inverse = 'false'
+
+        # check that input is 1-D tensor
+        input_shape = node.in_node(0).shape
+        assert input_shape is not None and input_shape.size == 1, \
+            "Unique accepts only 1-D input"
+
+        # determine a shape for each output
+        for out_node_ind in node.out_nodes():
+            assert (out_node_ind < max_num_outputs), "Unique has three outputs at most"
+            # all outputs have the same shape equal to the input shape
+            node.out_node(out_node_ind).shape = input_shape
+
+        input_value = node.in_node(0).value
+        if input_value is None:
+            return
+
+        # check that input value is 1-D
+        assert len(input_value.shape) == 1, \
+            "Unique accepts only 1-D input"
+
+        is_sorted = (node.sorted == 'true')
+        return_inverse = (node.return_inverse == 'true')
+        return_counts = (node.return_counts == 'true')
+
+        # infer if the input is constant
+        if is_sorted:
+            unique_output = np.unique(input_value, return_inverse = return_inverse,
+                                      return_counts = return_counts, return_index = False)
+            if not return_inverse and not return_counts:
+                unique_output = [unique_output]
+        else:
+            # np.unique can only return unique elements in sorted order
+            # so this case should be handled separately
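+            # the original order of first occurrence is restored from 'sorted_index', and the indices
+            # and counts are remapped from the value-sorted arrays returned by np.unique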
+            sorted_uniques, sorted_index, sorted_inverse, sorted_counts = np.unique(input_value, return_index = True,
+                                                               return_inverse = True, return_counts = True)
+            # compute uniques that are in the same order as they occur in the input,
+            # indices of input values in uniques, counts for each unique element
+            uniques = []
+            inverse = []
+            counts = []
+            old_ind_by_elem = dict(zip(sorted_uniques, range(len(sorted_index))))
+            new_ind_by_elem = dict()
+            new_ind = 0
+            for ind in np.sort(sorted_index):
+                uniques.append(input_value[ind])
+                old_ind = old_ind_by_elem[input_value[ind]]
+                counts.append(sorted_counts[old_ind])
+                new_ind_by_elem[input_value[ind]] = new_ind
+                new_ind += 1
+            inverse = [new_ind_by_elem[input_value[ind]] for ind in range(len(input_value))]
+
+            # pack unique_output
+            unique_output = []
+            unique_output.append(uniques)
+            if return_inverse:
+                unique_output.append(inverse)
+            if return_counts:
+                unique_output.append(counts)
+
+        # write result to output nodes
+        j = 0
+        for out_node_ind in node.out_nodes():
+            node.out_node(out_node_ind).value = np.array(unique_output[j], dtype=np.float)
+            node.out_node(out_node_ind).shape = np.array(node.out_node(out_node_ind).value.shape, dtype=np.int64)
+            j += 1
diff --git a/model-optimizer/extensions/ops/unique_test.py b/model-optimizer/extensions/ops/unique_test.py
new file mode 100644 (file)
index 0000000..9475900
--- /dev/null
@@ -0,0 +1,273 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.unique import Unique
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+
+# graph 1 with two outputs: uniques and indices
+nodes_attributes = {'input': {'shape': None, 'value': None, 'kind': 'data'},
+                    'unique_node': {'op': 'Unique', 'kind': 'op'},
+                    'output_uniques': {'shape': None, 'value': None, 'kind': 'data'},
+                    'output_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                    }
+edges1 = [('input', 'unique_node', {'in': 0}),
+          ('unique_node', 'output_uniques', {'out': 0}),
+          ('unique_node', 'output_indices', {'out': 1})]
+inputs1 = {'input': {'shape': int64_array([20]), 'value': None},
+           'unique_node': {
+               'sorted': 'false',
+               'return_inverse': 'true',
+               'return_counts': 'false'
+               }
+           }
+
+# graph 2 with three outputs: uniques, indices and counts
+nodes_attributes2 = {'input': {'shape': None, 'value': None, 'kind': 'data'},
+                    'unique_node': {'op': 'Unique', 'kind': 'op'},
+                    'output_uniques': {'shape': None, 'value': None, 'kind': 'data'},
+                    'output_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                    'output_counts': {'shape': None, 'value': None, 'kind': 'data'}
+                    }
+edges2 = [('input', 'unique_node', {'in': 0}),
+          ('unique_node', 'output_uniques', {'out': 0}),
+          ('unique_node', 'output_indices', {'out': 1}),
+          ('unique_node', 'output_counts', {'out': 2})]
+inputs2 = {'input': {'shape': int64_array([20]), 'value': None},
+           'unique_node': {
+               'sorted': 'false',
+               'return_inverse': 'true',
+               'return_counts': 'true'
+               }
+           }
+
+
+class TestUnique(unittest.TestCase):
+    # case 1: a graph with two outputs: uniques and indices
+    def test_partial_infer1(self):
+        graph = build_graph(nodes_attributes, edges1, inputs1)
+
+        unique_node = Node(graph, 'unique_node')
+        Unique.infer(unique_node)
+
+        # prepare reference results
+        ref_output_uniques_shape = int64_array([20])
+        ref_output_indices_shape = int64_array([20])
+
+        # get resulted shapes
+        res_output_uniques_shape = graph.node['output_uniques']['shape']
+        res_output_indices_shape = graph.node['output_indices']['shape']
+
+        self.assertTrue(np.array_equal(ref_output_uniques_shape, res_output_uniques_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_uniques_shape, res_output_uniques_shape))
+
+        self.assertTrue(np.array_equal(ref_output_indices_shape, res_output_indices_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_indices_shape, res_output_indices_shape))
+
+    # case 2: a graph with three outputs: uniques, indices and counts
+    def test_partial_infer2(self):
+        graph = build_graph(nodes_attributes2, edges2, inputs2)
+
+        unique_node = Node(graph, 'unique_node')
+        Unique.infer(unique_node)
+
+        # prepare reference results
+        ref_output_uniques_shape = int64_array([20])
+        ref_output_indices_shape = int64_array([20])
+        ref_output_counts_shape = int64_array([20])
+
+        # get resulted shapes
+        res_output_uniques_shape = graph.node['output_uniques']['shape']
+        res_output_indices_shape = graph.node['output_indices']['shape']
+        res_output_counts_shape = graph.node['output_counts']['shape']
+
+        self.assertTrue(np.array_equal(ref_output_uniques_shape, res_output_uniques_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_uniques_shape, res_output_uniques_shape))
+
+        self.assertTrue(np.array_equal(ref_output_indices_shape, res_output_indices_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_indices_shape, res_output_indices_shape))
+
+        self.assertTrue(np.array_equal(ref_output_counts_shape, res_output_counts_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_counts_shape, res_output_counts_shape))
+
+    # case 3: a graph with just unique output
+    def test_partial_infer_just_unique(self):
+        edges = [('input', 'unique_node', {'in': 0}),
+                 ('unique_node', 'output_uniques', {'out': 0})]
+        graph = build_graph(nodes_attributes, edges, inputs1)
+
+        unique_node = Node(graph, 'unique_node')
+        Unique.infer(unique_node)
+
+        # prepare reference results
+        ref_output_uniques_shape = int64_array([20])
+
+        # get resulted shapes
+        res_output_uniques_shape = graph.node['output_uniques']['shape']
+
+        self.assertTrue(np.array_equal(ref_output_uniques_shape, res_output_uniques_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_uniques_shape, res_output_uniques_shape))
+
+    # case 4: an invalid graph with 2D input
+    def test_incorrect_input_shape(self):
+        inputs = {'input': {'shape': int64_array([20, 2]), 'value': None}}
+
+        graph = build_graph(nodes_attributes, edges1, inputs)
+
+        unique_node = Node(graph, 'unique_node')
+        self.assertRaises(AssertionError, Unique.infer, unique_node)
+
+    # case 5: an invalid graph with return_counts = false and three outputs
+    def test_more_output_ports(self):
+        nodes_attributes1 = {'input': {'shape': None, 'value': None, 'kind': 'data'},
+                             'unique_node': {'op': 'Unique', 'kind': 'op'},
+                             'output_uniques': {'shape': None, 'value': None, 'kind': 'data'},
+                             'output_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                             'output3': {'shape': None, 'value': None, 'kind': 'data'},
+                             }
+        edges = [('input', 'unique_node', {'in': 0}),
+                 ('unique_node', 'output_uniques', {'out': 0}),
+                 ('unique_node', 'output_indices', {'out': 1}),
+                 ('unique_node', 'output3', {'out': 2})]
+        graph = build_graph(nodes_attributes1, edges, inputs1)
+
+        unique_node = Node(graph, 'unique_node')
+        self.assertRaises(AssertionError, Unique.infer, unique_node)
+
+    # case 6: an invalid graph without unique output
+    def test_no_uniques_output(self):
+        edges = [('input', 'unique_node', {'in': 0}),
+                 ('unique_node', 'output_indices', {'out': 1})]
+        graph = build_graph(nodes_attributes, edges, inputs1)
+
+        unique_node = Node(graph, 'unique_node')
+        self.assertRaises(AssertionError, Unique.infer, unique_node)
+
+    # case 7: infer for constant input
+    # graph with a constant input, three outputs, sorted = 'false'
+    def test_constant_input(self):
+        nodes_attributes_ = {'input': {'shape': None, 'value': None, 'kind': 'data'},
+                            'unique_node': {'op': 'Unique', 'kind': 'op'},
+                            'output_uniques': {'shape': None, 'value': None, 'kind': 'data'},
+                            'output_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                            'output_counts': {'shape': None, 'value': None, 'kind': 'data'}
+                            }
+        edges_ = [('input', 'unique_node', {'in': 0}),
+                  ('unique_node', 'output_uniques', {'out': 0}),
+                  ('unique_node', 'output_indices', {'out': 1}),
+                  ('unique_node', 'output_counts', {'out': 2})]
+        inputs_ = {'input': {'shape': int64_array([10]),
+                             'value': np.array([8.0, 1.0, 2.0, 1.0, 8.0, 5.0, 1.0, 5.0, 0.0, 0.0], dtype=np.float)},
+                   'unique_node': {
+                       'sorted': 'false',
+                       'return_inverse': 'true',
+                       'return_counts': 'true'
+                       }
+                   }
+        graph = build_graph(nodes_attributes_, edges_, inputs_)
+        unique_node = Node(graph, 'unique_node')
+        Unique.infer(unique_node)
+
+        # prepare reference results
+        ref_output_uniques_shape = int64_array([5])
+        ref_output_uniques_value = np.array([8.0, 1.0, 2.0, 5.0, 0.0], dtype=np.float)
+        ref_output_indices_shape = int64_array([10])
+        ref_output_indices_value = np.array([0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 4.0, 4.0], dtype=np.float)
+        ref_output_counts_shape = int64_array([5])
+        ref_output_counts_value = np.array([2.0, 3.0, 1.0, 2.0, 2.0], dtype=np.float)
+
+        # get resulted shapes
+        res_output_uniques_shape = graph.node['output_uniques']['shape']
+        res_output_uniques_value = graph.node['output_uniques']['value']
+        res_output_indices_shape = graph.node['output_indices']['shape']
+        res_output_indices_value = graph.node['output_indices']['value']
+        res_output_counts_shape = graph.node['output_counts']['shape']
+        res_output_counts_value = graph.node['output_counts']['value']
+
+        # verify the results
+        self.assertTrue(np.array_equal(ref_output_uniques_shape, res_output_uniques_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_uniques_shape, res_output_uniques_shape))
+        self.assertTrue(np.array_equal(ref_output_uniques_value, res_output_uniques_value),
+                        'values do not match expected: {} and given: {}'.format(ref_output_uniques_value, res_output_uniques_value))
+        self.assertTrue(np.array_equal(ref_output_indices_shape, res_output_indices_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_indices_shape, res_output_indices_shape))
+        self.assertTrue(np.array_equal(ref_output_indices_value, res_output_indices_value),
+                        'values do not match expected: {} and given: {}'.format(ref_output_indices_value, res_output_indices_value))
+        self.assertTrue(np.array_equal(ref_output_counts_shape, res_output_counts_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_counts_shape, res_output_counts_shape))
+        self.assertTrue(np.array_equal(ref_output_counts_value, res_output_counts_value),
+                        'values do not match expected: {} and given: {}'.format(ref_output_counts_value, res_output_counts_value))
+
+    # case 8: infer for constant input
+    # graph with a constant input, three outputs, sorted = 'true'
+    def test_constant_input_sorted(self):
+        nodes_attributes_ = {'input': {'shape': None, 'value': None, 'kind': 'data'},
+                            'unique_node': {'op': 'Unique', 'kind': 'op'},
+                            'output_uniques': {'shape': None, 'value': None, 'kind': 'data'},
+                            'output_indices': {'shape': None, 'value': None, 'kind': 'data'},
+                            'output_counts': {'shape': None, 'value': None, 'kind': 'data'}
+                            }
+        edges_ = [('input', 'unique_node', {'in': 0}),
+                  ('unique_node', 'output_uniques', {'out': 0}),
+                  ('unique_node', 'output_indices', {'out': 1}),
+                  ('unique_node', 'output_counts', {'out': 2})]
+        inputs_ = {'input': {'shape': int64_array([10]),
+                             'value': np.array([8.0, 1.0, 2.0, 1.0, 8.0, 5.0, 1.0, 5.0, 0.0, 0.0], dtype=np.float)},
+                   'unique_node': {
+                       'sorted': 'true',
+                       'return_inverse': 'true',
+                       'return_counts': 'true'
+                       }
+                   }
+        graph = build_graph(nodes_attributes_, edges_, inputs_)
+        unique_node = Node(graph, 'unique_node')
+        Unique.infer(unique_node)
+
+        # prepare reference results
+        ref_output_uniques_shape = int64_array([5])
+        ref_output_uniques_value = np.array([0.0, 1.0, 2.0, 5.0, 8.0], dtype=np.float)
+        ref_output_indices_shape = int64_array([10])
+        ref_output_indices_value = np.array([4.0, 1.0, 2.0, 1.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0], dtype=np.float)
+        ref_output_counts_shape = int64_array([5])
+        ref_output_counts_value = np.array([2.0, 3.0, 1.0, 2.0, 2.0], dtype=np.float)
+
+        # get the resulting shapes and values
+        res_output_uniques_shape = graph.node['output_uniques']['shape']
+        res_output_uniques_value = graph.node['output_uniques']['value']
+        res_output_indices_shape = graph.node['output_indices']['shape']
+        res_output_indices_value = graph.node['output_indices']['value']
+        res_output_counts_shape = graph.node['output_counts']['shape']
+        res_output_counts_value = graph.node['output_counts']['value']
+
+        # verify the results
+        self.assertTrue(np.array_equal(ref_output_uniques_shape, res_output_uniques_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_uniques_shape, res_output_uniques_shape))
+        self.assertTrue(np.array_equal(ref_output_uniques_value, res_output_uniques_value),
+                        'values do not match expected: {} and given: {}'.format(ref_output_uniques_value, res_output_uniques_value))
+        self.assertTrue(np.array_equal(ref_output_indices_shape, res_output_indices_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_indices_shape, res_output_indices_shape))
+        self.assertTrue(np.array_equal(ref_output_indices_value, res_output_indices_value),
+                        'values do not match expected: {} and given: {}'.format(ref_output_indices_value, res_output_indices_value))
+        self.assertTrue(np.array_equal(ref_output_counts_shape, res_output_counts_shape),
+                        'shapes do not match expected: {} and given: {}'.format(ref_output_counts_shape, res_output_counts_shape))
+        self.assertTrue(np.array_equal(ref_output_counts_value, res_output_counts_value),
+                        'values do not match expected: {} and given: {}'.format(ref_output_counts_value, res_output_counts_value))
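For reference, the expected results of the sorted case (case 8) can be cross-checked against plain numpy, which also returns sorted uniques together with inverse indices and counts. A minimal sketch, independent of the Model Optimizer graph classes:

    import numpy as np

    x = np.array([8.0, 1.0, 2.0, 1.0, 8.0, 5.0, 1.0, 5.0, 0.0, 0.0])
    uniques, indices, counts = np.unique(x, return_inverse=True, return_counts=True)
    # uniques -> [0. 1. 2. 5. 8.]
    # indices -> [4 1 2 1 4 3 1 3 0 0]
    # counts  -> [2 3 1 2 2]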
index 2d318ee..0d68c9d 100644 (file)
@@ -402,8 +402,8 @@ def generate_ie_ir(graph: Graph, file_name: str, input_names: tuple = (), mean_o
         unsupported.report(log.error, "List of operations that cannot be converted to Inference Engine IR:")
         raise Error('Part of the nodes was not converted to IR. Stopped. ' +
                     refer_to_faq_msg(24))
-    with open(file_name, 'w') as file:
-        file.write(pretty_xml_as_string)
+    with open(file_name, 'wb') as file:
+        file.write(bytes(pretty_xml_as_string, "UTF-8"))
 
 
 def port_renumber(graph: Graph):
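Opening the IR file in binary mode makes the output encoding explicit instead of relying on the platform's default text encoding. A minimal sketch of the same pattern (the file name here is only an example):

    xml_text = '<?xml version="1.0"?>\n<net/>'
    # binary mode: the bytes written are exactly the UTF-8 encoding, no locale dependence
    with open('model.xml', 'wb') as f:
        f.write(bytes(xml_text, 'UTF-8'))   # equivalent to xml_text.encode('utf-8')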
index 372a124..d6ada30 100644 (file)
@@ -67,7 +67,7 @@ def concat_infer(node):
     if any(v is None for v in values):
         return
 
-    node.out_node(0).value = np.concatenate(values, axis=node.axis)
+    node.out_node(0).value = np.array(np.concatenate(values, axis=node.axis), dtype=values[0].dtype)
     node.out_node(0).shape = np.array(node.out_node(0).value.shape, dtype=np.int64)
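Wrapping np.concatenate in np.array(..., dtype=values[0].dtype) pins the constant-folded value to the dtype of the first input; numpy would otherwise promote mixed inputs to a common type. A small illustration with assumed inputs:

    import numpy as np

    a = np.array([1, 2], dtype=np.int32)
    b = np.array([3, 4], dtype=np.int64)
    print(np.concatenate([a, b]).dtype)                           # int64 (promoted)
    print(np.array(np.concatenate([a, b]), dtype=a.dtype).dtype)  # int32 (kept)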
 
 
index 12d4b80..7cfdb15 100644 (file)
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import numpy as np
-import logging as log
 import networkx as nx
+import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
@@ -90,3 +89,10 @@ def eltwise_infer(node, op=None, **kwargs):
         node.out_node().value = values[0]
         for i in range(len(values) - 1):
             node.out_node().value = op(node.out_node().value, values[i + 1])
+
+
+def bias_add_infer(node, op):
+    if node.in_port(0).data.get_value() is not None and node.in_port(1).data.get_value() is not None and op is not None:
+        node.out_port(0).data.set_value(op(node.in_port(0).data.get_value(), node.in_port(1).data.get_value()))
+    else:
+        node.out_port(0).data.set_shape(node.in_port(0).data.get_shape())
index 755451a..0efc280 100644 (file)
@@ -26,6 +26,10 @@ def multi_box_detection_infer(node: Node):
     conf_shape = node.in_node(1).shape
     prior_boxes_shape = node.in_node(2).shape
 
+    if loc_shape is None or conf_shape is None or prior_boxes_shape is None:
+        log.warning('Shapes for the Detection Output are not defined')
+        return
+
     prior_size = 4
     if node.has('normalized') and not node.normalized:
         prior_size = 5
@@ -42,10 +46,6 @@ def multi_box_detection_infer(node: Node):
     if node.has_and_set('share_location') and node.share_location:
         num_loc_classes = 1
 
-    if loc_shape is None or conf_shape is None or prior_boxes_shape is None:
-        log.warning('Shapes for the Detection Output are not defined')
-        return
-
     if num_priors * num_loc_classes * 4 != loc_shape[-1]:
         log.warning('Locations and prior boxes shapes mismatch: "{}" vs "{}"'.format(loc_shape, prior_boxes_shape))
         return
index d157364..00f3300 100644 (file)
@@ -16,6 +16,8 @@
 
 import numpy as np
 
+from mo.front.common.partial_infer.utils import int64_array
+
 
 def space_to_batch_infer(node):
     """
@@ -36,8 +38,9 @@ def space_to_batch_infer(node):
 
     pads = pad[:, 0] + input_shape[1:len(block_size)+1] + pad[:, 1]
 
-    output_shape = [input_shape[0] * np.prod(block_size), *[int(x) for x in (pads / block_size)], input_shape[-1]]
-    node.out_node().shape = np.array(output_shape)
+    node.out_node().shape = int64_array([input_shape[0] * np.prod(block_size),
+                                         *[int(x) for x in (pads / block_size)],
+                                         *input_shape[len(block_size) + 1:]])
 
 
 def batch_to_space_infer(node):
@@ -62,5 +65,4 @@ def batch_to_space_infer(node):
     sizes = pads - crop[:, 0] - crop[:, 1]
     batch = int(input_shape[0] / (np.prod(block_size)))
 
-    output_shape = [batch, *sizes, input_shape[-1]]
-    node.out_node().shape = np.array(output_shape)
+    node.out_node().shape = int64_array([batch, *sizes, *input_shape[len(block_size) + 1:]])
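The reworked formula keeps every trailing dimension after the blocked axes instead of assuming a single channel axis at the end. A worked example of the space_to_batch shape computation with assumed NHWC input:

    import numpy as np

    input_shape = np.array([2, 4, 6, 3], dtype=np.int64)   # N, H, W, C
    block_size = np.array([2, 2], dtype=np.int64)          # blocks over H and W
    pad = np.array([[0, 0], [0, 2]], dtype=np.int64)       # (before, after) per blocked axis

    pads = pad[:, 0] + input_shape[1:len(block_size) + 1] + pad[:, 1]   # [4 8]
    out = [input_shape[0] * np.prod(block_size),
           *[int(x) for x in (pads / block_size)],
           *input_shape[len(block_size) + 1:]]
    # out: [8, 2, 4, 3] -- batch * 4, padded H and W divided by the block, channels preserved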
index 6e08147..0dbb3cd 100644 (file)
@@ -88,8 +88,10 @@ def split(input_data_node: Node, node: Node, axis: int, part_sizes: list):
         return
 
     splitted = None
-    if input_data_node.value is not None:
-        splitted = np.split(input_data_node.value, part_sizes_to_indices(part_sizes), axis)
+    input_value = input_data_node.value
+    if input_value is not None:
+        splitted = [np.array(part, dtype=input_value.dtype)
+                    for part in np.split(input_value, part_sizes_to_indices(part_sizes), axis)]
 
     # not all outputs from the split could be used so it is necessary to iterate over output edges and infer shape for
     # necessary nodes only
@@ -104,7 +106,6 @@ def split(input_data_node: Node, node: Node, axis: int, part_sizes: list):
             out_node.value = splitted[out_port]
             assert all(out_node.value.shape == out_node.shape)
 
-    assert not node.has_valid('axis') or node.axis == axis
     node.axis = axis
     # WARNING: != 4 is supposed to work for NHWC to NCHW translation only.
     # if other global permutations happen this will fail
diff --git a/model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext.py
new file mode 100644 (file)
index 0000000..80b6907
--- /dev/null
@@ -0,0 +1,58 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.kaldi.loader.utils import collect_until_token, read_binary_integer32_token, read_binary_float_token
+from extensions.ops.pnorm import PNormOp
+from mo.utils.error import Error
+
+
+class PNormComponentFrontExtractor(FrontExtractorOp):
+    op = 'pnormcomponent'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        pb = node.parameters
+        try:
+            collect_until_token(pb, b'<InputDim>')
+        except Error:
+            raise Error("<InputDim> was not found")
+        in_dim = read_binary_integer32_token(pb)
+
+        try:
+            collect_until_token(pb, b'<OutputDim>')
+        except Error:
+            raise Error("<OutputDim> was not found")
+        out_dim = read_binary_integer32_token(pb)
+
+        assert in_dim % out_dim == 0
+
+        group = in_dim // out_dim
+
+        try:
+            collect_until_token(pb, b'<P>')
+        except Error:
+            raise Error("<P> was not found")
+        p = read_binary_float_token(pb)
+
+        attrs = {
+                 'group': group,
+                 'p': p,
+        }
+
+        PNormOp.update_node_stat(node, attrs)
+        return __class__.enabled
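Kaldi's PnormComponent is generally described as reducing each group of in_dim // out_dim consecutive inputs with an L-p norm; the sketch below illustrates that assumed semantics in plain numpy, not the Model Optimizer implementation of PNormOp:

    import numpy as np

    def pnorm_reference(x, out_dim, p):
        group = x.shape[-1] // out_dim                      # matches the 'group' attribute extracted above
        grouped = x.reshape(*x.shape[:-1], out_dim, group)  # split the last axis into groups
        return np.sum(np.abs(grouped) ** p, axis=-1) ** (1.0 / p)

    print(pnorm_reference(np.array([3.0, 4.0, 6.0, 8.0]), out_dim=2, p=2))   # [ 5. 10.]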
diff --git a/model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext_test.py
new file mode 100644 (file)
index 0000000..5e725f5
--- /dev/null
@@ -0,0 +1,41 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from extensions.ops.pnorm import PNormOp
+from mo.front.kaldi.extractors.pnorm_component_ext import PNormComponentFrontExtractor
+from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
+from mo.front.kaldi.loader.utils_test import TestKaldiUtilsLoading
+from mo.ops.op import Op
+
+
+class PNormComponentFrontExtractorTest(KaldiFrontExtractorTest):
+    @classmethod
+    def register_op(cls):
+        Op.registered_ops['pnorm'] = PNormOp
+
+    @classmethod
+    def create_pb_for_test_node(cls):
+        pb = KaldiFrontExtractorTest.write_tag_with_value('<InputDim>', 3500)
+        pb += KaldiFrontExtractorTest.write_tag_with_value('<OutputDim>', 350)
+        pb += KaldiFrontExtractorTest.write_tag_with_value('<P>', 2, np.float32)
+        cls.test_node['parameters'] = TestKaldiUtilsLoading.bytesio_from(pb)
+
+    def test_extract(self):
+        PNormComponentFrontExtractor.extract(self.test_node)
+        self.assertEqual(self.test_node['p'], 2)
+        self.assertEqual(self.test_node['group'], 10)
index da39914..4a0b95f 100644 (file)
@@ -49,5 +49,12 @@ class SpliceFrontExtractor(FrontExtractorOp):
             mapping_rule['context'] = read_binary_vector(pb, False, dtype=np.int32)
         else:
             raise Error('Unknown token {} in SpliceComponent node {}'.format(tag, node.id))
+
+        tag = find_next_tag(pb)
+        if tag == '<ConstComponentDim>':
+            read_placeholder(pb, 1)
+            const_dim = read_binary_integer32_token(pb)
+            mapping_rule['const_dim'] = const_dim
+
         Splice.update_node_stat(node, mapping_rule)
         return __class__.enabled
index 288a4dc..232e280 100644 (file)
@@ -81,8 +81,9 @@ def load_parallel_component(file_descr, graph: Graph, prev_layer_id):
     for i in range(nnet_count):
         read_token_value(file_descr, b'<NestedNnet>')
         collect_until_token(file_descr, b'<Nnet>')
-        g, shape = load_kalid_nnet1_model(file_descr, 'Nested_net_{}'.format(i))
+        g = load_kalid_nnet1_model(file_descr, 'Nested_net_{}'.format(i))
         input_nodes = [n for n in graph.nodes(data=True) if n[1]['op'] == 'Parameter']
+        shape = input_nodes[0][1]['shape']
         if i != nnet_count - 1:
             slices_points.append(shape[1])
         g.remove_node(input_nodes[0][0])
@@ -157,7 +158,6 @@ def load_kalid_nnet1_model(file_descr, name):
 
     prev_layer_id = 'Parameter'
     graph.add_node(prev_layer_id, name=prev_layer_id, kind='op', op='Parameter', parameters=None)
-    input_shape = np.array([])
 
     while True:
         component_type = find_next_component(file_descr)
@@ -185,13 +185,13 @@ def load_kalid_nnet1_model(file_descr, name):
         prev_node = Node(graph, prev_layer_id)
         if prev_node.op == 'Parameter':
             prev_node['shape'] = np.array([1, layer_i], dtype=np.int64)
-            input_shape = np.array([1, layer_i], dtype=np.int64)
+
         prev_node.add_output_port(0)
         Node(graph, layer_id).add_input_port(0)
         graph.create_edge(prev_node, Node(graph, layer_id), 0, 0)
         prev_layer_id = layer_id
         log.debug('{} (type is {}) was loaded'.format(prev_layer_id, component_type))
-    return graph, input_shape
+    return graph
 
 
 def load_kalid_nnet2_model(file_descr, nnet_name):
@@ -203,38 +203,35 @@ def load_kalid_nnet2_model(file_descr, nnet_name):
 
     all_components = load_components(file_descr, graph)
 
-    input_shape = np.array([])
-
     for layer_id in all_components:
         prev_node = Node(graph, prev_layer_id)
         if prev_node.op == 'Parameter':
             parameters = Node(graph, layer_id).parameters
             input_dim = read_token_value(parameters, b'<InputDim>')
             prev_node['shape'] = np.array([1, input_dim], dtype=np.int64)
-            input_shape = np.array([1, input_dim], dtype=np.int64)
         prev_node.add_output_port(0)
         Node(graph, layer_id).add_input_port(0)
         graph.create_edge(prev_node, Node(graph, layer_id), 0, 0)
         prev_layer_id = layer_id
         log.debug('{} and {} were connected'.format(prev_layer_id, layer_id))
-    return graph, input_shape
+    return graph
 
 
 def load_kaldi_nnet3_model(file_descr, nnet_name):
     graph = Graph(name=nnet_name)
     file_descr.read(1)
-    component_layer_map, input_shape, input_name = load_topology_map(file_descr, graph)
+    component_layer_map = load_topology_map(file_descr, graph)
     # add information for shape calculation for MemoryOffset
     # shape calculation for MemoryOffset can't be done through shape of previous layer because
     # it is separated in 2 parts to remove cycle from graph
-    node = Node(graph, input_name)
-    for o_n_name, params in node.get_outputs():
-        o_n = Node(graph, o_n_name)
-        if o_n['op'] == 'MemoryOffset':
-            o_n['parameters']['element_size'] = input_shape[1]
+    for node in graph.get_op_nodes(**{'op': 'Parameter'}):
+        for o_n_name, params in node.get_outputs():
+            o_n = Node(graph, o_n_name)
+            if o_n['op'] == 'MemoryOffset':
+                o_n['parameters']['element_size'] = node['shape'][1]
 
     load_components(file_descr, graph, component_layer_map)
-    return graph, input_shape
+    return graph
 
 
 def load_components(file_descr, graph, component_layer_map=None):
@@ -308,18 +305,15 @@ def load_topology_map(file_descr, graph):
     not_finished = True
     component_layer_map = {}
     layer_node_map = {}
-    input_shape = np.array([], dtype=np.int64)
-    input_name = ""
     while not_finished:
-        not_finished, input_shape, input_name = read_node(file_descr, graph, component_layer_map, layer_node_map,
-                                                          input_shape, input_name)
-    return component_layer_map, input_shape, input_name
+        not_finished = read_node(file_descr, graph, component_layer_map, layer_node_map)
+    return component_layer_map
 
 
-def read_node(file_descr, graph, component_layer_map, layer_node_map, input_shape, input_name):
+def read_node(file_descr, graph, component_layer_map, layer_node_map):
     s = file_descr.readline()
     if s == b'\n':
-        return False, input_shape, input_name
+        return False
     tokens = s.split(b' ')
     if tokens[0] == b'input-node':
         in_name = s[s.find(b'name=')+len(b'name='):].split(b' ')[0]
@@ -332,9 +326,6 @@ def read_node(file_descr, graph, component_layer_map, layer_node_map, input_shap
         else:
             Node(graph, in_name)['op'] = 'Parameter'
             Node(graph, in_name)['shape'] = in_shape
-
-        input_shape = in_shape
-        input_name = in_name
     elif tokens[0] == b'component-node':
         layer_name = s[s.find(b'name=')+len(b'name='):].split(b' ')[0]
         layer_name = str(layer_name).strip('b').replace('\'', "")
@@ -430,7 +421,7 @@ def read_node(file_descr, graph, component_layer_map, layer_node_map, input_shap
                 o_n['parameters']['element_size'] = dim
     else:
         raise Error("Unsupported node specifier {}".format(tokens[0]))
-    return True, input_shape, input_name
+    return True
 
 
 def parse_input_for_node(string, graph, component_layer_map):
@@ -536,11 +527,5 @@ def parse_specifier(string, graph, layer_node_map):
             node['parameters']['has_default'] = True
         return node_id
     elif spec == b'ReplaceIndex':
-        spec_name = graph.unique_id(prefix='ReplaceIndex_')
-        graph.add_node(spec_name,
-                       parameters=dict(),
-                       op='ReplaceIndex',
-                       kind='op')
         node = parse_specifier(args[0], graph, layer_node_map)
-        graph.add_edge(node, spec_name, **create_edge_attrs(node, spec_name))
-        return spec_name
+        return node
index d9b7a22..b3f1b26 100644 (file)
@@ -19,7 +19,7 @@ import struct
 import unittest
 
 from mo.front.kaldi.loader.loader import load_topology_map, load_components
-from mo.graph.graph import Graph
+from mo.graph.graph import Graph, Node
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
 
@@ -33,15 +33,15 @@ class TestKaldiModelsLoading(unittest.TestCase):
                    "component-node name=tdnn1.batchnorm component=tdnn1.batchnorm input=tdnn1.relu \n\n"
         graph = Graph(name="test_graph_component_map_loading_sequence")
 
-        test_top_map, input_shape, input_name = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
+        test_top_map = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
 
         ref_map = {b"lda": ["lda"],
                    b"tdnn1.affine": ["tdnn1.affine"],
                    b"tdnn1.relu": ["tdnn1.relu"],
                    b"tdnn1.batchnorm": ["tdnn1.batchnorm"]}
         self.assertEqual(test_top_map, ref_map)
-        self.assertListEqual(list(input_shape), [1, 16])
-        self.assertEquals(input_name, "input")
+        self.assertTrue("input" in graph.nodes())
+        self.assertListEqual(list(Node(graph, 'input')['shape']), [1, 16])
 
         ref_graph = build_graph({'input': {'shape': np.array([1, 16]), 'kind': 'op', 'op': 'Parameter'},
                                  'lda': {'kind': 'op'},
@@ -70,15 +70,15 @@ class TestKaldiModelsLoading(unittest.TestCase):
                    "\n"
         graph = Graph(name="test_graph_component_map_loading_swap")
 
-        test_top_map, input_shape, input_name = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
+        test_top_map = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
 
         ref_map = {b"lda": ["lda"],
                    b"tdnn1.affine": ["tdnn1.affine"],
                    b"tdnn1.relu": ["tdnn1.relu"],
                    b"tdnn1.batchnorm": ["tdnn1.batchnorm"]}
         self.assertEqual(test_top_map, ref_map)
-        self.assertListEqual(list(input_shape), [1, 16])
-        self.assertEquals(input_name, "input")
+        self.assertTrue("input" in graph.nodes())
+        self.assertListEqual(list(Node(graph, 'input')['shape']), [1, 16])
 
         ref_graph = build_graph({'input': {'shape': np.array([1, 16]), 'kind': 'op', 'op': 'Parameter'},
                                  'lda': {'kind': 'op'},
@@ -104,14 +104,14 @@ class TestKaldiModelsLoading(unittest.TestCase):
                    "\n"
         graph = Graph(name="test_graph_component_map_loading_append")
 
-        test_top_map, input_shape, input_name = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
+        test_top_map = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
 
         ref_map = {b"lda": ["lda"],
                    b"tdnn1.affine": ["tdnn1.affine"],
                    b"tdnn1.relu": ["tdnn1.relu"]}
         self.assertEqual(test_top_map, ref_map)
-        self.assertListEqual(list(input_shape), [1, 16])
-        self.assertEqual(input_name, "input")
+        self.assertTrue("input" in graph.nodes())
+        self.assertListEqual(list(Node(graph, 'input')['shape']), [1, 16])
 
         ref_graph = build_graph({'input': {'shape': np.array([1, 16]), 'kind': 'op', 'op': 'Parameter'},
                                  'lda': {'kind': 'op'},
@@ -143,14 +143,14 @@ class TestKaldiModelsLoading(unittest.TestCase):
                    "\n"
         graph = Graph(name="test_graph_component_map_loading_offset")
 
-        test_top_map, input_shape, input_name = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
+        test_top_map = load_topology_map(io.BytesIO(bytes(test_map, 'ascii')), graph)
 
         ref_map = {b"lda": ["lda"],
                    b"tdnn1.affine": ["tdnn1.affine"],
                    b"tdnn1.relu": ["tdnn1.relu"]}
         self.assertEqual(test_top_map, ref_map)
-        self.assertListEqual(list(input_shape), [1, 16])
-        self.assertEqual(input_name, "input")
+        self.assertTrue("input" in graph.nodes())
+        self.assertListEqual(list(Node(graph, 'input')['shape']), [1, 16])
 
         ref_graph = build_graph({'input': {'shape': np.array([1, 16]), 'kind': 'op', 'op': 'Parameter'},
                                  'lda': {'kind': 'op'},
index c7ac3ec..5502368 100644 (file)
@@ -28,35 +28,37 @@ end_of_component_tag = '<!EndOfComponent>'
 supported_components = [
     'addshift',
     'affinecomponent',
+    'affinecomponentpreconditionedonline',
     'affinetransform',
+    'backproptruncationcomponent',
+    'batchnormcomponent',
+    'clipgradientcomponent',
     'convolutional1dcomponent',
     'convolutionalcomponent',
     'copy',
+    'elementwiseproductcomponent',
     'fixedaffinecomponent',
+    'linearcomponent',
+    'logsoftmaxcomponent',
+    'lstmnonlinearitycomponent',
     'lstmprojected',
     'lstmprojectedstreams',
     'maxpoolingcomponent',
+    'naturalgradientaffinecomponent',
+    'naturalgradientperelementscalecomponent',
+    'noopcomponent',
+    'normalizecomponent',
     'parallelcomponent',
+    'pnormcomponent',
+    'rectifiedlinearcomponent',
     'rescale',
     'sigmoid',
+    'sigmoidcomponent',
     'softmax',
     'softmaxcomponent',
     'splicecomponent',
+    'sumgroupcomponent',
     'tanhcomponent',
-    'normalizecomponent',
-    'affinecomponentpreconditionedonline',
-    'rectifiedlinearcomponent',
-    'batchnormcomponent',
-    'naturalgradientaffinecomponent',
-    'logsoftmaxcomponent',
-    'naturalgradientperelementscalecomponent',
-    'sigmoidcomponent',
-    'tanhcomponent',
-    'elementwiseproductcomponent',
-    'clipgradientcomponent',
-    'noopcomponent',
-    'lstmnonlinearitycomponent',
-    'backproptruncationcomponent',
 ]
 
 
@@ -191,6 +193,7 @@ def find_next_component(file_desc: io.BufferedReader) -> str:
     :param file_desc:file descriptor
     :return: string like '<component>'
     """
+    is_start = True
     while True:
         tag = find_next_tag(file_desc)
         # Tag is <NameOfTheLayer>. But we want get without '<' and '>'
@@ -201,6 +204,9 @@ def find_next_component(file_desc: io.BufferedReader) -> str:
             return component_name
         elif tag == '<ComponentName>':
             raise Error('Component has unsupported or not specified type')
+        elif not (is_start and tag == end_of_component_tag) and tag.find('Component') != -1:
+            raise Error('Component has unsupported type {}'.format(tag))
+        is_start = False
 
 
 def get_name_from_path(path: str) -> str:
index 47e6258..1b221c0 100644 (file)
@@ -88,9 +88,14 @@ class TestKaldiUtilsLoading(unittest.TestCase):
         test_file = b'<Nnet>somefakeinfo<another>info' + component + b'<tag><!EndOfComponent></Nnet>'
         self.assertEqual(find_next_component(self.bytesio_from(test_file)), component.decode('ascii').lower()[1:-1])
 
+    def test_find_next_component_eoc(self):
+        component = b'<LstmProjectedStreams>'
+        test_file = b'<!EndOfComponent>' + component + b'<tag><!EndOfComponent></Nnet>'
+        self.assertEqual(find_next_component(self.bytesio_from(test_file)), component.decode('ascii').lower()[1:-1])
+
     def test_find_next_component_end_of_nnet(self):
         test_file = b'<Nnet>somefakeinfo<another>info<tag><!EndOfComponent></Nnet>'
-        self.assertEqual(find_next_component(self.bytesio_from(test_file)), end_of_nnet_tag.lower()[1:-1])
+        self.assertRaises(Error, find_next_component, self.bytesio_from(test_file))
 
     def test_find_end_of_component(self):
         component = '<AffineComponent>'
index 86a0c17..12c0888 100644 (file)
@@ -37,7 +37,6 @@ def extractor_wrapper(mxnet_extractor):
 
 mxnet_op_extractors = {
     'BatchNorm': extractor_wrapper(batch_norm_ext),
-    'Crop': extractor_wrapper(crop_ext),
     'ScaleShift': extractor_wrapper(scale_shift_ext),
     'slice_axis': extractor_wrapper(slice_axis_ext),
     'null': lambda node: null_ext(node.symbol_dict),
index 006b07e..ab17ee1 100644 (file)
@@ -73,6 +73,7 @@ tf_op_extractors = {
     'ConcatV2': node_pb_arg(tf_concat_ext),
     'MatMul': node_pb_arg(tf_matmul_ext),
     'BatchMatMul': node_pb_arg(tf_batchmatmul_ext),
+    'BatchMatMulV2': node_pb_arg(tf_batchmatmul_ext),
     'Pack': node_pb_arg(tf_pack_ext),
     'Unpack': node_pb_arg(tf_unpack_ext),
     'Const': node_pb_arg(tf_const_ext),
index adbd902..6f5d1a1 100644 (file)
@@ -145,6 +145,12 @@ class Node:
         else:
             return self.has_valid('_out_ports') and idx in self.out_ports(control_flow=control_flow)
 
+    def is_in_port_connected(self, idx, control_flow=False):
+        return self.has_port('in', idx, control_flow) and not self.in_port(idx, control_flow).disconnected()
+
+    def is_out_port_connected(self, idx, control_flow=False):
+        return self.has_port('out', idx, control_flow) and not self.out_port(idx, control_flow).disconnected()
+
     def attrs(self):
         return self.graph.node[self.node]
 
@@ -240,8 +246,8 @@ class Node:
         return sorted([x for x in self.get_outputs(control_flow=control_flow) if 'out' in x[1]],
                       key=lambda x: x[1]['out'])
 
-    def soft_get(self, k):
-        return self[k] if self.has_valid(k) else '<UNKNOWN>'
+    def soft_get(self, k, default='<UNKNOWN>'):
+        return self[k] if self.has_valid(k) else default
 
     def edges(self, attrs: dict = None):
         """ Get a single edge with specified set of attributes.
@@ -911,7 +917,8 @@ def dict_includes_compare_attrs(attr, attr_probe):
     if callable(attr_probe) and not isinstance(attr_probe, type):
         return attr_probe(attr)
     else:
-        return attr == attr_probe
+        res = (attr == attr_probe)
+        return res if isinstance(res, bool) else all(res)
 
 
 def dict_includes(big: dict, sub_dict: dict, skip_attr_names=[]):
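The all(res) fallback covers attribute values that are numpy arrays, where == is elementwise and returns an array rather than a single bool. For example:

    import numpy as np

    attr, attr_probe = np.array([1, 16]), np.array([1, 16])
    res = (attr == attr_probe)          # array([ True,  True]), not a plain bool
    match = res if isinstance(res, bool) else all(res)
    print(match)                        # True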
index e24ad44..8512bd3 100644 (file)
@@ -13,6 +13,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+import numpy as np
 from copy import deepcopy
 
 from mo.front.common.partial_infer.utils import int64_array
@@ -107,7 +108,8 @@ class Port:
                 assert self.node.in_node(self.idx, control_flow=self.control_flow).value is None
                 self.node.in_node(self.idx, control_flow=self.control_flow).shape = int64_array(shape)
             else:
-                assert self.node.out_node(self.idx, control_flow=self.control_flow).value is None
+                data_node = self.node.out_node(self.idx, control_flow=self.control_flow)
+                assert data_node.value is None or np.array_equal(data_node.shape, int64_array(shape))
                 self.node.out_node(self.idx, control_flow=self.control_flow).shape = int64_array(shape)
 
     def _get_value(self):
index 3f28bd9..ce785db 100644 (file)
@@ -41,6 +41,13 @@ def data_type_str_to_precision(data_type_str: str):
     return SUPPORTED_DATA_TYPES[data_type_str][1] if data_type_str in SUPPORTED_DATA_TYPES else None
 
 
+def np_data_type_to_precision(np_data_type):
+    for np_t, precision in SUPPORTED_DATA_TYPES.values():
+        if np_t == np_data_type:
+            return precision
+    raise Error('Data type "{}" is not supported'.format(np_data_type))
+
+
 def convert_blob(graph: Graph, node: Node, data_type: type, force_precision: str):
     out_edges = graph.out_edges(node.node, data=True)
 
index cc90cff..ab860df 100644 (file)
@@ -158,10 +158,13 @@ def shape_inference(graph: Graph):
             old_out_shapes = [port.data.get_shape() for port in node.out_ports().values() if not port.disconnected()]
             node.infer(node)
             new_out_shapes = [port.data.get_shape() for port in node.out_ports().values() if not port.disconnected()]
-            for shape1, shape2 in zip(old_out_shapes, new_out_shapes):
-                if shape1 is not None and not np.array_equal(shape1, shape2):
-                    raise Error("After partial shape inference were found shape collision for node {} (old shape: {}, "
-                                "new shape: {})".format(node.name, shape1, shape2))
+            if not node.has_and_set('override_output_shape'):
+                for shape1, shape2 in zip(old_out_shapes, new_out_shapes):
+                    if shape1 is not None and not np.array_equal(shape1, shape2):
+                        raise Error("After partial shape inference, a shape collision was found for node {} "
+                                    "(old shape: {}, new shape: {})".format(node.name, shape1, shape2))
+            else:
+                del node['override_output_shape']
             node.need_shape_inference = False
 
 
index b3894a5..d2d8070 100644 (file)
@@ -49,7 +49,13 @@ class Broadcast(Op):
     @staticmethod
     def infer(node: Node):
         # TODO Add necessary checks and asserts
-        node.out_node().shape = node.in_node(1).value
+        b_value = node.in_port(0).data.get_value()
+        b_shape = node.in_port(1).data.get_value()
+        assert b_shape is not None
+        node.out_port(0).data.set_shape(b_shape)
+
         PermuteInputs().set_input_permutation(node.in_node(1), node, 'output:0', 'shape')
-        if node.in_node(0).value is not None and node.in_node(1).value is not None:
-            node.out_node().value = np.broadcast_to(node.in_node(0).value, node.in_node(1).value)
+        if b_value is not None:
+            new_value = np.broadcast_to(b_value, b_shape)
+            node.out_port(0).data.set_value(new_value)
+
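A quick illustration of the value path in the rewritten Broadcast.infer, using plain numpy with assumed inputs:

    import numpy as np

    b_value = np.array([1.0, 2.0, 3.0])      # input 0: data to broadcast
    b_shape = np.array([2, 3])               # input 1: target shape
    print(np.broadcast_to(b_value, b_shape))
    # [[1. 2. 3.]
    #  [1. 2. 3.]]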
diff --git a/model-optimizer/mo/ops/constant_of_shape.py b/model-optimizer/mo/ops/constant_of_shape.py
new file mode 100644 (file)
index 0000000..ef17fa8
--- /dev/null
@@ -0,0 +1,41 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph
+from mo.ops.op import Op
+
+
+class ConstantOfShape(Op):
+    """ Create a tensor of the shape specified in the first input with all values equal to attribute 'value'.
+    The operation is converted to Broadcast operation
+    """
+
+    op = 'ConstantOfShape'
+    enabled = True
+
+    def __init__(self, graph: Graph, attrs: dict):
+        super().__init__(graph, {
+            'kind': 'op',
+            'type': None,
+            'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+            'fill_value': 0,
+            'infer': None,
+        }, attrs)
+
+    def supported_attrs(self):
+        return ['fill_value']
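Semantically the new op materializes a tensor of the requested shape filled with 'fill_value'; as the docstring notes, the actual lowering goes through Broadcast. In plain numpy terms (illustration only):

    import numpy as np

    shape = np.array([2, 3], dtype=np.int64)   # first input: target shape
    fill_value = 0                             # 'fill_value' attribute
    print(np.full(tuple(shape), fill_value))
    # [[0 0 0]
    #  [0 0 0]]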
index d314ba7..8539b16 100644 (file)
@@ -90,7 +90,11 @@ class Crop(Op):
             if len(node.crop_begin) != len(node.axis) or len(node.crop_end) != len(node.axis):
                 log.error('number of crop_begin/crop_end should match number of axis')
                 return
-            output_shape[node.axis] = output_shape[node.axis] - node.crop_begin - node.crop_end
+            if type(node.axis) in [list, tuple]:
+                for i in range(len(node.axis)):
+                    output_shape[node.axis[i]] = output_shape[node.axis[i]] - node.crop_begin[i] - node.crop_end[i]
+            else:
+                output_shape[node.axis] = output_shape[node.axis] - node.crop_begin - node.crop_end
         else:
             log.error('Crop node {} should have either dim or crop_begin and crop_end attributes'.format(node.name))
             return
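The new list/tuple branch applies crop_begin/crop_end per axis; a worked example of the shape arithmetic with assumed values:

    import numpy as np

    output_shape = np.array([1, 3, 224, 224], dtype=np.int64)
    axis, crop_begin, crop_end = [2, 3], [10, 20], [14, 4]

    for i in range(len(axis)):
        output_shape[axis[i]] = output_shape[axis[i]] - crop_begin[i] - crop_end[i]
    print(output_shape)   # [  1   3 200 200]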
index 22215ba..5ad5f46 100644 (file)
@@ -18,7 +18,7 @@ import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
-from mo.ops.op import Op, PermuteAttrs
+from mo.ops.op import Op
 from mo.utils.error import Error
 
 
@@ -75,4 +75,4 @@ class ExpandDims(Op):
         # convert data type of the shape to int64 explicitly
         output_node.shape = output_node.shape.astype(np.int64)
         if input_node.value is not None:
-            output_node.value = np.array(np.reshape(input_node.value, output_node.shape))
+            output_node.value = input_node.value.reshape(output_node.shape)
index 515bb5c..da9f526 100644 (file)
@@ -43,33 +43,24 @@ class MemoryOffset(Op):
         # MemoryOffset is splitted in 2 parts to avoid cycle in graph
         # Calculate shape from shape of previous layer where possible
         # In other cases information about shapes from initial Kaldi model used
-        if len(node.in_nodes()) > 0:
+        if not node.in_port(0).disconnected():
             copy_shape_infer(node)
             pair_node = Node(node.graph, node.pair_name)
-            for out_node_name, params in pair_node.get_outputs():
-                out_node = Node(node.graph, out_node_name)
-                out_node.shape = node.out_node().shape
+            pair_node.out_port(0).data.set_shape(node.out_port(0).data.get_shape())
         else:
             pair_node = Node(node.graph, node.pair_name)
-            if pair_node.in_node().shape is not None:
-                for out_node_name, params in node.get_outputs():
-                    out_node = Node(node.graph, out_node_name)
-                    out_node.shape = pair_node.in_node().shape
+            if pair_node.in_port(0).data.get_shape() is not None:
+                node.out_port(0).data.set_shape(pair_node.in_port(0).data.get_shape())
                 copy_shape_infer(pair_node)
             elif pair_node.has_valid('element_size'):
                 # TODO Add here real batch
-                for out_node_name, params in node.get_outputs():
-                    out_node = Node(node.graph, out_node_name)
-                    out_node.shape = np.array([1, pair_node['element_size']])
-            elif pair_node.in_node().in_node().op == 'FullyConnected':
-                out_size = pair_node.in_node().in_node()['out-size']
-                for out_node_name, params in node.get_outputs():
-                    out_node = Node(node.graph, out_node_name)
-                    out_node.shape = np.array([1, out_size])
-            elif pair_node.in_node().in_node().op == 'Normalize':
-                    out_size = pair_node.in_node().in_node()['in_dim']
-                    for out_node_name, params in node.get_outputs():
-                        out_node = Node(node.graph, out_node_name)
-                        out_node.shape = np.array([1, out_size])
+                node.out_port(0).data.set_shape(np.array([1, pair_node['element_size']]))
+            elif pair_node.in_port(0).get_source().node.has_valid('out-size'):
+                out_size = pair_node.in_port(0).get_source().node['out-size']
+                node.out_port(0).data.set_shape(np.array([1, out_size]))
+            elif pair_node.in_port(0).get_source().node.has_valid('in_dim'):
+                out_size = pair_node.in_port(0).get_source().node['in_dim']
+                node.out_port(0).data.set_shape(np.array([1, out_size]))
             else:
-                raise Error("Can't calculate MemoryOffset shape for node {}. Possibly you need to add shape for it through --input_shape".format(node.id))
+                raise Error("Can't calculate MemoryOffset shape for node {}. ".format(node.id) +
+                            "Possibly you need to add shape for it through --input_shape")
index fda2acd..f146aaa 100644 (file)
@@ -40,8 +40,10 @@ class Slice(Op):
 
     @staticmethod
     def infer(node: Node):
+        axis = None
+        steps = None
         if len(node.in_nodes()) == 1:
-            # Caffe or ONNX
+            # Caffe or ONNX before opset 10
             if node.has('start') and node.has('end') and node.has('axis'):
                 # ONNX case
                 if node.has_valid('start') and node.has_valid('end') and node.has('axis'):
@@ -55,26 +57,49 @@ class Slice(Op):
                 # Caffe case
                 from mo.front.common.partial_infer.slice import caffe_slice_infer
                 caffe_slice_infer(node)
-        elif len(node.in_nodes()) == 3:
-            # TF case
-            start_node = node.in_node(1)
-            size_node = node.in_node(2)
-            if start_node.has_valid('value') and size_node.has_valid('value'):
-                start = np.array(node.in_node(1).value, dtype=np.int64)
-                size = np.array(node.in_node(2).value, dtype=np.int64)
-                end = start + size
-                axis = None
-
-                # Delete edges to start, size nodes
-                node.graph.remove_edge(node.in_node(1).id, node.id)
-                node.graph.remove_edge(node.in_node(2).id, node.id)
-
-                node['start'] = start
-                node['end'] = end
-                node['axis'] = None
+        elif len(node.in_nodes()) >= 3:
+            if node.has('format') and node['format'] == 'onnx':
+                # ONNX opset 10 case
+                starts_node = node.in_node(1)
+                ends_node = node.in_node(2)
+                if starts_node.has_valid('value') and ends_node.has_valid('value'):
+                    start = np.array(node.in_node(1).value, dtype=np.int64)
+                    end = np.array(node.in_node(2).value, dtype=np.int64)
+                    if 3 in node.in_nodes():
+                        if node.in_node(3).has_valid('value'):
+                            axis = np.array(node.in_node(3).value, dtype=np.int64)
+                        else:
+                            log.warning('Incorrect slice operation: axes should be const')
+                            return
+                    if 4 in node.in_nodes():
+                        if node.in_node(4).has_valid('value'):
+                            steps = np.array(node.in_node(4).value, dtype=np.int64)
+                        else:
+                            log.warning('Incorrect slice operation: steps should be const')
+                            return
+                else:
+                    log.warning('Incorrect slice operation: no starts or ends attr')
+                    return
             else:
-                log.warning('Incorrect slice operation: no starts or end attr')
-                return
+                # TF case
+                start_node = node.in_node(1)
+                size_node = node.in_node(2)
+                if start_node.has_valid('value') and size_node.has_valid('value'):
+                    start = np.array(node.in_node(1).value, dtype=np.int64)
+                    size = np.array(node.in_node(2).value, dtype=np.int64)
+                    end = start + size
+                    axis = None
+
+                    # Delete edges to start, size nodes
+                    node.graph.remove_edge(node.in_node(1).id, node.id)
+                    node.graph.remove_edge(node.in_node(2).id, node.id)
+
+                    node['start'] = start
+                    node['end'] = end
+                    node['axis'] = None
+                else:
+                    log.warning('Incorrect slice operation: no starts or end attr')
+                    return
         else:
             log.warning('Incorrect number of input nodes in slice operation')
             return
@@ -96,12 +121,15 @@ class Slice(Op):
         if axis is None:
             axis = [x for x in range(len(start))]
 
+        if steps is None:
+            steps = np.ones(start.size, dtype=np.int64)
+
         # Calculate output value for slice operation
         slice_idx = [None for x in range(len(node.in_node().shape))]
         shrink_axis_mask = [False for x in range(len(node.in_node().shape))]
         for id in range(len(axis)):
             # Ranged for output value for specified axis
-            slice_idx[axis[id]] = slice(start[id], end[id], 1)
+            slice_idx[axis[id]] = slice(start[id], end[id], steps[id])
 
         # TODO: check whether this check is really important
         for axis, s in enumerate(slice_idx):
@@ -113,5 +141,5 @@ class Slice(Op):
         node['shrink_axis_mask'] = np.array(shrink_axis_mask)
 
         value = value[tuple(slice_idx)]
-        node.out_node().value = np.array(value) if node.in_node(0).value is not None else None
+        node.out_node().value = value.copy() if node.in_node(0).value is not None else None
         node.out_node().shape = np.array(value.shape)
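With the ONNX opset-10 inputs the slice now honours per-axis steps; a plain-numpy illustration of the stepped slicing the node performs (assumed values):

    import numpy as np

    value = np.arange(10)
    start, end, steps, axis = [1], [9], [2], [0]

    slice_idx = [slice(None)] * value.ndim
    for i in range(len(axis)):
        slice_idx[axis[i]] = slice(start[i], end[i], steps[i])
    print(value[tuple(slice_idx)])   # [1 3 5 7]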
index 9e513b7..0244472 100644 (file)
@@ -69,8 +69,8 @@ class Squeeze(Op):
         if node.in_port(1).get_source().node.op == 'Const':
             node.in_port(1).data.set_value(real_squeeze_dims)
 
-        if node.in_node().value is not None:
-            node.out_node().value = np.array(np.reshape(node.in_node().value, output_shape))
+        if node.in_port(0).data.get_value() is not None:
+            node.out_port(0).data.set_value(node.in_port(0).data.get_value().reshape(output_shape))
 
         # the squeeze_dim attribute will be converted to the second input in the end of the Middle phase
         PermuteInputs().set_input_permutation(node.in_node(1), node, 'input:0', 'axis')
index 3cb314b..ee4a788 100644 (file)
@@ -89,6 +89,7 @@ class StridedSlice(Op):
 
         def convert(attr):
             return lambda node: array_to_str(node, attr)
+
         for a in list(['new_axis_mask', 'shrink_axis_mask', 'ellipsis_mask', 'begin_mask', 'end_mask']):
             al.append((a, convert(a)))
         return al
@@ -97,7 +98,7 @@ class StridedSlice(Op):
     def infer(node: Node):
         tf_strided_slice_infer(node)
 
-        if node.graph.graph['layout'] == 'NHWC':
+        if node.graph.graph['layout'] == 'NHWC' and node.out_port(0).data.get_value() is None:
             PermuteAttrs.create_permute_attrs(node, attrs=[('shrink_axis_mask', 'input:0', permute_masks),
                                                            ('new_axis_mask', 'input:0', permute_masks),
                                                            ('ellipsis_mask', 'input:0', permute_masks),
index dd3c13f..4923ff9 100644 (file)
@@ -68,9 +68,9 @@ class Unsqueeze(Op):
         for dim in unsqueeze_dims:
             output_shape = np.insert(output_shape, dim, 1)
 
-        node.out_port(0).data.set_shape(int64_array(output_shape))
-
         if input_value is not None:
-            node.out_port(0).data.set_value(np.reshape(input_value, output_shape))
+            node.out_port(0).data.set_value(input_value.reshape(output_shape))
+        else:
+            node.out_port(0).data.set_shape(int64_array(output_shape))
 
         PermuteInputs().set_input_permutation(node.in_node(1), node, 'input:0', 'axis')
index f98c5ec..98f49b8 100644 (file)
@@ -41,12 +41,14 @@ from mo.utils import class_registration
 from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
 from mo.utils.find_inputs import find_inputs
+from mo.utils.logger import log_step
 from mo.utils.utils import refer_to_faq_msg
 
 
 def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str, output_model_name: str,
            output_dir: str, caffe_proto_path: str, mean_file: str = "",
-           mean_file_offsets: tuple = None, custom_layers_mapping_path: str = None):
+           mean_file_offsets: tuple = None, custom_layers_mapping_path:str = None):
+    log_step(argv.steps, 'LOAD')
     meta_info = get_meta_info(argv)
 
     caffe_pb2 = loader.import_caffe_pb2(caffe_proto_path)
@@ -91,7 +93,9 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
     extract_node_attrs(graph, lambda node: caffe_extractor(node, check_for_duplicates(caffe_type_extractors)))
 
     # --------------------------------- LOAD END ------------------------------------------------------
+    log_step(argv.steps, 'FRONT')
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
+    log_step(argv.steps, 'MIDDLE')
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
     # Mark nodes with attr 'can_be_fused': False to disable fusing for specified nodes
@@ -150,13 +154,14 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
     permute_op_nodes_attrs(graph)
 
     graph_clean_up(graph)
+    log_step(argv.steps, 'BACK')
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
     remove_const_ops(graph)
     CreateConstNodesReplacement().find_and_replace_pattern(graph)
 
     remove_output_ops(graph)
-
+    log_step(argv.steps, 'EMIT')
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     mean_data=mf,
                     input_names=input_names,
index 8d833ff..cc91fda 100644 (file)
@@ -18,7 +18,9 @@ import logging as log
 import numpy as np
 
 from extensions.back.CreateConstNodes import CreateConstNodesReplacement
+from extensions.back.CutMemory import CutMemory
 from extensions.back.ElementwiseOpsToEltwiseOps import DivideToEltwises, SubtractToEltwises, SimpleEltwiseToEltwiseOp
+from extensions.back.ForceStrictPrecision import ForceStrictPrecision
 from extensions.back.LeakyReluToReluWithNegativeSlope import LeakyReluToReluWithNegativeSlope
 from extensions.back.ParameterToPlaceholder import ParameterToInput
 from extensions.back.TransposeToPermute import TransposeToPermute
@@ -28,10 +30,13 @@ from extensions.front.kaldi.eliminate_redundant_reshape import EliminateRedundan
 from extensions.front.kaldi.fuse_repeated_reshape import FuseRepeatedReshapes
 from extensions.front.kaldi.replace_lstm_node_pattern import ReplaceLSTMNodePattern
 from extensions.middle.EltwiseChecker import EltwiseChecker
-from extensions.middle.RemoveDuplicationMemory import RemoveMemoryDuplicationPattern
+from extensions.middle.InsertSelect import AddSelectBeforeMemoryNodePattern
+from extensions.middle.RemoveDuplicationMemory import RemoveMemoryDuplicationPattern, MergeNeighborSplicePattern
 from extensions.middle.RemoveIdentity import RemoveIdentity
 from extensions.middle.RemoveUselessCrops import RemoveUselessCropsPattern
-from extensions.middle.ReplaceMemoryOffsetWithSplice import ReplaceMemoryOffsetNodePattern, ReplaceMemoryOffsetWithMemoryNodePattern
+from extensions.middle.ReplaceMemoryOffsetWithSplice import ReplaceMemoryOffsetNodePattern, \
+    ReplaceMemoryOffsetWithMemoryNodePattern
+from extensions.middle.ReplacePNorm import ReplacePNormNodePattern
 from extensions.middle.ReplaceSpliceNodePattern import ReplaceSpliceNodePattern
 from mo.front.common.register_custom_ops import update_extractors_with_extensions
 from mo.front.extractor import extract_node_attrs, remove_output_ops
@@ -47,6 +52,7 @@ from mo.utils import class_registration
 from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
 from mo.utils.find_inputs import find_outputs
+from mo.utils.logger import log_step
 from mo.utils.utils import refer_to_faq_msg
 
 
@@ -113,14 +119,15 @@ def apply_biases_to_last_layer(graph, counts):
 
 
 def driver(argv, input_model, output_model_name, output_dir):
+    log_step(argv.steps, 'LOAD')
     meta_info = get_meta_info(argv)
 
     EltwiseChecker.enabled = False
 
     try:
-        graph, input_shapes = load_kaldi_model(input_model)
+        graph = load_kaldi_model(input_model)
     except Exception as e:
-        raise Error('Model Optimizer is not able to read Kaldi model {}. '.format(input_model) +
+        raise Error('Model Optimizer is not able to parse Kaldi model {}. '.format(input_model) +
                     refer_to_faq_msg(91)) from e
     graph.check_empty_graph('load_kaldi_nnet_model')
     graph.graph['cmd_params'] = argv
@@ -136,18 +143,23 @@ def driver(argv, input_model, output_model_name, output_dir):
     extract_node_attrs(graph, lambda node: kaldi_extractor(node))
 
     # --------------------------------- LOAD END ------------------------------------------------------
+    log_step(argv.steps, 'FRONT')
     ReplaceLSTMNodePattern().find_and_replace_pattern(graph)
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
-
+    log_step(argv.steps, 'MIDDLE')
     graph = partial_infer(graph)
 
+    ReplacePNormNodePattern().find_and_replace_pattern(graph)
     ReplaceMemoryOffsetNodePattern().find_and_replace_pattern(graph)
     ReplaceMemoryOffsetWithMemoryNodePattern().find_and_replace_pattern(graph)
     RemoveMemoryDuplicationPattern().find_and_replace_pattern(graph)
+    MergeNeighborSplicePattern().find_and_replace_pattern(graph)
     RemoveUselessCropsPattern().find_and_replace_pattern(graph)
     RemoveIdentity().find_and_replace_pattern(graph)
     graph_clean_up(graph)
 
+    AddSelectBeforeMemoryNodePattern().find_and_replace_pattern(graph)
+
     ReplaceSpliceNodePattern().find_and_replace_pattern(graph)
     graph_clean_up(graph)
 
@@ -171,7 +183,7 @@ def driver(argv, input_model, output_model_name, output_dir):
         log.debug("After removing softmax")
         graph.print_graph_stat()
 
-    ParameterToInput().find_and_replace_pattern(graph)
+    log_step(argv.steps, 'BACK')
     LeakyReluToReluWithNegativeSlope().find_and_replace_pattern(graph)
     TransposeToPermute().find_and_replace_pattern(graph)
     DivideToEltwises().find_and_replace_pattern(graph)
@@ -180,10 +192,17 @@ def driver(argv, input_model, output_model_name, output_dir):
     for_graph_and_each_sub_graph_recursively(graph, convert_matmul_to_fully_connected)
 
     # Intentionally after all transformations
+    if argv.remove_memory:
+        CutMemory().find_and_replace_pattern(graph)
+        graph_clean_up(graph)
+    ParameterToInput().find_and_replace_pattern(graph)
+
     KaldiRemoveMemoryOutputBackReplacementPattern().find_and_replace_pattern(graph)
+    ForceStrictPrecision().find_and_replace_pattern(graph)
     remove_const_ops(graph)
     CreateConstNodesReplacement().find_and_replace_pattern(graph)
 
     remove_output_ops(graph)
+    log_step(argv.steps, 'EMIT')
     prepare_emit_ir(graph, argv.data_type, output_dir, output_model_name, meta_info=meta_info)
     return 0
index e9f6ac8..db0bcbc 100644 (file)
@@ -16,6 +16,7 @@
 from extensions.back.CreateConstNodes import CreateConstNodesReplacement
 from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
 from mo.utils.error import Error, FrameworkError
+from mo.utils.logger import log_step
 from mo.utils.utils import refer_to_faq_msg
 
 try:
@@ -50,6 +51,7 @@ from extensions.middle.EltwiseInputNormalization import EltwiseInputNormalize
 
 
 def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, output_dir: str):
+    log_step(argv.steps, 'LOAD')
     meta_info = get_meta_info(argv)
 
     try:
@@ -88,7 +90,9 @@ def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, o
     extract_node_attrs(graph, mxnet_op_extractor)
 
     # --------------------------------- LOAD END ------------------------------------------------------
+    log_step(argv.steps, 'FRONT')
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
+    log_step(argv.steps, 'MIDDLE')
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
     fuse_pad(graph)
@@ -142,6 +146,7 @@ def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, o
     permute_op_nodes_attrs(graph)
 
     graph_clean_up(graph)
+    log_step(argv.steps, 'BACK')
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
     for_graph_and_each_sub_graph_recursively(graph, remove_const_ops)
@@ -149,6 +154,7 @@ def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, o
 
     for_graph_and_each_sub_graph_recursively(graph, remove_output_ops)
 
+    log_step(argv.steps, 'EMIT')
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     meta_info=meta_info)
     return 0
index 9a4af95..4ef9ea6 100644 (file)
@@ -48,10 +48,12 @@ from mo.pipeline.common import prepare_emit_ir
 from mo.utils import class_registration
 from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
+from mo.utils.logger import log_step
 from mo.utils.utils import refer_to_faq_msg
 
 
 def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: str, output_dir: str):
+    log_step(argv.steps, 'LOAD')
     meta_info = get_meta_info(argv)
 
     model_proto = load_onnx_model(model_file_name)
@@ -92,7 +94,9 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
     extract_node_attrs(graph, lambda node: onnx_op_extractor(node, check_for_duplicates(onnx_op_extractors)))
 
     # --------------------------------- LOAD END ------------------------------------------------------
+    log_step(argv.steps, 'FRONT')
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
+    log_step(argv.steps, 'MIDDLE')
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
     fuse_pad(graph)
@@ -164,6 +168,8 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
     permute_op_nodes_attrs(graph)
 
     graph_clean_up_onnx(graph)
+
+    log_step(argv.steps, 'BACK')
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
     for_graph_and_each_sub_graph_recursively(graph, remove_const_ops)
@@ -172,6 +178,7 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
 
     for_graph_and_each_sub_graph_recursively(graph, remove_output_ops)
 
+    log_step(argv.steps, 'EMIT')
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     meta_info=meta_info)
 
index e024bf7..65bab2a 100644 (file)
@@ -51,6 +51,7 @@ from mo.pipeline.common import prepare_emit_ir
 from mo.utils import class_registration, tensorboard
 from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
+from mo.utils.logger import log_step
 from mo.utils.utils import refer_to_faq_msg
 
 try:
@@ -66,6 +67,7 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
     The specific TF structure assumes each GraphDef node is converted to a single
     NetworkX node, node id is an original TF node name, and edges go directly from one op to another op.
     """
+    log_step(argv.steps, 'LOAD')
     meta_info = get_meta_info(argv)
 
     if argv.tensorflow_custom_layer_libraries:
@@ -130,7 +132,9 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
     extract_node_attrs(graph, lambda node: tf_op_extractor(node, check_for_duplicates(tf_op_extractors)))
 
     # --------------------------------- LOAD END ------------------------------------------------------
+    log_step(argv.steps, 'FRONT')
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
+    log_step(argv.steps, 'MIDDLE')
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
     fuse_pad(graph)
@@ -218,6 +222,8 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
     for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
     graph.graph['layout'] = 'NCHW'
+
+    log_step(argv.steps, 'BACK')
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
     for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
@@ -226,6 +232,7 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
 
     for_graph_and_each_sub_graph_recursively(graph, remove_output_ops)
 
+    log_step(argv.steps, 'EMIT')
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     meta_info=meta_info)
 
index 3e35319..bbe78b3 100644 (file)
@@ -210,13 +210,13 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
                                        'DEBUG', 'NOTSET'],
                               default='ERROR')
     common_group.add_argument('--input',
-                              help='Comma-separated list of input nodes names with shapes ' +
-                                   'and values for freezing. '+
-                                   'For example, use the following format to set input port <port1> ' +
-                                   'of the node <node_name1> with the shape <shape1> as an input node and ' +
-                                   'freeze output port <port2> of the node <node_name2> with the value <value2> ' +
-                                   'and the shape <shape2>: ' +
-                                   'port1:node_name1[shape1], node_name2:port2[shape2]->value2.')
+                              help='Quoted list of comma-separated input nodes names with shapes ' +
+                                   'and values for freezing. The shape and value are specified as space-separated lists. '+
+                                   'For example, use the following format to set input port 0 ' +
+                                   'of the node `node_name1` with the shape [3 4] as an input node and ' +
+                                   'freeze output port 1 of the node `node_name2` with the value [20 15] ' +
+                                   'and the shape [2]: ' +
+                                   '"0:node_name1[3 4],node_name2:1[2]->[20 15]".')
     common_group.add_argument('--output',
                               help='The name of the output operation of the model. ' +
                                    'For TensorFlow*, do not add :0 to this name.')
@@ -301,6 +301,10 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
                               help='[ Experimental feature ] Enables `Shape` operation with all children keeping. '
                                    'This feature makes model reshapable in Inference Engine',
                               action='store_true', default=False)
+    common_group.add_argument('--steps',
+                              help='Enables model conversion steps display',
+                              action='store_true', default=False)
+
     return parser
 
 
@@ -369,7 +373,8 @@ def get_mxnet_cli_options():
 def get_kaldi_cli_options():
     d = {
         'counts': '- A file name with full path to the counts file',
-        'remove_output_softmax': '- Removes the SoftMax layer that is the output layer'
+        'remove_output_softmax': '- Removes the SoftMax layer that is the output layer',
+        'remove_memory': '- Removes the Memory layer and uses additional inputs and outputs instead'
     }
 
     return OrderedDict(sorted(d.items(), key=lambda t: t[0]))
@@ -564,6 +569,11 @@ def get_kaldi_cli_parser(parser: argparse.ArgumentParser = None):
     kaldi_group.add_argument("--remove_output_softmax",
                              help="Removes the SoftMax layer that is the output layer",
                              action='store_true')
+
+    kaldi_group.add_argument("--remove_memory",
+                             help="Removes the Memory layer and uses additional inputs and outputs instead",
+                             action='store_true',
+                             default=False)
     return parser
 
 
index d7d28e7..3ff74fb 100644 (file)
@@ -25,9 +25,12 @@ class BasicError(Exception):
     """
 
     def __str__(self):
+        cause = ""
+        if self.__cause__:
+            cause = self.__cause__.__str__() + '\n'
         if len(self.args) <= 1:
-            return Exception.__str__(self)
-        return self.args[0].format(*self.args[1:])  # pylint: disable=unsubscriptable-object
+            return cause + Exception.__str__(self)
+        return cause + self.args[0].format(*self.args[1:])  # pylint: disable=unsubscriptable-object
 
 
 class FrameworkError(BasicError):
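
For illustration, the updated BasicError.__str__ above now prepends the message of a chained exception (one attached with `raise ... from ...`) before the formatted arguments, so wrapped framework errors surface their root cause. A minimal sketch of the resulting behavior, assuming FrameworkError from mo.utils.error as defined in this file; the wrapped ValueError and the model name are invented for the example:

    from mo.utils.error import FrameworkError

    # FrameworkError inherits BasicError, so str() now starts with the chained cause.
    try:
        try:
            raise ValueError("protobuf parsing failed")          # invented root cause
        except ValueError as err:
            raise FrameworkError('Cannot load model "{}"', 'model.pb') from err
    except FrameworkError as fw_err:
        # Expected output: the cause line, then the formatted message:
        #   protobuf parsing failed
        #   Cannot load model "model.pb"
        print(str(fw_err))
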
index dfaaf9c..51cfdd3 100644 (file)
@@ -199,7 +199,8 @@ def invert_sub_graph_between_nodes(graph: Graph, start_nodes: list, end_nodes: l
     while len(d) != 0:
         cur_node_name = d.popleft()
         sub_graph_nodes.append(cur_node_name)
-        if cur_node_name not in start_nodes and detect_extra_start_node(Node(graph, cur_node_name)):
+        if cur_node_name not in start_nodes and \
+                detect_extra_start_node is not None and detect_extra_start_node(Node(graph, cur_node_name)):
             extra_start_nodes.append(cur_node_name)
         else:
             if cur_node_name not in end_nodes:  # do not add output nodes of the end_nodes
index 51bc390..ded44a1 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
+import importlib
 import logging as log
 import os
 import re
 
+# Workaround for an abseil bug that affects logging when importing TF 1.14 and newer
+# Link to original issue: https://github.com/abseil/abseil-py/issues/99
+if importlib.util.find_spec('absl') is not None:
+    import absl.logging
+    log.root.removeHandler(absl.logging._absl_handler)
+
 handler_num = 0
 
 
@@ -77,3 +83,16 @@ def init_logger(lvl: str, silent: bool):
     if handler_num == 0:
         logger.addHandler(handler)
         handler_num += 1
+
+
+def log_step(flag, step):
+    messages = {
+        'LOAD': 'Model loading step',
+        'FRONT': 'Front phase execution step',
+        'MIDDLE': 'Middle phase execution step',
+        'BACK': 'Back phase execution step',
+        'EMIT': 'IR emitting step',
+    }
+    if flag:
+        assert step in messages.keys()
+        print('[ INFO ] {}'.format(messages[step]))
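
The log_step helper above is what the new --steps flag in the common CLI options and the log_step(argv.steps, ...) calls added to the Kaldi, MXNet, ONNX, and TF drivers hook into. A minimal sketch of its behavior, with a local flag standing in for argv.steps:

    from mo.utils.logger import log_step

    steps_enabled = True                # stands in for argv.steps
    log_step(steps_enabled, 'LOAD')     # prints: [ INFO ] Model loading step
    log_step(steps_enabled, 'FRONT')    # prints: [ INFO ] Front phase execution step
    log_step(steps_enabled, 'MIDDLE')   # prints: [ INFO ] Middle phase execution step
    log_step(steps_enabled, 'BACK')     # prints: [ INFO ] Back phase execution step
    log_step(steps_enabled, 'EMIT')     # prints: [ INFO ] IR emitting step
    log_step(False, 'LOAD')             # prints nothing when the flag is not set
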
index 5352db3..0ebd8a1 100644 (file)
@@ -46,10 +46,13 @@ mapping_rules = [
     ('multiscale_anchor_generator_aspect_ratios', 'anchor_generator/multiscale_anchor_generator/aspect_ratios'),
     ('multiscale_anchor_generator_scales_per_octave', 'anchor_generator/multiscale_anchor_generator/scales_per_octave'),
     # SSD anchor generator attributes
-    ('ssd_anchor_generator_min_scale', 'anchor_generator/ssd_anchor_generator/min_scale'),
-    ('ssd_anchor_generator_max_scale', 'anchor_generator/ssd_anchor_generator/max_scale'),
+    ('ssd_anchor_generator_min_scale', 'anchor_generator/ssd_anchor_generator/min_scale', 0.2),
+    ('ssd_anchor_generator_max_scale', 'anchor_generator/ssd_anchor_generator/max_scale', 0.95),
     ('ssd_anchor_generator_num_layers', 'anchor_generator/ssd_anchor_generator/num_layers'),
     ('ssd_anchor_generator_aspect_ratios', 'anchor_generator/ssd_anchor_generator/aspect_ratios'),
+    ('ssd_anchor_generator_scales', 'anchor_generator/ssd_anchor_generator/scales'),
+    ('ssd_anchor_generator_interpolated_scale_aspect_ratio',
+     'anchor_generator/ssd_anchor_generator/interpolated_scale_aspect_ratio', 1.0),
     ('ssd_anchor_generator_reduce_lowest', 'anchor_generator/ssd_anchor_generator/reduce_boxes_in_lowest_layer'),
     ('ssd_anchor_generator_base_anchor_height', 'anchor_generator/ssd_anchor_generator/base_anchor_height', 1.0),
     ('ssd_anchor_generator_base_anchor_width', 'anchor_generator/ssd_anchor_generator/base_anchor_width', 1.0),
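
In the mapping rules above, a 2-tuple maps a Model Optimizer parameter name to its path in pipeline.config, while a 3-tuple additionally carries a default value used when the field is absent; that is how the new ssd_anchor_generator min_scale, max_scale, and interpolated_scale_aspect_ratio defaults reach the parser test expectations below. A hypothetical sketch of that convention (resolve_rule and flat_config are invented for this illustration, not Model Optimizer APIs):

    # Hypothetical helper: apply one (name, path[, default]) rule to a flattened config dict.
    def resolve_rule(rule, flat_config):
        name, path = rule[0], rule[1]
        default = rule[2] if len(rule) == 3 else None
        return name, flat_config.get(path, default)

    flat_config = {'anchor_generator/ssd_anchor_generator/num_layers': 6}
    print(resolve_rule(('ssd_anchor_generator_num_layers',
                        'anchor_generator/ssd_anchor_generator/num_layers'), flat_config))
    # ('ssd_anchor_generator_num_layers', 6)
    print(resolve_rule(('ssd_anchor_generator_min_scale',
                        'anchor_generator/ssd_anchor_generator/min_scale', 0.2), flat_config))
    # ('ssd_anchor_generator_min_scale', 0.2) -- the default applies when the field is missing
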
index 6c8e19b..747a5ce 100644 (file)
@@ -145,6 +145,9 @@ class TestingSimpleProtoParser(unittest.TestCase):
                            'anchor_generator_width': 256,
                            'anchor_generator_height_stride': 16,
                            'anchor_generator_width_stride': 16,
+                           'ssd_anchor_generator_min_scale': 0.2,
+                           'ssd_anchor_generator_max_scale': 0.95,
+                           'ssd_anchor_generator_interpolated_scale_aspect_ratio': 1.0,
                            }
         os.unlink(file_name)
         self.assertDictEqual(pipeline_config._model_params, expected_result)
index fd5a305..0d2cd53 100644 (file)
@@ -234,9 +234,14 @@ def build_graph_with_edge_attrs(nodes_attrs: dict, edges: list, update_attribute
 
 
 def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref=None, check_op_attrs=False):
+    from mo.utils.unittest.ir_engine import IREngine
+    stderr = []
     if last_node_ref is None:
         last_node_ref = last_node
 
+    if 'statistics' in graph.graph and 'statistics' in graph_ref.graph:
+        assert graph.graph['statistics'] == graph_ref.graph['statistics'], "int8 statistics comparison failed"
+
     q = deque([last_node])
     q_ref = deque([last_node_ref])
 
@@ -245,7 +250,8 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
 
     while len(q_ref) != 0:
         if len(q) == 0:
-            return False, 'Graphs have different number of nodes'
+            stderr.append('Graphs have different number of nodes')
+            return False, stderr
         node = Node(graph, q.popleft())
         node_ref = Node(graph_ref, q_ref.popleft())
 
@@ -254,25 +260,28 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
 
         # Check that nodes has same amount of output nodes
         if len(node_ref.out_nodes()) != len(node.out_nodes()):
-            return False, 'Current node "{}" and reference node "{}" have different amount of output nodes: {} vs {}'.\
-                format(node.id, node_ref.id, len(node_ref.out_nodes()), len(node.out_nodes()))
+            stderr.append('Current node "{}" and reference node "{}" have different amount of output nodes: {} vs {}'.\
+                          format(node.id, node_ref.id, len(node_ref.out_nodes()), len(node.out_nodes())))
+            return False, stderr
 
         # Check that nodes has same amount of input nodes
         if len(node_ref.in_nodes()) != len(node.in_nodes()):
-            return False, 'Current node "{}" and reference node "{}" have different amount of input nodes: {} vs {}'.\
-                format(node.id, node_ref.id, len(node_ref.in_nodes()), len(node.in_nodes()))
+            stderr.append('Current node "{}" and reference node "{}" have different amount of input nodes: {} vs {}'.\
+                          format(node.id, node_ref.id, len(node_ref.in_nodes()), len(node.in_nodes())))
+            return False, stderr
 
         # Check that nodes has same 'kind'
         if node_ref.kind != node.kind:
-            return False, 'Current node "{}" and reference node "{}" have different kind parameter'.\
-                format(node.id, node_ref.id)
+            stderr.append('Current node "{}" and reference node "{}" have different kind parameter'.\
+                          format(node.id, node_ref.id))
+            return False, stderr
 
         # Check can_be_fused attr
         if node_ref.has_valid('can_be_fused'):
             if node_ref.soft_get('can_be_fused') != node.soft_get('can_be_fused'):
-                return False, 'Current node "{}" and reference node "{}" have different "can_be_fused" parameter ' \
+                stderr.append('Current node "{}" and reference node "{}" have different "can_be_fused" parameter ' \
                               '{} and {}'.format(node.id, node_ref.id, node.soft_get('can_be_fused'),
-                                                 node_ref.soft_get('can_be_fused'))
+                                                 node_ref.soft_get('can_be_fused')))
 
         if node_ref.kind == 'op':
             # Check that nodes has same operation
@@ -282,41 +291,53 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
                                                                              'infer', 'IE']:
                         continue
                     if attr not in graph.node[node.id]:
-                        return False, 'Current node "{}" has missing attribute {}'.format(node.id, attr)
+                        stderr.append('Current node "{}" has missing attribute {}'.format(node.id, attr))
+                        continue
 
                     if type(graph_ref.node[node_ref.id][attr]) in [np.ndarray, list]:
                         if not np.array_equal(graph.node[node.id][attr], graph_ref.node[node_ref.id][attr]):
-                            return False, 'Current node "{}" and reference node "{}" have different attr "{}" : ' \
+                            stderr.append('Current node "{}" and reference node "{}" have different attr "{}" : ' \
                                           '{} and {}'.format(node.id, node_ref.id, attr, graph.node[node.id][attr],
-                                                             graph_ref.node[node_ref.id][attr])
+                                                             graph_ref.node[node_ref.id][attr]))
                     elif isinstance(graph.node[node.id][attr], Number):
                         eps = 5e-2 if node.has('precision') and node['precision'] == 'FP16' else 1e-4
                         if abs(graph.node[node.id][attr] - graph_ref.node[node_ref.id][attr]) > eps:
-                            return False, '{} and {} has different attr {} : {} and {}'.format(
-                                node.id, node_ref.id, attr, graph.node[node.id][attr],
-                                graph_ref.node[node_ref.id][attr])
+                            stderr.append('{} and {} have different attr {} : {} and {}'.format(
+                                          node.id, node_ref.id, attr, graph.node[node.id][attr],
+                                          graph_ref.node[node_ref.id][attr]))
+                    elif isinstance(graph.node[node.id][attr], IREngine):
+                        resp, err_log = graph.node[node.id][attr].compare(graph_ref.node[node_ref.id][attr])
+                        if not resp:
+                            stderr.extend(err_log)
                     elif graph.node[node.id][attr] != graph_ref.node[node_ref.id][attr]:
-                        return False, 'Current node "{}" and reference node "{}" have different attr "{}" : {} and {}'.format(
-                            node.id, node_ref.id, attr, graph.node[node.id][attr],
-                            graph_ref.node[node_ref.id][attr])
+                        stderr.append('Current node "{}" and reference node "{}" have different attr "{}" : {} and {}'.format(
+                                      node.id, node_ref.id, attr, graph.node[node.id][attr],
+                                      graph_ref.node[node_ref.id][attr]))
 
         else:
             if node_ref.has_valid('shape') and not node.has_valid('shape'):
-                return False, '{} has None shape'.format(node.id)
+                stderr.append('{} has None shape'.format(node.id))
             if node_ref.has_valid('value') and not node.has_valid('value'):
-                return False, '{} has None value'.format(node.id)
+                stderr.append('{} has None value'.format(node.id))
 
             # Check that nodes has same shape and value
             if node_ref.has_valid('shape') and node_ref.shape is not None and not np.array_equal(node_ref.shape,
                                                                                                  node.shape):
-                return False, 'Current node "{}" and reference node "{}" have different shapes {} and {}'.\
-                    format(node.id, node_ref.id, node.shape, node_ref.shape)
+                stderr.append('Current node "{}" and reference node "{}" have different shapes {} and {}'.\
+                              format(node.id, node_ref.id, node.shape, node_ref.shape))
 
             if node_ref.has_valid('value') and node_ref.value is not None:
-                eps = 5e-2 if np.asarray(node.value).dtype == 'float16' else 1e-4
+                dtype = np.asarray(node.value).dtype
+                if dtype == 'uint8':
+                    eps = 0
+                elif dtype == 'float16':
+                    eps = 5e-2
+                else:
+                    eps = 1e-4
+
                 if not np.allclose(node_ref.value, node.value, rtol=eps, atol=eps):
-                    return False, 'Current node "{}" and reference node "{}" have different values \n{} \nand \n{}'.\
-                        format(node.id, node_ref.id, node.value, node_ref.value)
+                    stderr.append('Current node "{}" and reference node "{}" have different values \n{} \nand \n{}'.\
+                                  format(node.id, node_ref.id, node.value, node_ref.value))
         ports = sorted(node.in_nodes().keys()) if node.kind == 'op' else None
         in_nodes = [node.in_node(k) for k in ports] if node.kind == 'op' else node.in_nodes()
         for in_node in in_nodes:
@@ -325,7 +346,8 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
 
         ports_ref = sorted(node_ref.in_nodes().keys()) if node_ref.kind == 'op' else None
         if ports != ports_ref:
-            return False, 'Current node "{}" and reference node "{}" have different ports'.format(node.id, node_ref.id)
+            stderr.append('Current node "{}" and reference node "{}" have different ports'.format(node.id, node_ref.id))
+            return False, stderr
 
         in_nodes = [node_ref.in_node(k) for k in ports] if node_ref.kind == 'op' else node_ref.in_nodes()
         for in_node in in_nodes:
@@ -342,14 +364,26 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
             if out_node.id not in checked_nodes_ref and out_node.id not in q_ref:
                 q_ref.append(out_node.id)
 
-    return True, ''
+    return (False, '\n'.join(stderr)) if stderr else (True, [])
+
+
+class FakeAttr:
+    def __init__(self, **kwargs):
+        self.__dict__.update(kwargs)
+
+    def __setitem__(self, key, value):
+        setattr(self, key, value)
+
+    def __getitem__(self, item):
+        return getattr(self, item)
 
 
 class FakeNode:
     def __init__(self, pl, ml):
         self.pb = pl
         self.model_pb = ml
-        self.graph = None
+        self.graph = FakeAttr()
+        self.graph.graph = {}
         self.update_node = lambda: None
 
     def __setitem__(self, key, value):
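
With the changes above, compare_graphs accumulates mismatches in the stderr list instead of stopping at the first differing attribute, compares nested IREngine attributes (such as a TensorIterator body) recursively, and returns either a list of messages (for early structural failures) or a single joined string. A minimal sketch of how a test might consume the result, assuming graph and graph_ref are Graph objects already built by the test and 'result' is the name of their last node:

    from mo.utils.unittest.graph import compare_graphs

    # graph and graph_ref are assumed to be mo.graph.graph.Graph instances built by the test.
    flag, stderr = compare_graphs(graph, graph_ref, last_node='result', check_op_attrs=True)
    if not flag:
        # Early structural failures return a list; attribute mismatches return a joined string.
        message = stderr if isinstance(stderr, str) else '\n'.join(stderr)
        raise AssertionError(message)
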
diff --git a/model-optimizer/mo/utils/unittest/ir_engine.py b/model-optimizer/mo/utils/unittest/ir_engine.py
new file mode 100644 (file)
index 0000000..99347d4
--- /dev/null
@@ -0,0 +1,332 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import hashlib
+import os
+import xml.etree.ElementTree as ET
+from collections import namedtuple, defaultdict
+from pathlib import Path
+
+import networkx as nx
+import numpy as np
+import logging as log
+import sys
+
+from mo.graph.graph import Node, Graph
+from mo.utils.unittest.graph import compare_graphs
+
+log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout)
+
+class IREngine(object):
+    def __init__(self, path_to_xml: str, path_to_bin=None, precision="FP32", xml_tree=None):
+        if not xml_tree and not os.path.exists(path_to_xml):
+            raise AttributeError("File {} do not exists!".format(path_to_xml))
+
+        if path_to_bin and not os.path.exists(path_to_bin):
+            raise AttributeError("File {} do not exists!".format(path_to_bin))
+
+        self.path_to_xml = str(path_to_xml)
+        self.path_to_bin = str(path_to_bin) if path_to_bin else None
+        self.xml_tree = xml_tree
+        self.input_node = None
+
+        if precision.upper() not in ['FP32', 'FP16']:
+            raise AttributeError("Precision {} is not supported!".format(precision))
+        self.__load_ir()
+
+    def __load_xml(self):
+        xml_tree = self.xml_tree or ET.parse(self.path_to_xml)
+        xml_root = xml_tree.getroot()
+        xml_layers = {}
+        xml_edges = []
+        statistics = {}
+
+        Edge = namedtuple('edge', ['from_layer', 'from_port', 'to_layer', 'to_port'])
+
+        # Create graph with operations only
+        self.graph = Graph()
+        self.graph.graph['hashes'] = {}
+
+        # Parse XML
+        for child in xml_root:
+            if child.tag == 'layers':
+                for layer in child:
+                    layer_id, layer_attrs = self.__load_layer(layer)
+                    xml_layers.update({layer_id: layer_attrs})
+            elif child.tag == 'edges':
+                for edge in child:
+                    xml_edges.append(Edge(edge.attrib['from-layer'], int(edge.attrib['from-port']),
+                                          edge.attrib['to-layer'], int(edge.attrib['to-port'])))
+            elif child.tag == 'statistics':
+                layers = child.findall('layer')
+                for layer in layers:
+                    statistics[layer.find('name').text] = {'min': layer.find('min').text, 'max': layer.find('max').text}
+
+        self.graph.graph['statistics'] = statistics
+
+        for layer in xml_layers.keys():
+            self.graph.add_node(layer, **xml_layers[layer])
+
+        for edge in xml_edges:
+            self.graph.add_edges_from(
+                [(edge.from_layer, edge.to_layer, {'from_port': edge.from_port, 'to_port': edge.to_port})])
+
+        # Insert data nodes between op nodes and insert data nodes with weights
+        nodes = list(self.graph.nodes())
+        for node in nodes:
+            out_edges = Node(self.graph, node).get_outputs()
+            data_nodes = {}
+            for port in self.graph.node[node]['ports']:
+                data = self.graph.unique_id(prefix='data_')
+                self.graph.add_node(data, **{'kind': 'data', 'shape': self.graph.node[node]['ports'][port],
+                                             'value': None})
+                self.graph.add_edges_from([(node, data, {'out': port})])
+                data_nodes.update({port: data})
+
+            for out_node, edge_attrs in out_edges:
+                self.graph.remove_edge(node, out_node)
+                if edge_attrs['from_port'] in data_nodes:
+                    data = data_nodes[edge_attrs['from_port']]
+                else:
+                    raise RuntimeError("SMTH wrong with IR! There is an edge from not existing port")
+                self.graph.add_edges_from([(data, out_node, {'in': edge_attrs['to_port']})])
+
+    def __load_bin(self):
+        bin_buff = np.fromfile(file=self.path_to_bin, dtype=np.uint8)
+        graph = self.graph
+        nodes = [node for node in graph.nodes()]
+        hashes = defaultdict(dict)
+        for node in nodes:
+            for w in ['weights', 'biases', 'custom']:
+                if w in graph.node[node]:
+                    data = graph.unique_id(prefix='data_')
+                    offset, size, in_port, precision = graph.node[node][w]
+                    if Node(graph, node).soft_get('type') == 'BinaryConvolution':
+                        precision = np.uint8
+                    value = np.frombuffer(buffer=bin_buff, dtype=precision, count=size, offset=offset)
+                    hashes[graph.node[node]['name']][w] = hashlib.sha512(value.tobytes()).hexdigest()
+                    graph.add_node(data, **{'kind': 'data', 'value': value, 'shape': value.shape})
+                    graph.add_edges_from([(data, node, {'in': in_port})])
+        self.graph.graph['hashes'].update(hashes)
+
+    def __load_bin_hashes(self):
+        graph = self.graph
+        bin_hash_map = {name: blob_map.item(0) for name, blob_map in dict(np.load(self.path_to_bin,
+                                                                                  allow_pickle=True)).items()}
+
+        for node in graph.nodes():
+            for w in ['weights', 'biases', 'custom']:
+                if w in graph.node[node]:
+                    assert Node(graph, node).has_valid('name')
+                    node_name = Node(graph, node).name
+                    assert node_name in bin_hash_map and w in bin_hash_map[node_name]
+                    graph.node[node]['hashes'] = bin_hash_map[node_name][w]
+
+
+    def __load_ir(self):
+        self.__load_xml()
+        if not self.path_to_bin:
+            return
+
+        if self.path_to_bin.endswith('.bin.hashes.npz'):
+            self.__load_bin_hashes()
+        else:
+            self.__load_bin()
+
+    def __load_layer(self, layer):
+        """
+            Layer example
+
+            <layer id="1" name="862" precision="FP32" type="Convolution">
+                <data dilation-x="1" dilation-y="1" group="1" kernel-x="1" kernel-y="5" output="32" pad-b="0" pad-r="2" pad-x="2" pad-y="0" stride-x="1" stride-y="1"/>
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>3</dim>
+                        <dim>32</dim>
+                        <dim>32</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="3">
+                        <dim>1</dim>
+                        <dim>32</dim>
+                        <dim>32</dim>
+                        <dim>32</dim>
+                    </port>
+                </output>
+                <blobs>
+                    <weights offset="0" size="1920"/>
+                    <biases offset="1920" size="128"/>
+                </blobs>
+            </layer>
+
+        """
+
+        layer_id = layer.attrib['id']
+
+        layer_attrs = layer.attrib
+        layer_attrs.update({'ports': {}, 'kind': 'op'})
+
+        inputs_counter = 0
+
+        for attr in layer:
+            if attr.tag == 'data':
+                layer_attrs.update(IREngine.__normalize_attrs(attr.attrib))
+            elif attr.tag == 'input':
+                inputs_counter = len(attr)
+            elif attr.tag == 'output':
+                output = attr
+                for port in output:
+                    port_id = int(port.attrib['id'])
+                    output_shape = []
+                    for dim in port:
+                        output_shape.append(int(dim.text))
+
+                    layer_attrs['ports'].update({port_id: output_shape})
+            elif attr.tag == 'blobs':
+                in_port = inputs_counter
+                precision = layer.attrib['precision']
+                precision_map = {
+                    'FP32': (4, np.float32),
+                    'FP16': (2, np.float16),
+                    'I64': (8, np.int64),
+                    'I32': (4, np.int32),
+                }
+                type_size, dtype = precision_map[precision]
+                for blob_attr in attr:
+                    layer_attrs.update({blob_attr.tag: (int(blob_attr.attrib['offset']),
+                                                        int(blob_attr.attrib['size']) // type_size,
+                                                        in_port,
+                                                        dtype)})
+                    in_port += 1
+            elif attr.tag == 'body':
+                xml_body_child = list(layer.iterfind('body'))
+                assert len(xml_body_child) == 1
+
+                body_ir = IREngine(path_to_xml=None,
+                                   path_to_bin=self.path_to_bin,
+                                   xml_tree=ET.ElementTree(xml_body_child[0]))
+                self.graph.graph['hashes'].update(body_ir.graph.graph['hashes'])
+
+                # Find the port_map section and take an input with axis specified - this will be our input_layer for the body
+                xml_port_map = list(layer.iterfind('port_map'))
+                if not len(xml_port_map) == 1:
+                    log.warning("TensorIterator body won\'t be compared due to missing port_map section!")
+                    continue
+                xml_port_map = xml_port_map[0]
+
+                input_layers = []
+                for input in xml_port_map:
+                    if input.tag == 'input' and 'axis' in input.attrib:
+                        if 'internal_layer_id' not in input.attrib:
+                            log.warning("internal_layer_id attrib not found in input section")
+                        else:
+                            input_layers.append(Node(body_ir.graph, input.attrib['internal_layer_id']))
+
+                if len(input_layers) != 1:
+                    log.warning("TensorIterator body won\'t be compared due to the number of inputs in body != 1 "
+                                "({})".format(len(input_layers)))
+                else:
+                    body_ir.input_node = input_layers[0]
+                    layer_attrs.update({'body': body_ir})
+
+        return layer_id, layer_attrs
+
+    @staticmethod
+    def __normalize_attrs(attrs: dict):
+        """
+        Normalize attributes for type 'data'.
+        Replace " from values (not used right now) and make list of value with int, float or other types values.
+        Example: {'order': '1,0,2'} -> {'order': [1, 0, 2]}
+                 {'order': '1'}     -> {'order': 1}
+        """
+        normalized_attrs = {}
+        for attr, value in attrs.items():
+            value = value.replace('\"', '')
+            value = value.split(',')
+            n_value = []
+            for val in value:
+                if val.isdigit():
+                    n_value.append(int(val))
+                elif IREngine.__isfloat(val):
+                    n_value.append(float(val))
+                else:
+                    n_value.append(val)
+
+            if len(n_value) == 1:
+                normalized_attrs.update({attr: n_value[0]})
+            else:
+                normalized_attrs.update({attr: n_value})
+
+        return normalized_attrs
+
+    @staticmethod
+    def __isfloat(value):
+        try:
+            float(value)
+            return True
+        except ValueError:
+            return False
+
+    @staticmethod
+    def __find_input(graph):
+        inputs = []
+        for node in sorted(graph.nodes()):
+            node = Node(graph, node)
+            if node.has_valid('type') and node.type == 'Input':
+                inputs.append(node)
+
+        if len(inputs) < 1:
+            raise RuntimeError("Graph {} has less than one input node")
+
+        return inputs
+
+    def compare(self, ref_net):
+        if not isinstance(ref_net, IREngine):
+            ir_input = self.__find_input(self.graph)[0]
+            ref_input = self.__find_input(ref_net)[0]
+            ref_graph = ref_net
+        else:
+            ir_input = self.input_node or self.__find_input(self.graph)[0]
+            ref_input = ref_net.input_node or ref_net.__find_input(ref_net.graph)[0]
+            ref_graph = ref_net.graph
+        # TODO check that ir_input[0].id and ref_input[0].id are the same
+        result, stderr = compare_graphs(graph=self.graph, graph_ref=ref_graph, last_node=ir_input.id,
+                                        last_node_ref=ref_input.id, check_op_attrs=True)
+        return result, stderr
+
+    def generate_bin_hashes_file(self, path_for_file=None):
+        # Creates a file with the '.bin.hashes.npz' extension that stores hashes of the bin blobs.
+        # To create this file in a custom folder, pass the folder path via the path_for_file argument;
+        # the target directory must already exist.
+        graph = self.graph
+        if path_for_file is None:
+            path_for_file = str(Path(self.path_to_xml).with_suffix('.bin.hashes.npz'))
+        assert 'hashes' in graph.graph, "Loaded IR graph doesn't contain `hashes`: {}".format(self.path_to_xml)
+        np.savez_compressed(path_for_file, **graph.graph['hashes'])
+        return path_for_file
+
+    def get_inputs(self):
+        # Returns input nodes as a dictionary: {input_node_name: input_node_shape, ...}
+        input_nodes = self.__find_input(self.graph)
+        return {input_node.name: input_node.out_node().shape for input_node in input_nodes}
+
+    def __eq__(self, other):
+        # To use this comparison, create two IREngine objects (IR1, IR2) and compare them: IR1 == IR2
+        if not isinstance(other, IREngine):
+            raise AttributeError("IREngine can be compared only with IREngine object type")
+        return self.compare(other)[0]
\ No newline at end of file
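
A short usage sketch of the new IREngine helper, limited to the methods defined above; the IR paths are placeholders:

    from mo.utils.unittest.ir_engine import IREngine

    # Placeholder paths for an IR produced by the Model Optimizer.
    ir = IREngine(path_to_xml='model.xml', path_to_bin='model.bin')
    ir_ref = IREngine(path_to_xml='model_ref.xml', path_to_bin='model_ref.bin')

    print(ir.get_inputs())              # {input_node_name: input_node_shape, ...}
    flag, stderr = ir.compare(ir_ref)   # graph comparison via compare_graphs
    print(ir == ir_ref)                 # shorthand for ir.compare(ir_ref)[0]

    # Pre-compute blob hashes so later runs can load the lightweight
    # '.bin.hashes.npz' file instead of the full .bin file:
    hashes_path = ir.generate_bin_hashes_file()
    ir_hashed = IREngine(path_to_xml='model.xml', path_to_bin=hashes_path)
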
diff --git a/model-optimizer/mo/utils/unittest/ir_engine_test.py b/model-optimizer/mo/utils/unittest/ir_engine_test.py
new file mode 100644 (file)
index 0000000..fd87373
--- /dev/null
@@ -0,0 +1,136 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+import logging as log
+import sys
+from generator import generator, generate
+import os
+
+from mo.utils.unittest.ir_engine import IREngine
+from mo.graph.graph import Graph, Node
+
+log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.DEBUG, stream=sys.stdout)
+
+
+@generator
+class TestFunction (unittest.TestCase):
+    def setUp(self):
+        self.xml = os.path.join(os.path.dirname(__file__),
+                                "./test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.xml")
+        self.xml_negative = os.path.join(os.path.dirname(__file__),
+                                "./test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6_negative.xml")
+        self.bin = os.path.splitext(self.xml)[0] + '.bin'
+        self.assertTrue(os.path.exists(self.xml), 'XML file not found: {}'.format(self.xml))
+        self.assertTrue(os.path.exists(self.bin), 'BIN file not found: {}'.format(self.bin))
+
+        self.IR = IREngine(path_to_xml=str(self.xml), path_to_bin=str(self.bin))
+        self.IR_ref = IREngine(path_to_xml=str(self.xml), path_to_bin=str(self.bin))
+        self.IR_negative = IREngine(path_to_xml=str(self.xml_negative), path_to_bin=str(self.bin))
+
+    @generate(*[(4.4, True), ('aaaa', False)])
+    def test_is_float(self, test_data, result):
+        test_data = test_data
+        self.assertEqual(IREngine._IREngine__isfloat(test_data), result,
+                         "Function __isfloat is not working with value: {}".format(test_data))
+        log.info('Test for function __isfloat passed with value: {}, expected result: {}'.format(test_data, result))
+
+    # TODO add comparison not for type IREngine
+    def test_compare(self):
+        flag, msg = self.IR.compare(self.IR_ref)
+        self.assertTrue(flag, 'Comparing false, test compare function failed')
+        log.info('Test for function compare passed')
+
+    def test_compare_negative(self):
+        # Reference data for test:
+        reference_msg = 'Current node "2" and reference node "2" have different attr "type" : Const and Input'
+        # Check function:
+        flag, msg = self.IR.compare(self.IR_negative)
+        self.assertFalse(flag, 'Comparing flag failed, test compare function failed')
+        self.assertEqual(msg, reference_msg, 'Comparing message failed, test compare negative failed')
+
+        log.info('Test for function compare passed')
+
+    def test_find_input(self):
+        # Create references for this test:
+        ref_nodes = [Node(self.IR.graph, '0')]
+        # Check function:
+        a = IREngine._IREngine__find_input(self.IR.graph)
+        self.assertTrue(a == ref_nodes, 'Test for function __find_input failed')
+
+    def test_get_inputs(self):
+        # Reference data for test:
+        ref_input_dict = {'data': [1, 10, 16]}
+        # Check function:
+        inputs_dict = self.IR.get_inputs()
+        # is_equal = compare_dictionaries(ref_input_dict, inputs_dict)
+        self.assertTrue(ref_input_dict == inputs_dict, 'Test on function get_inputs failed')
+        log.info('Test for function get_inputs passed')
+
+    def test_eq_function(self):
+        self.assertTrue(self.IR == self.IR_ref, 'Comparing false, test eq function failed')
+        log.info('Test for function eq passed')
+
+    def test_generate_bin_hashes_file(self):
+        # Generate bin_hashes file in default directory
+        path_for_file = self.IR.generate_bin_hashes_file()
+        self.assertTrue(os.path.exists(path_for_file),
+                        'File with hashes does not exist: {}. '
+                        'Test for function generate_bin_hashes_file failed'.format(path_for_file))
+        log.info('Test for function generate_bin_hashes_file with default folder passed')
+
+    def test_generate_bin_hashes_file_custom_directory(self):
+        # Generate bin_hashes file in custom directory
+        directory_for_file = os.path.join(os.path.dirname(__file__), 'test_data/bin_hash/')
+        if not os.path.exists(directory_for_file):
+            os.mkdir(directory_for_file)
+        path_for_file_2 = self.IR.generate_bin_hashes_file(path_for_file=directory_for_file)
+        self.assertTrue(os.path.exists(path_for_file_2),
+                        'File with hashes does not exist: {}. '
+                        'Test for function generate_bin_hashes_file failed'.format(path_for_file_2))
+        log.info('Test for function generate_bin_hashes_file with custom folder passed')
+
+    @generate(*[({'order': '1,0,2'}, {'order': [1, 0, 2]}),
+                ({'order': '1'}, {'order': 1})])
+    def test_normalize_attr(self, test_data, reference):
+        result_dict = IREngine._IREngine__normalize_attrs(attrs=test_data)
+        self.assertTrue(reference == result_dict, 'Test on function normalize_attr failed')
+        log.info('Test for function normalize_attr passed')
+
+    def test_load_bin_hashes(self):
+        path_for_file = os.path.splitext(self.bin)[0] + '.bin.hashes.npz'
+        if not os.path.exists(path_for_file):
+            path_for_file = self.IR.generate_bin_hashes_file()
+        IR = IREngine(path_to_xml=str(self.xml), path_to_bin=str(path_for_file))
+        is_ok = True
+        # Check for constant nodes
+        const_nodes = IR.graph.get_op_nodes(type='Const')
+        for node in const_nodes:
+            if not node.has_valid('hashes'):
+                log.error('Constant node {} does not include hashes'.format(node.name))
+                is_ok = False
+
+        # Check for TensorIterator Body
+        ti_nodes = IR.graph.get_op_nodes(type='TensorIterator')
+        for ti in ti_nodes:
+            if not ti.has_valid('body'):
+                log.error('TensorIterator node {} has no body attribute'.format(ti.name))
+            else:
+                const_ti_nodes = ti.body.graph.get_op_nodes(type='Const')
+                for node in const_ti_nodes:
+                    if not node.has_valid('hashes'):
+                        log.error('Constant node {} does not include hashes'.format(node.name))
+                        is_ok = False
+
+        self.assertTrue(is_ok, 'Test for function load_bin_hashes failed')
\ No newline at end of file
diff --git a/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.bin b/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.bin
new file mode 100644 (file)
index 0000000..00fa110
Binary files /dev/null and b/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.bin differ
diff --git a/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.xml b/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6.xml
new file mode 100644 (file)
index 0000000..1c572cb
--- /dev/null
@@ -0,0 +1,626 @@
+<?xml version="1.0" ?>
+<net batch="1" name="mxnet_synthetic_gru_bidirectional_FP16_1_v6" version="6">
+       <layers>
+               <layer id="0" name="data" precision="FP16" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>16</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="hybridsequential0_gru0_swapaxes0" precision="FP16" type="Permute">
+                       <data order="1,0,2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>16</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="hybridsequential0_gru0_hybridsequential0_gru0_h0_0/Output_0/Data__const" precision="FP16" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="0" size="512"/>
+                       </blobs>
+               </layer>
+               <layer id="3" name="hybridsequential0_gru0_hybridsequential0_gru0_h0_0/Output_0/Data_/DecomposedBiLSTM_0" precision="FP16" type="Split">
+                       <data axis="0" num_split="2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="4" name="141_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="512" size="8"/>
+                       </blobs>
+               </layer>
+               <layer id="5" name="hybridsequential0_gru0_rnn0/Split/forward/HiddenStateResize" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="6" name="hybridsequential0_gru0_rnn0/Split/forward/TensorIterator" precision="FP16" type="TensorIterator">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+                       <port_map>
+                               <input axis="0" external_port_id="0" internal_layer_id="0" internal_port_id="0" part_size="1" stride="1"/>
+                               <input external_port_id="1" internal_layer_id="1" internal_port_id="1"/>
+                               <output axis="0" external_port_id="2" internal_layer_id="2" internal_port_id="1" part_size="1" stride="1"/>
+                               <output external_port_id="3" internal_layer_id="1" internal_port_id="4"/>
+                       </port_map>
+                       <back_edges>
+                               <edge from-layer="1" from-port="4" to-layer="1" to-port="1"/>
+                       </back_edges>
+                       <body>
+                               <layers>
+                                       <layer id="0" name="hybridsequential0_gru0_rnn0/Split/forward/input_squeeze" precision="FP16" type="Reshape">
+                                               <data dim="-1,16"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                                       <layer id="1" name="hybridsequential0_gru0_rnn0/Split/forward/GRUCell" precision="FP16" type="GRUCell">
+                                               <data hidden_size="128" linear_before_reset="1"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="4">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                               <blobs>
+                                                       <weights offset="560" size="110592"/>
+                                                       <biases offset="111152" size="1024"/>
+                                               </blobs>
+                                       </layer>
+                                       <layer id="2" name="hybridsequential0_gru0_rnn0/Split/forward/output_unsqueeze/" precision="FP16" type="Reshape">
+                                               <data dim="1,-1,128"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                               </layers>
+                               <edges>
+                                       <edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>
+                                       <edge from-layer="1" from-port="4" to-layer="2" to-port="0"/>
+                               </edges>
+                       </body>
+               </layer>
+               <layer id="7" name="138_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="520" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="8" name="hybridsequential0_gru0_rnn0/Split/forward/SqueezeNumDirections/1" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="9" name="127_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="512" size="8"/>
+                       </blobs>
+               </layer>
+               <layer id="10" name="hybridsequential0_gru0_rnn0/Split/reverse/HiddenStateResize" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="11" name="hybridsequential0_gru0_rnn0/Split/reverse/TensorIterator" precision="FP16" type="TensorIterator">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+                       <port_map>
+                               <input axis="0" end="0" external_port_id="0" internal_layer_id="0" internal_port_id="0" part_size="1" start="-1" stride="-1"/>
+                               <input external_port_id="1" internal_layer_id="1" internal_port_id="1"/>
+                               <output axis="0" end="0" external_port_id="2" internal_layer_id="2" internal_port_id="1" part_size="1" start="-1" stride="-1"/>
+                               <output external_port_id="3" internal_layer_id="1" internal_port_id="4"/>
+                       </port_map>
+                       <back_edges>
+                               <edge from-layer="1" from-port="4" to-layer="1" to-port="1"/>
+                       </back_edges>
+                       <body>
+                               <layers>
+                                       <layer id="0" name="hybridsequential0_gru0_rnn0/Split/reverse/input_squeeze" precision="FP16" type="Reshape">
+                                               <data dim="-1,16"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                                       <layer id="1" name="hybridsequential0_gru0_rnn0/Split/reverse/GRUCell" precision="FP16" type="GRUCell">
+                                               <data hidden_size="128" linear_before_reset="1"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="4">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                               <blobs>
+                                                       <weights offset="112176" size="110592"/>
+                                                       <biases offset="111152" size="1024"/>
+                                               </blobs>
+                                       </layer>
+                                       <layer id="2" name="hybridsequential0_gru0_rnn0/Split/reverse/output_unsqueeze/" precision="FP16" type="Reshape">
+                                               <data dim="1,-1,128"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                               </layers>
+                               <edges>
+                                       <edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>
+                                       <edge from-layer="1" from-port="4" to-layer="2" to-port="0"/>
+                               </edges>
+                       </body>
+               </layer>
+               <layer id="12" name="124_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="520" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="13" name="hybridsequential0_gru0_rnn0/Split/reverse/SqueezeNumDirections/1" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="14" name="hybridsequential0_gru0_rnn0/FinalConcat/HiddenState" precision="FP16" type="Concat">
+                       <data axis="0"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="15" name="134_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="532" size="16"/>
+                       </blobs>
+               </layer>
+               <layer id="16" name="hybridsequential0_gru0_rnn0/Split/forward/SqueezeNumDirections/0" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="17" name="120_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="532" size="16"/>
+                       </blobs>
+               </layer>
+               <layer id="18" name="hybridsequential0_gru0_rnn0/Split/reverse/SqueezeNumDirections/0" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="19" name="hybridsequential0_gru0_rnn0/FinalConcat/Data" precision="FP16" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="20" name="hybridsequential0_gru0_rnn0/Transpose_mxnet/" precision="FP16" type="Permute">
+                       <data order="0,2,1,3"/>
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>2</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="21" name="97_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="548" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="22" name="hybridsequential0_gru0_rnn0/Reshape_mxnet/" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>2</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="23" name="hybridsequential0_gru0_swapaxes1" precision="FP16" type="Permute">
+                       <data order="1,0,2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="24" name="hybridsequential0_relu0_fwd" precision="FP16" type="ReLU">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+               <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
+               <edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
+               <edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
+               <edge from-layer="1" from-port="1" to-layer="6" to-port="0"/>
+               <edge from-layer="5" from-port="2" to-layer="6" to-port="1"/>
+               <edge from-layer="6" from-port="3" to-layer="8" to-port="0"/>
+               <edge from-layer="7" from-port="1" to-layer="8" to-port="1"/>
+               <edge from-layer="3" from-port="2" to-layer="10" to-port="0"/>
+               <edge from-layer="9" from-port="1" to-layer="10" to-port="1"/>
+               <edge from-layer="1" from-port="1" to-layer="11" to-port="0"/>
+               <edge from-layer="10" from-port="2" to-layer="11" to-port="1"/>
+               <edge from-layer="11" from-port="3" to-layer="13" to-port="0"/>
+               <edge from-layer="12" from-port="1" to-layer="13" to-port="1"/>
+               <edge from-layer="8" from-port="2" to-layer="14" to-port="0"/>
+               <edge from-layer="13" from-port="2" to-layer="14" to-port="1"/>
+               <edge from-layer="6" from-port="2" to-layer="16" to-port="0"/>
+               <edge from-layer="15" from-port="1" to-layer="16" to-port="1"/>
+               <edge from-layer="11" from-port="2" to-layer="18" to-port="0"/>
+               <edge from-layer="17" from-port="1" to-layer="18" to-port="1"/>
+               <edge from-layer="16" from-port="2" to-layer="19" to-port="0"/>
+               <edge from-layer="18" from-port="2" to-layer="19" to-port="1"/>
+               <edge from-layer="19" from-port="2" to-layer="20" to-port="0"/>
+               <edge from-layer="20" from-port="1" to-layer="22" to-port="0"/>
+               <edge from-layer="21" from-port="1" to-layer="22" to-port="1"/>
+               <edge from-layer="22" from-port="2" to-layer="23" to-port="0"/>
+               <edge from-layer="23" from-port="1" to-layer="24" to-port="0"/>
+       </edges>
+       <meta_data>
+               <MO_version value="unknown version"/>
+               <cli_parameters>
+                       <blobs_as_inputs value="False"/>
+                       <caffe_parser_path value="DIR"/>
+                       <data_type value="FP16"/>
+                       <disable_nhwc_to_nchw value="False"/>
+                       <disable_omitting_optional value="False"/>
+                       <disable_resnet_optimization value="False"/>
+                       <enable_concat_optimization value="False"/>
+                       <enable_flattening_nested_params value="False"/>
+                       <enable_ssd_gluoncv value="False"/>
+                       <extensions value="DIR"/>
+                       <framework value="mxnet"/>
+                       <freeze_placeholder_with_value value="{}"/>
+                       <generate_experimental_IR_V10 value="False"/>
+                       <input_model value="DIR/net.params-0000.params"/>
+                       <input_model_is_text value="False"/>
+                       <input_shape value="(1, 10, 16)"/>
+                       <k value="DIR/CustomLayersMapping.xml"/>
+                       <keep_quantize_ops_in_IR value="False"/>
+                       <keep_shape_ops value="False"/>
+                       <legacy_mxnet_model value="True"/>
+                       <log_level value="ERROR"/>
+                       <mean_scale_values value="{}"/>
+                       <mean_values value="()"/>
+                       <model_name value="mxnet_synthetic_gru_bidirectional_FP16_1_v6"/>
+                       <move_to_preprocess value="False"/>
+                       <output_dir value="DIR"/>
+                       <placeholder_shapes value="[ 1 10 16]"/>
+                       <remove_output_softmax value="False"/>
+                       <reverse_input_channels value="False"/>
+                       <save_params_from_nd value="False"/>
+                       <scale_values value="()"/>
+                       <silent value="False"/>
+                       <version value="False"/>
+                       <unset unset_cli_parameters="batch, counts, disable_fusing, disable_gfusing, finegrain_fusing, generate_deprecated_IR_V2, input, input_checkpoint, input_meta_graph, input_proto, input_symbol, mean_file, mean_file_offsets, nd_prefix_name, output, pretrained_model_name, saved_model_dir, saved_model_tags, scale, tensorboard_logdir, tensorflow_custom_layer_libraries, tensorflow_custom_operations_config_update, tensorflow_object_detection_api_pipeline_config, tensorflow_operation_patterns, tensorflow_subgraph_patterns, tensorflow_use_custom_operations_config"/>
+               </cli_parameters>
+       </meta_data>
+</net>
diff --git a/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6_negative.xml b/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6_negative.xml
new file mode 100644
index 0000000..62e8422
--- /dev/null
+++ b/model-optimizer/mo/utils/unittest/test_data/mxnet_synthetic_gru_bidirectional_FP16_1_v6_negative.xml
@@ -0,0 +1,626 @@
+<?xml version="1.0" ?>
+<net batch="1" name="mxnet_synthetic_gru_bidirectional_FP16_1_v6" version="6">
+       <layers>
+               <layer id="0" name="data" precision="FP16" type="Input">
+                       <output>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>16</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="hybridsequential0_gru0_swapaxes0" precision="FP16" type="Permute">
+                       <data order="1,0,2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>16</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="hybridsequential0_gru0_hybridsequential0_gru0_h0_0/Output_0/Data__const" precision="FP16" type="Input">
+                       <output>
+                               <port id="1">
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="0" size="512"/>
+                       </blobs>
+               </layer>
+               <layer id="3" name="hybridsequential0_gru0_hybridsequential0_gru0_h0_0/Output_0/Data_/DecomposedBiLSTM_0" precision="FP16" type="Split">
+                       <data axis="0" num_split="2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="4" name="141_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="512" size="8"/>
+                       </blobs>
+               </layer>
+               <layer id="5" name="hybridsequential0_gru0_rnn0/Split/forward/HiddenStateResize" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="6" name="hybridsequential0_gru0_rnn0/Split/forward/TensorIterator" precision="FP16" type="TensorIterator">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+                       <port_map>
+                               <input axis="0" external_port_id="0" internal_layer_id="0" internal_port_id="0" part_size="1" stride="1"/>
+                               <input external_port_id="1" internal_layer_id="1" internal_port_id="1"/>
+                               <output axis="0" external_port_id="2" internal_layer_id="2" internal_port_id="1" part_size="1" stride="1"/>
+                               <output external_port_id="3" internal_layer_id="1" internal_port_id="4"/>
+                       </port_map>
+                       <back_edges>
+                               <edge from-layer="1" from-port="4" to-layer="1" to-port="1"/>
+                       </back_edges>
+                       <body>
+                               <layers>
+                                       <layer id="0" name="hybridsequential0_gru0_rnn0/Split/forward/input_squeeze" precision="FP16" type="Reshape">
+                                               <data dim="-1,16"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                                       <layer id="1" name="hybridsequential0_gru0_rnn0/Split/forward/GRUCell" precision="FP16" type="GRUCell">
+                                               <data hidden_size="128" linear_before_reset="1"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="4">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                               <blobs>
+                                                       <weights offset="560" size="110592"/>
+                                                       <biases offset="111152" size="1024"/>
+                                               </blobs>
+                                       </layer>
+                                       <layer id="2" name="hybridsequential0_gru0_rnn0/Split/forward/output_unsqueeze/" precision="FP16" type="Reshape">
+                                               <data dim="1,-1,128"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                               </layers>
+                               <edges>
+                                       <edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>
+                                       <edge from-layer="1" from-port="4" to-layer="2" to-port="0"/>
+                               </edges>
+                       </body>
+               </layer>
+               <layer id="7" name="138_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="520" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="8" name="hybridsequential0_gru0_rnn0/Split/forward/SqueezeNumDirections/1" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="9" name="127_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="512" size="8"/>
+                       </blobs>
+               </layer>
+               <layer id="10" name="hybridsequential0_gru0_rnn0/Split/reverse/HiddenStateResize" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>2</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="11" name="hybridsequential0_gru0_rnn0/Split/reverse/TensorIterator" precision="FP16" type="TensorIterator">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>16</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="3">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+                       <port_map>
+                               <input axis="0" end="0" external_port_id="0" internal_layer_id="0" internal_port_id="0" part_size="1" start="-1" stride="-1"/>
+                               <input external_port_id="1" internal_layer_id="1" internal_port_id="1"/>
+                               <output axis="0" end="0" external_port_id="2" internal_layer_id="2" internal_port_id="1" part_size="1" start="-1" stride="-1"/>
+                               <output external_port_id="3" internal_layer_id="1" internal_port_id="4"/>
+                       </port_map>
+                       <back_edges>
+                               <edge from-layer="1" from-port="4" to-layer="1" to-port="1"/>
+                       </back_edges>
+                       <body>
+                               <layers>
+                                       <layer id="0" name="hybridsequential0_gru0_rnn0/Split/reverse/input_squeeze" precision="FP16" type="Reshape">
+                                               <data dim="-1,16"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                                       <layer id="1" name="hybridsequential0_gru0_rnn0/Split/reverse/GRUCell" precision="FP16" type="GRUCell">
+                                               <data hidden_size="128" linear_before_reset="1"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>16</dim>
+                                                       </port>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="4">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                               <blobs>
+                                                       <weights offset="112176" size="110592"/>
+                                                       <biases offset="111152" size="1024"/>
+                                               </blobs>
+                                       </layer>
+                                       <layer id="2" name="hybridsequential0_gru0_rnn0/Split/reverse/output_unsqueeze/" precision="FP16" type="Reshape">
+                                               <data dim="1,-1,128"/>
+                                               <input>
+                                                       <port id="0">
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </input>
+                                               <output>
+                                                       <port id="1">
+                                                               <dim>1</dim>
+                                                               <dim>1</dim>
+                                                               <dim>128</dim>
+                                                       </port>
+                                               </output>
+                                       </layer>
+                               </layers>
+                               <edges>
+                                       <edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>
+                                       <edge from-layer="1" from-port="4" to-layer="2" to-port="0"/>
+                               </edges>
+                       </body>
+               </layer>
+               <layer id="12" name="124_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="520" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="13" name="hybridsequential0_gru0_rnn0/Split/reverse/SqueezeNumDirections/1" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="14" name="hybridsequential0_gru0_rnn0/FinalConcat/HiddenState" precision="FP16" type="Concat">
+                       <data axis="0"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="15" name="134_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="532" size="16"/>
+                       </blobs>
+               </layer>
+               <layer id="16" name="hybridsequential0_gru0_rnn0/Split/forward/SqueezeNumDirections/0" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="17" name="120_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="532" size="16"/>
+                       </blobs>
+               </layer>
+               <layer id="18" name="hybridsequential0_gru0_rnn0/Split/reverse/SqueezeNumDirections/0" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>4</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="19" name="hybridsequential0_gru0_rnn0/FinalConcat/Data" precision="FP16" type="Concat">
+                       <data axis="1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="20" name="hybridsequential0_gru0_rnn0/Transpose_mxnet/" precision="FP16" type="Permute">
+                       <data order="0,2,1,3"/>
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>2</dim>
+                                       <dim>1</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>2</dim>
+                                       <dim>128</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="21" name="97_const" precision="I32" type="Const">
+                       <output>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="548" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="22" name="hybridsequential0_gru0_rnn0/Reshape_mxnet/" precision="FP16" type="Reshape">
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>2</dim>
+                                       <dim>128</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="23" name="hybridsequential0_gru0_swapaxes1" precision="FP16" type="Permute">
+                       <data order="1,0,2"/>
+                       <input>
+                               <port id="0">
+                                       <dim>10</dim>
+                                       <dim>1</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="24" name="hybridsequential0_relu0_fwd" precision="FP16" type="ReLU">
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>10</dim>
+                                       <dim>256</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+               <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
+               <edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
+               <edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
+               <edge from-layer="1" from-port="1" to-layer="6" to-port="0"/>
+               <edge from-layer="5" from-port="2" to-layer="6" to-port="1"/>
+               <edge from-layer="6" from-port="3" to-layer="8" to-port="0"/>
+               <edge from-layer="7" from-port="1" to-layer="8" to-port="1"/>
+               <edge from-layer="3" from-port="2" to-layer="10" to-port="0"/>
+               <edge from-layer="9" from-port="1" to-layer="10" to-port="1"/>
+               <edge from-layer="1" from-port="1" to-layer="11" to-port="0"/>
+               <edge from-layer="10" from-port="2" to-layer="11" to-port="1"/>
+               <edge from-layer="11" from-port="3" to-layer="13" to-port="0"/>
+               <edge from-layer="12" from-port="1" to-layer="13" to-port="1"/>
+               <edge from-layer="8" from-port="2" to-layer="14" to-port="0"/>
+               <edge from-layer="13" from-port="2" to-layer="14" to-port="1"/>
+               <edge from-layer="6" from-port="2" to-layer="16" to-port="0"/>
+               <edge from-layer="15" from-port="1" to-layer="16" to-port="1"/>
+               <edge from-layer="11" from-port="2" to-layer="18" to-port="0"/>
+               <edge from-layer="17" from-port="1" to-layer="18" to-port="1"/>
+               <edge from-layer="16" from-port="2" to-layer="19" to-port="0"/>
+               <edge from-layer="18" from-port="2" to-layer="19" to-port="1"/>
+               <edge from-layer="19" from-port="2" to-layer="20" to-port="0"/>
+               <edge from-layer="20" from-port="1" to-layer="22" to-port="0"/>
+               <edge from-layer="21" from-port="1" to-layer="22" to-port="1"/>
+               <edge from-layer="22" from-port="2" to-layer="23" to-port="0"/>
+               <edge from-layer="23" from-port="1" to-layer="24" to-port="0"/>
+       </edges>
+       <meta_data>
+               <MO_version value="unknown version"/>
+               <cli_parameters>
+                       <blobs_as_inputs value="False"/>
+                       <caffe_parser_path value="DIR"/>
+                       <data_type value="FP16"/>
+                       <disable_nhwc_to_nchw value="False"/>
+                       <disable_omitting_optional value="False"/>
+                       <disable_resnet_optimization value="False"/>
+                       <enable_concat_optimization value="False"/>
+                       <enable_flattening_nested_params value="False"/>
+                       <enable_ssd_gluoncv value="False"/>
+                       <extensions value="DIR"/>
+                       <framework value="mxnet"/>
+                       <freeze_placeholder_with_value value="{}"/>
+                       <generate_experimental_IR_V10 value="False"/>
+                       <input_model value="DIR/net.params-0000.params"/>
+                       <input_model_is_text value="False"/>
+                       <input_shape value="(1, 10, 16)"/>
+                       <k value="DIR/CustomLayersMapping.xml"/>
+                       <keep_quantize_ops_in_IR value="False"/>
+                       <keep_shape_ops value="False"/>
+                       <legacy_mxnet_model value="True"/>
+                       <log_level value="ERROR"/>
+                       <mean_scale_values value="{}"/>
+                       <mean_values value="()"/>
+                       <model_name value="mxnet_synthetic_gru_bidirectional_FP16_1_v6"/>
+                       <move_to_preprocess value="False"/>
+                       <output_dir value="DIR"/>
+                       <placeholder_shapes value="[ 1 10 16]"/>
+                       <remove_output_softmax value="False"/>
+                       <reverse_input_channels value="False"/>
+                       <save_params_from_nd value="False"/>
+                       <scale_values value="()"/>
+                       <silent value="False"/>
+                       <version value="False"/>
+                       <unset unset_cli_parameters="batch, counts, disable_fusing, disable_gfusing, finegrain_fusing, generate_deprecated_IR_V2, input, input_checkpoint, input_meta_graph, input_proto, input_symbol, mean_file, mean_file_offsets, nd_prefix_name, output, pretrained_model_name, saved_model_dir, saved_model_tags, scale, tensorboard_logdir, tensorflow_custom_layer_libraries, tensorflow_custom_operations_config_update, tensorflow_object_detection_api_pipeline_config, tensorflow_operation_patterns, tensorflow_subgraph_patterns, tensorflow_use_custom_operations_config"/>
+               </cli_parameters>
+       </meta_data>
+</net>
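For reference, the graph structure of this IR can be checked with the standard library. A minimal sketch, assuming the XML above is saved as `mxnet_synthetic_gru_bidirectional_FP16_1_v6.xml` (the file name here is an assumption taken from the `model_name` value in `<meta_data>`):

```python
# Minimal sketch: count the layers and edges of the IR above and print a few edges.
# The file name is an assumption based on the model_name value in <meta_data>.
import xml.etree.ElementTree as ET

root = ET.parse('mxnet_synthetic_gru_bidirectional_FP16_1_v6.xml').getroot()
layers = root.find('layers').findall('layer')
edges = root.find('edges').findall('edge')
print('{} layers, {} edges'.format(len(layers), len(edges)))
for edge in edges[:3]:
    print(edge.get('from-layer'), '->', edge.get('to-layer'))
```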
index e8408ec..ff576d9 100644 (file)
@@ -44,7 +44,40 @@ def check_python_version():
         return 1
 
 
-def get_module_version_list_from_file(file_name):
+def parse_versions_list(required_fw_versions: str, version_list: list):
+    """
+    Parse one requirements-file line into version constraints.
+    :param required_fw_versions: string with framework versions from a requirements file
+    :param version_list: list to which parsed tuples are appended
+    :return: list of tuples of strings like (name_of_module, sign, version)
+
+    Example of the returned object:
+    [('tensorflow', '>=', '1.2.0'), ('networkx', '==', '2.1'), ('numpy', None, None)]
+    """
+
+    line = required_fw_versions.strip('\n')
+    line = line.strip(' ')
+    if line == '':
+        return []
+    split_versions_by_conditions = re.split(r"==|>=|<=|>|<", line)
+    split_versions_by_conditions = [l.strip(',') for l in split_versions_by_conditions]
+
+    if len(split_versions_by_conditions) == 0:
+        return []
+    if len(split_versions_by_conditions) == 1:
+        version_list.append((split_versions_by_conditions[0], None, None))
+    else:
+        split_required_versions = re.split(r",", line)
+        comparisons = ['==', '>=', '<=', '<', '>']
+        for i, required_version in enumerate(split_required_versions):
+            for comparison in comparisons:
+                if comparison in required_version:
+                    version_list.append((split_versions_by_conditions[0], comparison, split_versions_by_conditions[i + 1]))
+                    break
+    return version_list
+
+
+def get_module_version_list_from_file(file_name: str):
     """
     Please do not add parameter type annotations (param:type).
     Because we import this file while checking Python version.
@@ -65,25 +98,7 @@ def get_module_version_list_from_file(file_name):
     req_dict = list()
     with open(file_name) as f:
         for line in f:
-            line = line.strip('\n')
-            line = line.strip(' ')
-            if line == '':
-                continue
-            splited_line = re.split(r"==|>=|<=|>|<", line)
-            splited_line = [l.strip(',') for l in splited_line]
-            if len(splited_line) == 1:
-                req_dict.append((splited_line[0], None, None))
-            else:
-                if '==' in line:
-                    req_dict.append((splited_line[0], '==', splited_line[1]))
-                elif '>=' in line:
-                    req_dict.append((splited_line[0], '>=', splited_line[1]))
-                elif '<=' in line:
-                    req_dict.append((splited_line[0], '<=', splited_line[1]))
-                elif '<' in line:
-                    req_dict.append((splited_line[0], '<', splited_line[1]))
-                elif '>' in line:
-                    req_dict.append((splited_line[0], '>', splited_line[1]))
+            req_dict = parse_versions_list(line, req_dict)
     return req_dict
 
 
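The refactored parser can be exercised on its own. A minimal sketch of the expected behaviour, assuming it is run from the `model-optimizer` directory so that the `mo` package is importable:

```python
# Minimal sketch: parse_versions_list appends (module, sign, version) tuples in place.
from mo.utils.versions_checker import parse_versions_list

versions = []
parse_versions_list('mxnet>=1.0.0,<=1.3.1', versions)  # two constraints for one module
parse_versions_list('numpy', versions)                 # bare module name, no constraint
print(versions)
# [('mxnet', '>=', '1.0.0'), ('mxnet', '<=', '1.3.1'), ('numpy', None, None)]
```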
diff --git a/model-optimizer/mo/utils/versions_checker_test.py b/model-optimizer/mo/utils/versions_checker_test.py
new file mode 100644 (file)
index 0000000..27a6455
--- /dev/null
@@ -0,0 +1,55 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+import unittest.mock as mock
+
+from unittest.mock import mock_open
+from mo.utils.versions_checker import get_module_version_list_from_file, parse_versions_list
+
+class TestingVersionsChecker(unittest.TestCase):
+    @mock.patch('builtins.open', new_callable=mock_open, create=True)
+    def test_get_module_version_list_from_file(self, mock_open):
+        mock_open.return_value.__enter__ = mock_open
+        mock_open.return_value.__iter__ = mock.Mock(
+            return_value=iter(['mxnet>=1.0.0,<=1.3.1', 'networkx>=1.11', 'numpy==1.12.0', 'defusedxml<=0.5.0']))
+        ref_list = [('mxnet', '>=', '1.0.0'), ('mxnet', '<=', '1.3.1'),
+                    ('networkx', '>=', '1.11'),
+                    ('numpy', '==', '1.12.0'), ('defusedxml', '<=', '0.5.0')]
+        version_list = get_module_version_list_from_file('mock_file')
+        self.assertEqual(len(version_list), 5)
+        for i, version_tuple in enumerate(version_list):
+            self.assertTupleEqual(ref_list[i], version_tuple)
+
+    @mock.patch('builtins.open', new_callable=mock_open, create=True)
+    def test_get_module_version_list_from_file_with_fw_name(self, mock_open):
+        mock_open.return_value.__enter__ = mock_open
+        mock_open.return_value.__iter__ = mock.Mock(
+            return_value=iter(['mxnet']))
+        ref_list = [('mxnet', None, None)]
+        version_list = get_module_version_list_from_file('mock_file')
+        self.assertEqual(len(version_list), 1)
+        for i, version_tuple in enumerate(version_list):
+            self.assertTupleEqual(ref_list[i], version_tuple)
+
+    def test_append_version_list(self):
+        v1 = 'mxnet>=1.0.0,<=1.3.1'
+        req_list = list()
+        parse_versions_list(v1, req_list)
+        ref_list = [('mxnet', '>=', '1.0.0'),
+                    ('mxnet', '<=', '1.3.1')]
+        for i, v in enumerate(req_list):
+            self.assertEqual(v, ref_list[i])
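The new test module can be run on its own. A minimal sketch using the standard `unittest` loader, assuming it is executed from the `model-optimizer` directory so that `mo.utils.versions_checker_test` is importable:

```python
# Minimal sketch: load and run the versions checker tests by module name.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName('mo.utils.versions_checker_test')
unittest.TextTestRunner(verbosity=2).run(suite)
```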
index 5b46a2c..2dba5b9 100644 (file)
@@ -4,5 +4,4 @@ networkx>=1.11
 numpy>=1.12.0
 protobuf==3.6.1
 onnx>=1.1.2
-test-generator==0.1.1
 defusedxml>=0.5.0
index eb74892..a032f83 100644 (file)
@@ -1,5 +1,4 @@
 networkx>=1.11
 numpy>=1.12.0
 protobuf==3.6.1
-test-generator==0.1.1
-defusedxml>=0.5.0
\ No newline at end of file
+defusedxml>=0.5.0
diff --git a/model-optimizer/requirements_dev.txt b/model-optimizer/requirements_dev.txt
new file mode 100644 (file)
index 0000000..d325751
--- /dev/null
@@ -0,0 +1,8 @@
+coverage==4.4.2
+m2r==0.1.12
+pyenchant==1.6.11
+pylint==2.1.1
+Sphinx==1.6.5
+safety==1.8.5
+test-generator==0.1.1
+defusedxml>=0.5.0
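With the development-only packages split out into `requirements_dev.txt` above, tools such as `test-generator`, `pylint`, and `coverage` are installed separately (for example with `pip3 install -r requirements_dev.txt`), which is why the per-framework requirements files below no longer pin `test-generator`.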
index 24caaf4..acd2c87 100644 (file)
@@ -1,4 +1,3 @@
 networkx>=1.11
 numpy==1.13.0
-test-generator==0.1.1
 defusedxml>=0.5.0
index 1e2f557..883ec69 100644 (file)
@@ -1,5 +1,4 @@
 mxnet>=1.0.0,<=1.3.1
 networkx>=1.11
 numpy>=1.12.0
-test-generator==0.1.1
-defusedxml>=0.5.0
\ No newline at end of file
+defusedxml>=0.5.0
index e196da4..e0ed76e 100644 (file)
@@ -1,5 +1,4 @@
 onnx>=1.1.2
 networkx>=1.11
 numpy>=1.12.0
-test-generator==0.1.1
-defusedxml>=0.5.0
\ No newline at end of file
+defusedxml>=0.5.0
index 209381c..2accfb7 100644 (file)
@@ -1,5 +1,4 @@
 tensorflow>=1.2.0,<2.0.0
 networkx>=1.11
 numpy>=1.12.0
-test-generator==0.1.1
-defusedxml>=0.5.0
\ No newline at end of file
+defusedxml>=0.5.0
index 8ff6785..63a5f5d 100644 (file)
@@ -1,3 +1,4 @@
+accuracy_checker
 accuracy_checker.log
 i8_normalized.dot
 openvino.tools.benchmark.log
diff --git a/tools/accuracy_checker/.pylintrc b/tools/accuracy_checker/.pylintrc
deleted file mode 100644 (file)
index 7c903ac..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-[MASTER]
-disable = C0103,
-          C0111,
-          too-many-locals,
-          too-many-arguments,
-          unused-argument,
-          too-many-instance-attributes,
-          too-few-public-methods,
-          unsubscriptable-object,
-          unbalanced-tuple-unpacking,
-          arguments-differ,
-          E1101,
-          E1111,
-          C0204,
-          W0201,
-          W0107,
-          R0401
-
-max-line-length = 120
-ignore-docstrings = yes
-extension-pkg-whitelist=inference_engine,cv2,numpy
-ignored-modules = numpy,cv2,openvino.inference_engine,caffe
-load-plugins = pylint_checkers
-ignored-classes = pathlib.PurePath
-jobs=0
-
-[SIMILARITIES]
-ignore-imports = yes
-
-[BASIC]
-bad-functions=print,as_posix,absolute
diff --git a/tools/accuracy_checker/README.md b/tools/accuracy_checker/README.md
deleted file mode 100644 (file)
index ceee153..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-# Deep Learning accuracy validation framework
-
-## Installation
-
-### Prerequisites
-
-Install prerequisites first:
-
-#### 1. Python
-
-**accuracy checker** uses **Python 3**. Install it first:
-
-- [Python3][python3], [setuptools][setuptools]:
-
-```bash
-sudo apt-get install python3 python3-dev python3-setuptools python3-pip
-```
-
-Python setuptools and python package manager (pip) install packages into system directory by default. Installation of accuracy checker tested only via [virtual environment][virtualenv].
-
-In order to use virtual environment you should install it first:
-
-```bash
-python3 -m pip install virtualenv
-python3 -m virtualenv -p `which python3` <directory_for_environment>
-```
-
-Before starting to work inside virtual environment, it should be activated:
-
-```bash
-source <directory_for_environment>/bin/activate
-```
-
-Virtual environment can be deactivated using command
-
-```bash
-deactivate
-```
-
-#### 2. Frameworks
-
-The next step is installing backend frameworks for Accuracy Checker.
-
-In order to evaluate some models required frameworks have to be installed. Accuracy-Checker supports these frameworks:
-
-- [OpenVINO][openvino-get-started].
-- [Caffe][caffe-get-started].
-
-You can use any of them or several at a time.
-
-#### 3. Requirements installation
-```bash
-pip3 install -r requirements.txt
-
-[python3]: https://www.python.org/downloads/
-[setuptools]: https://pypi.python.org/pypi/setuptools
-[caffe-get-started]: accuracy_checker/launcher/caffe_installation_readme.md
-[virtual-environment]: https://docs.python.org/3/tutorial/venv.html
-[virtualenv]: https://virtualenv.pypa.io/en/stable
-[openvino-get-started]: https://software.intel.com/en-us/openvino-toolkit/documentation/get-started
\ No newline at end of file
diff --git a/tools/accuracy_checker/accuracy_checker/__init__.py b/tools/accuracy_checker/accuracy_checker/__init__.py
deleted file mode 100644 (file)
index c73671b..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-__version__ = "0.6.9"
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/README.md b/tools/accuracy_checker/accuracy_checker/adapters/README.md
deleted file mode 100644 (file)
index 40cec31..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-# Adapters
-
-Adapter is a function for conversion network infer output to metric specific format.
-You can use 2 ways to set adapter for topology:
-* Define adapter as a string.
-
-```yml
-adapter: classification
-```
-
-* Define adapter as a dictionary, using `type:` for setting adapter name. This approach gives opportunity to set additional parameters for adapter if it is required.
-
-```yml
-adapter:
-  type: reid
-  grn_workaround: False
-```
-
-AccuracyChecker supports following set of adapters:
-* `classification` - converting output of classification model to `ClassificationPrediction` representation.
-* `segmentation` - converting output of semantic segmentation model to `SeegmentationPrediction` representation.
-* `tiny_yolo_v1` - converting output of Tiny YOLO v1 model to `DetectionPrediction` representation.
-* `reid` - converting output of reidentification model to `ReIdentificationPrediction` representation.
-  * `grn_workaround` - enabling processing output with adding Global Region Normalization layer.
-* `yolo_v2` - converting output of YOLO v2 family models to `DetectionPrediction` representation.
-  * `classes` - number of detection classes (default 20).
-  * `anchors` - anchor values provided as comma-separated list or one of precomputed: `yolo_v2` and `tiny_yolo_v2`.
-  * `coords` - number of bbox coordinates (default 4).
-  * `num` - num parameter from DarkNet configuration file (default 5).
-* `yolo_v3` - converting output of YOLO v3 family models to `DetectionPrediction` representation.
-  * `classes` - number of detection classes (default 80).
-  * `anchors` - anchor values provided as comma-separited list or precomputed: `yolo_v3`.
-  * `coords` - number of bbox coordinates (default 4).
-  * `num` - num parameter from DarkNet configuration file (default 3).
-  * `threshold` - minimal objectness score value for valid detections (default 0.001).
-  * `input_width` and `input_height` - network input width and height correspondingly (default 416).
-  * `outputs` - the list of output layers names (optional), if specified there should be exactly 3 output layers provided.
-* `lpr` - converting output of license plate recognition model to `CharacterRecognitionPrediction` representation.
-* `ssd` - converting  output of SSD model to `DetectionPrediction` representation.
-* `face_person_detection` - converting face person detection model output with 2 detection outputs to `ContainerPredition`, where value of parameters `face_out`and `person_out` are used for identification `DetectionPrediction` in container. 
-  * `face_out` -  face detection output layer name.
-  * `person_out` - person detection output layer name.
-* `attributes_recognition`  - converting vehicle attributes recognition model output to `ContainerPrediction` where value of parameters `color_out`and `type_out` are used for identification `ClassificationPrediction` in container. 
-  * `color_out` - vehicle color attribute output layer name.
-  * `type_out`- vehicle type attribute output layer name.
-* `head_pose` - converting head pose estimation model output to `ContainerPrediction` where names of parameters `angle_pitch`, `angle_yaw` and `angle_roll` are used for identification `RegressionPrediction` in container. 
-  * `angle_pitch` - output layer name for pitch angle.
-  * `angle_yaw`- output layer name for yaw angle.
-  * `angle_roll` - output layer name for roll angle.
-* `age_gender` - converting age gender recognition model output to `ContainerPrediction` with `ClassificationPrediction` named `gender` for gender recognition, `ClassificationPrediction` named `age_classification` and `RegressionPrediction` named `age_error` for age recognition.
-  * `age_out` - output layer name for age recognition.
-  * `gender_out` - output layer name for gender recognition.
-* `action_detection` - converting output of model for person detection and action recognition tasks to `ContainerPrediction` with `DetectionPrdiction` for class agnostic metric calculation and `DetectionPrediction` for action recognition. The representations in container have names `class_agnostic_prediction` and `action_prediction` respectively.
-  * `priorbox_out` - name of layer containing prior boxes in SSD format.
-  * `loc_out` - name of layer containing box coordinates in SSD format.
-  * `main_conf_out` - name of layer containing detection confidences.
-  * `add_conf_out_prefix` - prefix for generation name of layers containing action confidences if topology has several following layers or layer name.
-  * `add_conf_out_count` - number of layers with action confidences (optional, you can not provide this argument if action confidences contained in one layer).
-  * `num_action_classes` - number classes for action recognition.
-  * `detection_threshold` - minimal detection confidences level for valid detections.
-* `super_resolution` - converting output of single image super resolution network to `SuperResolutionPrediction`.
-* `landmarks_regression` - converting output of model for landmarks regression to `FacialLandmarksPrediction`.
-* `text_detection` - converting output of model for text detection to `TextDetectionPrediction`.
-  * `pixel_class_out` - name of layer containing information related to text/no-text classification for each pixel.
-  * `pixel_link_out` - name of layer containing information related to linkage between pixels and their neighbors.
-* `human_pose_estimation` - converting output of model for human pose estimation to `PoseEstimationPrediction`.
-  * `part_affinity_fields_out` - name of output layer with keypoints pairwise relations (part affinity fields).
-  * `keypoints_heatmap_out` - name of output layer with keypoints heatmaps.
-* `beam_search_decoder` - realization CTC Beam Search decoder for symbol sequence recognition, converting model output to `CharacterRecognitionPrediction`.
-  * `beam_size` -  size of the beam to use during decoding (default 10).
-  * `blank_label` - index of the CTC blank label.
-  * `softmaxed_probabilities` - indicator that model uses softmax for output layer (default False).
-* `gaze_estimation` - converting output of gaze estimation model to `GazeVectorPrediction`.
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/__init__.py b/tools/accuracy_checker/accuracy_checker/adapters/__init__.py
deleted file mode 100644 (file)
index 221d8ed..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .adapter import Adapter, AdapterField, create_adapter
-
-from .action_recognition import ActionDetection
-from .text_detection import TextDetectionAdapter, LPRAdapter, BeamSearchDecoder
-from .image_processing import SuperResolutionAdapter
-from .attributes_recognition import (
-    HeadPoseEstimatorAdapter,
-    VehicleAttributesRecognitionAdapter,
-    PersonAttributesAdapter,
-    AgeGenderAdapter,
-    LandmarksRegressionAdapter,
-    GazeEstimationAdapter
-)
-
-from .reidentification import ReidAdapter
-from .detection import TinyYOLOv1Adapter, SSDAdapter, FacePersonAdapter, YoloV2Adapter, YoloV3Adapter
-from .classification import ClassificationAdapter
-from .segmentation import SegmentationAdapter, BrainTumorSegmentationAdapter
-from .pose_estimation import HumanPoseAdapter
-
-from .dummy_adapters import XML2DetectionAdapter
-
-from .hit_ratio import HitRatioAdapter
-
-__all__ = [
-    'Adapter',
-    'AdapterField',
-    'create_adapter',
-
-    'XML2DetectionAdapter',
-
-    'ClassificationAdapter',
-
-    'SSDAdapter',
-    'TinyYOLOv1Adapter',
-    'YoloV2Adapter',
-    'YoloV3Adapter',
-    'FacePersonAdapter',
-
-    'SegmentationAdapter',
-    'BrainTumorSegmentationAdapter',
-
-    'ReidAdapter',
-
-    'SuperResolutionAdapter',
-
-    'HeadPoseEstimatorAdapter',
-    'VehicleAttributesRecognitionAdapter',
-    'PersonAttributesAdapter',
-    'AgeGenderAdapter',
-    'LandmarksRegressionAdapter',
-    'GazeEstimationAdapter',
-
-    'TextDetectionAdapter',
-
-    'BeamSearchDecoder',
-    'LPRAdapter',
-
-    'HumanPoseAdapter',
-
-    'ActionDetection',
-
-    'HitRatioAdapter'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py b/tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py
deleted file mode 100644 (file)
index 113eb9d..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..config import ConfigValidator, StringField, NumberField
-from ..representation import DetectionPrediction, ContainerPrediction
-
-
-class ActionDetectorConfig(ConfigValidator):
-    type = StringField()
-    priorbox_out = StringField()
-    loc_out = StringField()
-    main_conf_out = StringField()
-    add_conf_out_prefix = StringField()
-    add_conf_out_count = NumberField(optional=True, min_value=1)
-    num_action_classes = NumberField()
-    detection_threshold = NumberField(optional=True, floats=True, min_value=0, max_value=1)
-
-
-class ActionDetection(Adapter):
-    __provider__ = 'action_detection'
-
-    def validate_config(self):
-        action_detector_adapter_config = ActionDetectorConfig('ActionDetector_Config')
-        action_detector_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.priorbox_out = self.launcher_config['priorbox_out']
-        self.loc_out = self.launcher_config['loc_out']
-        self.main_conf_out = self.launcher_config['main_conf_out']
-        self.num_action_classes = self.launcher_config['num_action_classes']
-        self.detection_threshold = self.launcher_config.get('detection_threshold', 0)
-        add_conf_out_count = self.launcher_config.get('add_conf_out_count')
-        add_conf_out_prefix = self.launcher_config['add_conf_out_prefix']
-        if add_conf_out_count is None:
-            self.add_conf_outs = [add_conf_out_prefix]
-        else:
-            self.add_conf_outs = []
-            for num in np.arange(start=1, stop=add_conf_out_count + 1):
-                self.add_conf_outs.append('{}{}'.format(add_conf_out_prefix, num))
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-        prior_boxes = raw_outputs[self.priorbox_out][0][0].reshape(-1, 4)
-        prior_variances = raw_outputs[self.priorbox_out][0][1].reshape(-1, 4)
-        for batch_id, identifier in enumerate(identifiers):
-            labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = self.prepare_detection_for_id(
-                batch_id, raw_outputs, prior_boxes, prior_variances
-            )
-            action_prediction = DetectionPrediction(identifier, labels, class_scores, x_mins, y_mins, x_maxs, y_maxs)
-            person_prediction = DetectionPrediction(
-                identifier, [1] * len(labels), main_scores, x_mins, y_mins, x_maxs, y_maxs
-            )
-            result.append(ContainerPrediction({
-                'action_prediction': action_prediction, 'class_agnostic_prediction': person_prediction
-            }))
-
-        return result
-
-    def prepare_detection_for_id(self, batch_id, raw_outputs, prior_boxes, prior_variances):
-        num_detections = raw_outputs[self.loc_out][batch_id].size // 4
-        locs = raw_outputs[self.loc_out][batch_id].reshape(-1, 4)
-        main_conf = raw_outputs[self.main_conf_out][batch_id].reshape(num_detections, -1)
-        add_confs = list(map(
-            lambda layer: raw_outputs[layer][batch_id].reshape(-1, self.num_action_classes), self.add_conf_outs
-        ))
-        anchors_num = len(add_confs)
-        labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = [], [], [], [], [], [], []
-        for index in range(num_detections):
-            if main_conf[index, 1] < self.detection_threshold:
-                continue
-
-            x_min, y_min, x_max, y_max = self.decode_box(prior_boxes[index], prior_variances[index], locs[index])
-            action_confs = add_confs[index % anchors_num][index // anchors_num]
-            action_label = np.argmax(action_confs)
-            labels.append(action_label)
-            class_scores.append(action_confs[action_label])
-            x_mins.append(x_min)
-            y_mins.append(y_min)
-            x_maxs.append(x_max)
-            y_maxs.append(y_max)
-            main_scores.append(main_conf[index, 1])
-
-        return labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores
-
-    @staticmethod
-    def decode_box(prior, var, deltas):
-        prior_width = prior[2] - prior[0]
-        prior_height = prior[3] - prior[1]
-        prior_center_x = (prior[0] + prior[2]) / 2
-        prior_center_y = (prior[1] + prior[3]) / 2
-
-        decoded_box_center_x = var[0] * deltas[0] * prior_width + prior_center_x
-        decoded_box_center_y = var[1] * deltas[1] * prior_height + prior_center_y
-        decoded_box_width = np.exp(var[2] * deltas[2]) * prior_width
-        decoded_box_height = np.exp(var[3] * deltas[3]) * prior_height
-
-        decoded_xmin = decoded_box_center_x - decoded_box_width / 2
-        decoded_ymin = decoded_box_center_y - decoded_box_height / 2
-        decoded_xmax = decoded_box_center_x + decoded_box_width / 2
-        decoded_ymax = decoded_box_center_y + decoded_box_height / 2
-
-        return decoded_xmin, decoded_ymin, decoded_xmax, decoded_ymax
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/adapter.py b/tools/accuracy_checker/accuracy_checker/adapters/adapter.py
deleted file mode 100644 (file)
index 76d5ef6..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..config import BaseField, ConfigValidator, StringField, ConfigError
-from ..dependency import ClassProvider
-
-
-class Adapter(ClassProvider):
-    """
-    Interface that describes converting raw output to appropriate representation.
-    """
-
-    __provider_type__ = 'adapter'
-
-    def __init__(self, launcher_config, label_map=None, output_blob=None):
-        self.launcher_config = launcher_config
-        self.output_blob = output_blob
-        self.label_map = label_map
-
-        self.validate_config()
-        self.configure()
-
-    def __call__(self, context=None, outputs=None, **kwargs):
-        if outputs is not None:
-            return self.process(outputs, **kwargs)
-        predictions = self.process(context.prediction_batch, context.identifiers_batch, **kwargs)
-        context.prediction_batch = predictions
-        return context
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        raise NotImplementedError
-
-    def configure(self):
-        pass
-
-    def validate_config(self):
-        pass
-
-    @staticmethod
-    def _extract_predictions(outputs_list, meta):
-        return outputs_list[0]
-
-
-class AdapterField(BaseField):
-    def validate(self, entry, field_uri_=None):
-        super().validate(entry, field_uri_)
-
-        if entry is None:
-            return
-
-        field_uri_ = field_uri_ or self.field_uri
-        if isinstance(entry, str):
-            StringField(choices=Adapter.providers).validate(entry, 'adapter')
-        elif isinstance(entry, dict):
-            class DictAdapterValidator(ConfigValidator):
-                type = StringField(choices=Adapter.providers)
-            dict_adapter_validator = DictAdapterValidator(
-                'adapter', on_extra_argument=DictAdapterValidator.IGNORE_ON_EXTRA_ARGUMENT
-            )
-            dict_adapter_validator.validate(entry)
-        else:
-            self.raise_error(entry, field_uri_, 'adapter must be either string or dictionary')
-
-
-def create_adapter(adapter_config, launcher, dataset=None):
-    if isinstance(adapter_config, str):
-        adapter = Adapter.provide(adapter_config, launcher.config)
-    elif isinstance(adapter_config, dict):
-        adapter = Adapter.provide(adapter_config['type'], adapter_config)
-    else:
-        raise ConfigError('Unknown type for adapter configuration')
-    adapter.output_blob = launcher.output_blob
-    if dataset:
-        metadata = dataset.metadata
-        if metadata:
-            adapter.label_map = dataset.metadata.get('label_map')
-
-    return adapter
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/attributes_recognition.py b/tools/accuracy_checker/accuracy_checker/adapters/attributes_recognition.py
deleted file mode 100644 (file)
index a166b23..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..config import ConfigValidator, StringField
-from ..representation import (
-    ContainerPrediction,
-    RegressionPrediction,
-    ClassificationPrediction,
-    FacialLandmarksPrediction,
-    MultiLabelRecognitionPrediction,
-    GazeVectorPrediction
-)
-
-
-class HeadPoseEstimatorAdapterConfig(ConfigValidator):
-    type = StringField()
-    angle_yaw = StringField()
-    angle_pitch = StringField()
-    angle_roll = StringField()
-
-
-class HeadPoseEstimatorAdapter(Adapter):
-    """
-    Class for converting output of HeadPoseEstimator to HeadPosePrediction representation
-    """
-    __provider__ = 'head_pose'
-
-    def validate_config(self):
-        head_pose_estimator_adapter_config = HeadPoseEstimatorAdapterConfig(
-            'HeadPoseEstimator_Config', on_extra_argument=HeadPoseEstimatorAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
-        head_pose_estimator_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        """
-        Specifies parameters of config entry
-        """
-        self.angle_yaw = self.launcher_config['angle_yaw']
-        self.angle_pitch = self.launcher_config['angle_pitch']
-        self.angle_roll = self.launcher_config['angle_roll']
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            identifiers: list of input data identifiers
-            raw: output of model
-            frame_meta: list of meta information about each frame
-        Returns:
-                list of ContainerPrediction objects
-        """
-        result = []
-        raw_output = self._extract_predictions(raw, frame_meta)
-        for identifier, yaw, pitch, roll in zip(
-                identifiers,
-                raw_output[self.angle_yaw],
-                raw_output[self.angle_pitch],
-                raw_output[self.angle_roll]
-        ):
-            prediction = ContainerPrediction({'angle_yaw': RegressionPrediction(identifier, yaw[0]),
-                                              'angle_pitch': RegressionPrediction(identifier, pitch[0]),
-                                              'angle_roll': RegressionPrediction(identifier, roll[0])})
-            result.append(prediction)
-
-        return result
-
-
-class VehicleAttributesRecognitionAdapterConfig(ConfigValidator):
-    type = StringField()
-    color_out = StringField()
-    type_out = StringField()
-
-
-class VehicleAttributesRecognitionAdapter(Adapter):
-    __provider__ = 'vehicle_attributes'
-
-    def validate_config(self):
-        attributes_recognition_adapter_config = VehicleAttributesRecognitionAdapterConfig(
-            'VehicleAttributesRecognition_Config',
-            on_extra_argument=VehicleAttributesRecognitionAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
-        attributes_recognition_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        """
-        Specifies parameters of config entry
-        """
-        self.color_out = self.launcher_config['color_out']
-        self.type_out = self.launcher_config['type_out']
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        res = []
-        raw_output = self._extract_predictions(raw, frame_meta)
-        for identifier, colors, types in zip(identifiers, raw_output[self.color_out], raw_output[self.type_out]):
-            res.append(ContainerPrediction({'color': ClassificationPrediction(identifier, colors.reshape(-1)),
-                                            'type': ClassificationPrediction(identifier, types.reshape(-1))}))
-        return res
-
-
-class AgeGenderAdapterConfig(ConfigValidator):
-    type = StringField()
-    age_out = StringField()
-    gender_out = StringField()
-
-
-class AgeGenderAdapter(Adapter):
-    __provider__ = 'age_gender'
-
-    def configure(self):
-        self.age_out = self.launcher_config['age_out']
-        self.gender_out = self.launcher_config['gender_out']
-
-    def validate_config(self):
-        age_gender_adapter_config = AgeGenderAdapterConfig(
-            'AgeGender_Config', on_extra_argument=AgeGenderAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
-        age_gender_adapter_config.validate(self.launcher_config)
-
-    @staticmethod
-    def get_age_scores(age):
-        age_scores = np.zeros(4)
-        if age < 19:
-            age_scores[0] = 1
-            return age_scores
-        if age < 36:
-            age_scores[1] = 1
-            return age_scores
-        if age < 66:
-            age_scores[2] = 1
-            return age_scores
-        age_scores[3] = 1
-        return age_scores
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        raw_output = self._extract_predictions(raw, frame_meta)
-        for identifier, age, gender in zip(identifiers, raw_output[self.age_out], raw_output[self.gender_out]):
-            gender = gender.reshape(-1)
-            age = age.reshape(-1)[0]*100
-            gender_rep = ClassificationPrediction(identifier, gender)
-            age_class_rep = ClassificationPrediction(identifier, self.get_age_scores(age))
-            age_error_rep = RegressionPrediction(identifier, age)
-            result.append(ContainerPrediction({'gender': gender_rep, 'age_classification': age_class_rep,
-                                               'age_error': age_error_rep}))
-        return result
-
-
-class LandmarksRegressionAdapter(Adapter):
-    __provider__ = 'landmarks_regression'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        res = []
-        raw_output = self._extract_predictions(raw, frame_meta)
-        for identifier, values in zip(identifiers, raw_output[self.output_blob]):
-            x_values, y_values = values[::2], values[1::2]
-            res.append(FacialLandmarksPrediction(identifier, x_values.reshape(-1), y_values.reshape(-1)))
-        return res
-
-
-class PersonAttributesConfig(ConfigValidator):
-    attributes_recognition_out = StringField(optional=True)
-
-
-class PersonAttributesAdapter(Adapter):
-    __provider__ = 'person_attributes'
-
-    def validate_config(self):
-        person_attributes_adapter_config = PersonAttributesConfig(
-            'PersonAttributes_Config',
-            PersonAttributesConfig.IGNORE_ON_EXTRA_ARGUMENT
-        )
-        person_attributes_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.attributes_recognition_out = self.launcher_config.get('attributes_recognition_out', self.output_blob)
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        raw_output = self._extract_predictions(raw, frame_meta)
-        self.attributes_recognition_out = self.attributes_recognition_out or self.output_blob
-        for identifier, multi_label in zip(identifiers, raw_output[self.attributes_recognition_out]):
-            multi_label[multi_label > 0.5] = 1.
-            multi_label[multi_label <= 0.5] = 0.
-
-            result.append(MultiLabelRecognitionPrediction(identifier, multi_label.reshape(-1)))
-
-        return result
-
-
-class GazeEstimationAdapter(Adapter):
-    __provider__ = 'gaze_estimation'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        raw_output = self._extract_predictions(raw, frame_meta)
-        for identifier, output in zip(identifiers, raw_output[self.output_blob]):
-            result.append(GazeVectorPrediction(identifier, output))
-
-        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/classification.py b/tools/accuracy_checker/accuracy_checker/adapters/classification.py
deleted file mode 100644 (file)
index ddcf267..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..representation import ClassificationPrediction
-
-
-class ClassificationAdapter(Adapter):
-    """
-    Class for converting output of classification model to ClassificationPrediction representation
-    """
-    __provider__ = 'classification'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            identifiers: list of input data identifiers
-            raw: output of model
-            frame_meta: list of meta information about each frame
-        Returns:
-            list of ClassificationPrediction objects
-        """
-        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
-        prediction = np.reshape(prediction, (prediction.shape[0], -1))
-
-        result = []
-        for identifier, output in zip(identifiers, prediction):
-            result.append(ClassificationPrediction(identifier, output))
-
-        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/detection.py b/tools/accuracy_checker/accuracy_checker/adapters/detection.py
deleted file mode 100644 (file)
index 8f36d13..0000000
+++ /dev/null
@@ -1,501 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import itertools
-import math
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..config import ConfigValidator, NumberField, StringField, ListField
-from ..postprocessor.nms import NMS
-from ..representation import DetectionPrediction, ContainerPrediction
-from ..utils import get_or_parse_value
-
-
-class TinyYOLOv1Adapter(Adapter):
-    """
-    Class for converting output of Tiny YOLO v1 model to DetectionPrediction representation
-    """
-    __provider__ = 'tiny_yolo_v1'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            identifiers: list of input data identifiers
-            raw: output of model
-        Returns:
-             list of DetectionPrediction objects
-        """
-        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
-
-        PROBABILITY_SIZE = 980
-        CONFIDENCE_SIZE = 98
-        BOXES_SIZE = 392
-
-        CELLS_X, CELLS_Y = 7, 7
-        CLASSES = 20
-        OBJECTS_PER_CELL = 2
-
-        result = []
-        for identifier, output in zip(identifiers, prediction):
-            assert PROBABILITY_SIZE + CONFIDENCE_SIZE + BOXES_SIZE == output.shape[0]
-
-            probability, scale, boxes = np.split(output, [PROBABILITY_SIZE, PROBABILITY_SIZE + CONFIDENCE_SIZE])
-
-            probability = np.reshape(probability, (CELLS_Y, CELLS_X, CLASSES))
-            scale = np.reshape(scale, (CELLS_Y, CELLS_X, OBJECTS_PER_CELL))
-            boxes = np.reshape(boxes, (CELLS_Y, CELLS_X, OBJECTS_PER_CELL, 4))
-
-            confidence = np.zeros((CELLS_Y, CELLS_X, OBJECTS_PER_CELL, CLASSES + 4))
-            for cls in range(CLASSES):
-                confidence[:, :, 0, cls] = np.multiply(probability[:, :, cls], scale[:, :, 0])
-                confidence[:, :, 1, cls] = np.multiply(probability[:, :, cls], scale[:, :, 1])
-
-            labels, scores, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
-            for i, j, k in np.ndindex((CELLS_X, CELLS_Y, OBJECTS_PER_CELL)):
-                box = boxes[j, i, k]
-                box = [(box[0] + i) / float(CELLS_X), (box[1] + j) / float(CELLS_Y), box[2] ** 2, box[3] ** 2]
-
-                label = np.argmax(confidence[j, i, k, :CLASSES])
-                score = confidence[j, i, k, label]
-
-                labels.append(label)
-                scores.append(score)
-                x_mins.append(box[0] - box[2] / 2.0)
-                y_mins.append(box[1] - box[3] / 2.0)
-                x_maxs.append(box[0] + box[2] / 2.0)
-                y_maxs.append(box[1] + box[3] / 2.0)
-
-            result.append(DetectionPrediction(identifier, labels, scores, x_mins, y_mins, x_maxs, y_maxs))
-
-        return result
-
-
-PRECOMPUTED_ANCHORS = {
-    'yolo_v2': [1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071],
-    'tiny_yolo_v2': [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52],
-    'yolo_v3': [
-        10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0
-    ],
-    'tiny_yolo_v3': [10.0, 14.0, 23.0, 27.0, 37.0, 58.0, 81.0, 82.0, 135.0, 169.0, 344.0, 319.0]
-}
-
-
-def entry_index(w, h, n_coords, n_classes, pos, entry):
-    row = pos // (w * h)
-    col = pos % (w * h)
-    return row * w * h * (n_classes + n_coords + 1) + entry * w * h + col
-
-
-class BaseYoloAdapterConfig(ConfigValidator):
-    classes = NumberField(floats=False, optional=True, min_value=1)
-    coords = NumberField(floats=False, optional=True, min_value=1)
-    num = NumberField(floats=False, optional=True, min_value=1)
-    anchors = StringField(optional=True)
-
-
-class YoloV2Adapter(Adapter):
-    """
-    Class for converting output of YOLO v2 family models to DetectionPrediction representation
-    """
-    __provider__ = 'yolo_v2'
-
-    def validate_config(self):
-        yolo_v2_adapter_config = BaseYoloAdapterConfig('BaseYoloAdapter_Config')
-        yolo_v2_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.classes = self.launcher_config.get('classes', 20)
-        self.coords = self.launcher_config.get('coords', 4)
-        self.num = self.launcher_config.get('num', 5)
-        self.anchors = get_or_parse_value(self.launcher_config.get('anchors', 'yolo_v2'), PRECOMPUTED_ANCHORS)
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            identifiers: list of input data identifiers
-            raw: output of model
-        Returns:
-            list of DetectionPrediction objects
-        """
-        predictions = self._extract_predictions(raw, frame_meta)[self.output_blob]
-
-        cells_x, cells_y = 13, 13
-
-        result = []
-        for identifier, prediction in zip(identifiers, predictions):
-            labels, scores, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
-            for y, x, n in np.ndindex((cells_y, cells_x, self.num)):
-                index = n * cells_y * cells_x + y * cells_x + x
-
-                box_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, 0)
-                obj_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, self.coords)
-
-                scale = prediction[obj_index]
-
-                box = [
-                    (x + prediction[box_index + 0 * (cells_y * cells_x)]) / cells_x,
-                    (y + prediction[box_index + 1 * (cells_y * cells_x)]) / cells_y,
-                    np.exp(prediction[box_index + 2 * (cells_y * cells_x)]) * self.anchors[2 * n + 0] / cells_x,
-                    np.exp(prediction[box_index + 3 * (cells_y * cells_x)]) * self.anchors[2 * n + 1] / cells_y
-                ]
-
-                classes_prob = np.empty(self.classes)
-                for cls in range(self.classes):
-                    cls_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, self.coords + 1 + cls)
-                    classes_prob[cls] = prediction[cls_index]
-
-                classes_prob = classes_prob * scale
-
-                label = np.argmax(classes_prob)
-
-                labels.append(label)
-                scores.append(classes_prob[label])
-                x_mins.append(box[0] - box[2] / 2.0)
-                y_mins.append(box[1] - box[3] / 2.0)
-                x_maxs.append(box[0] + box[2] / 2.0)
-                y_maxs.append(box[1] + box[3] / 2.0)
-
-            result.append(DetectionPrediction(identifier, labels, scores, x_mins, y_mins, x_maxs, y_maxs))
-
-        return result
-
-
-class YoloV3AdapterConfig(BaseYoloAdapterConfig):
-    threshold = NumberField(floats=True, optional=True, min_value=0)
-    outputs = ListField(optional=True)
-
-
-class YoloV3Adapter(Adapter):
-    """
-    Class for converting output of YOLO v3 family models to DetectionPrediction representation
-    """
-    __provider__ = 'yolo_v3'
-
-    def validate_config(self):
-        yolo_v3_adapter_config = YoloV3AdapterConfig('YoloV3Adapter_Config')
-        yolo_v3_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.classes = self.launcher_config.get('classes', 80)
-        self.coords = self.launcher_config.get('coords', 4)
-        self.num = self.launcher_config.get('num', 3)
-        self.anchors = get_or_parse_value(self.launcher_config.get('anchors', 'yolo_v3'), PRECOMPUTED_ANCHORS)
-        self.threshold = self.launcher_config.get('threshold', 0.001)
-        self.outputs = self.launcher_config.get('outputs', [])
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            identifiers: list of input data identifiers
-            raw: output of model
-        Returns:
-            list of DetectionPrediction objects
-        """
-
-        def get_anchors_offset(x):
-            return int((self.num * 2) * (len(self.anchors) / (self.num * 2) - 1 - math.log2(x / 13)))
-
-        def parse_yolo_v3_results(prediction, threshold, w, h, det):
-            cells_x, cells_y = prediction.shape[1:]
-            prediction = prediction.flatten()
-            for y, x, n in np.ndindex((cells_y, cells_x, self.num)):
-                index = n * cells_y * cells_x + y * cells_x + x
-                anchors_offset = get_anchors_offset(cells_x)
-
-                box_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, 0)
-                obj_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, self.coords)
-
-                scale = prediction[obj_index]
-                if scale < threshold:
-                    continue
-
-                box = [
-                    (x + prediction[box_index + 0 * (cells_y * cells_x)]) / cells_x,
-                    (y + prediction[box_index + 1 * (cells_y * cells_x)]) / cells_y,
-                    np.exp(prediction[box_index + 2 * (cells_y * cells_x)]) * self.anchors[
-                        anchors_offset + 2 * n + 0] / w,
-                    np.exp(prediction[box_index + 3 * (cells_y * cells_x)]) * self.anchors[
-                        anchors_offset + 2 * n + 1] / h
-                ]
-
-                classes_prob = np.empty(self.classes)
-                for cls in range(self.classes):
-                    cls_index = entry_index(cells_x, cells_y, self.coords, self.classes, index,
-                                            self.coords + 1 + cls)
-                    classes_prob[cls] = prediction[cls_index] * scale
-
-                    det['labels'].append(cls)
-                    det['scores'].append(classes_prob[cls])
-                    det['x_mins'].append(box[0] - box[2] / 2.0)
-                    det['y_mins'].append(box[1] - box[3] / 2.0)
-                    det['x_maxs'].append(box[0] + box[2] / 2.0)
-                    det['y_maxs'].append(box[1] + box[3] / 2.0)
-
-            return det
-
-        result = []
-
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-
-        if self.outputs:
-            outputs = self.outputs
-        else:
-            outputs = raw_outputs.keys()
-
-        batch = len(identifiers)
-        predictions = [[] for _ in range(batch)]
-        for blob in outputs:
-            for b in range(batch):
-                predictions[b].append(raw_outputs[blob][b])
-
-        for identifier, prediction, meta in zip(identifiers, predictions, frame_meta):
-            detections = {'labels': [], 'scores': [], 'x_mins': [], 'y_mins': [], 'x_maxs': [], 'y_maxs': []}
-            input_shape = list(meta.get('input_shape', {'data': (3, 416, 416)}).values())[0]
-            self.input_width = input_shape[2]
-            self.input_height = input_shape[1]
-
-            for p in prediction:
-                parse_yolo_v3_results(p, self.threshold, self.input_width, self.input_height, detections)
-
-            result.append(DetectionPrediction(
-                identifier, detections['labels'], detections['scores'], detections['x_mins'], detections['y_mins'],
-                detections['x_maxs'], detections['y_maxs']
-            ))
-
-        return result
-
-
-class SSDAdapter(Adapter):
-    """
-    Class for converting output of SSD model to DetectionPrediction representation
-    """
-    __provider__ = 'ssd'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            identifiers: list of input data identifiers
-            raw: output of model
-        Returns:
-            list of DetectionPrediction objects
-        """
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-        prediction_batch = raw_outputs[self.output_blob]
-        prediction_count = prediction_batch.shape[2]
-        prediction_batch = prediction_batch.reshape(prediction_count, -1)
-        prediction_batch = self.remove_empty_detections(prediction_batch)
-
-        result = []
-        for batch_index, identifier in enumerate(identifiers):
-            prediction_mask = np.where(prediction_batch[:, 0] == batch_index)
-            detections = prediction_batch[prediction_mask]
-            detections = detections[:, 1::]
-            result.append(DetectionPrediction(identifier, *zip(*detections)))
-
-        return result
-
-    @staticmethod
-    def remove_empty_detections(prediction_blob):
-        ind = prediction_blob[:, 0]
-        ind_ = np.where(ind == -1)[0]
-        m = ind_[0] if ind_.size else prediction_blob.shape[0]
-        return prediction_blob[:m, :]
-
-
-class PyTorchSSDDecoderConfig(ConfigValidator):
-    type = StringField()
-    scores_out = StringField()
-    boxes_out = StringField()
-    confidence_threshold = NumberField(optional=True)
-    nms_threshold = NumberField(optional=True)
-    keep_top_k = NumberField(optional=True, floats=False)
-
-
-class PyTorchSSDDecoder(Adapter):
-    """
-    Class for converting output of PyTorch SSD models to DetectionPrediction representation
-    """
-    __provider__ = 'pytorch_ssd_decoder'
-
-    def validate_config(self):
-        config_validator = PyTorchSSDDecoderConfig(
-            'PyTorchSSD_decoder_config', PyTorchSSDDecoderConfig.ERROR_ON_EXTRA_ARGUMENT
-        )
-
-        config_validator.validate(self.launcher_config)
-
-    def configure(self):
-        self.scores_out = self.launcher_config['scores_out']
-        self.boxes_out = self.launcher_config['boxes_out']
-        self.confidence_threshold = self.launcher_config.get('confidence_threshold', 0.05)
-        self.nms_threshold = self.launcher_config.get('nms_threshold', 0.5)
-        self.keep_top_k = self.launcher_config.get('keep_top_k', 200)
-
-        # Set default values according to:
-        # https://github.com/mlperf/inference/tree/master/cloud/single_stage_detector
-        self.aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
-        self.feat_size = [[50, 50], [25, 25], [13, 13], [7, 7], [3, 3], [3, 3]]
-        self.scales = [21, 45, 99, 153, 207, 261, 315]
-        self.strides = [3, 3, 2, 2, 2, 2]
-        self.scale_xy = 0.1
-        self.scale_wh = 0.2
-
-    @staticmethod
-    def softmax(x, axis=0):
-        return np.transpose(np.transpose(np.exp(x)) * np.reciprocal(np.sum(np.exp(x), axis=axis)))
-
-    @staticmethod
-    def default_boxes(fig_size, feat_size, scales, aspect_ratios):
-
-        fig_size_w, fig_size_h = fig_size
-        scales = [(int(s * fig_size_w / 300), int(s * fig_size_h / 300)) for s in scales]
-        fkw, fkh = np.transpose(feat_size)
-
-        default_boxes = []
-        for idx, sfeat in enumerate(feat_size):
-            sfeat_w, sfeat_h = sfeat
-            sk1 = scales[idx][0] / fig_size_w
-            sk2 = scales[idx + 1][1] / fig_size_h
-            sk3 = math.sqrt(sk1 * sk2)
-            all_sizes = [(sk1, sk1), (sk3, sk3)]
-            for alpha in aspect_ratios[idx]:
-                w, h = sk1 * math.sqrt(alpha), sk1 / math.sqrt(alpha)
-                all_sizes.append((w, h))
-                all_sizes.append((h, w))
-            for w, h in all_sizes:
-                for i, j in itertools.product(range(sfeat_w), range(sfeat_h)):
-                    cx, cy = (j + 0.5) / fkh[idx], (i + 0.5) / fkw[idx]
-                    default_boxes.append((cx, cy, w, h))
-        default_boxes = np.clip(default_boxes, 0, 1)
-
-        return default_boxes
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            raw: output of model
-            identifiers: list of input data identifiers
-            frame_meta: list of metadata for input frames
-        Returns:
-            list of DetectionPrediction objects
-        """
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-
-        batch_scores = raw_outputs[self.scores_out]
-        batch_boxes = raw_outputs[self.boxes_out]
-
-        result = []
-        for identifier, scores, boxes, meta in zip(identifiers, batch_scores, batch_boxes, frame_meta):
-            detections = {'labels': [], 'scores': [], 'x_mins': [], 'y_mins': [], 'x_maxs': [], 'y_maxs': []}
-            image_info = meta.get("image_size")[0:2]
-
-            # Default boxes
-            dboxes = self.default_boxes(image_info, self.feat_size, self.scales, self.aspect_ratios)
-
-            # Scores
-            scores = np.transpose(scores)
-            scores = self.softmax(scores, axis=1)
-
-            # Boxes
-            boxes = np.transpose(boxes)
-            boxes[:, :2] = self.scale_xy * boxes[:, :2]
-            boxes[:, 2:] = self.scale_wh * boxes[:, 2:]
-            boxes[:, :2] = boxes[:, :2] * dboxes[:, 2:] + dboxes[:, :2]
-            boxes[:, 2:] = np.exp(boxes[:, 2:]) * dboxes[:, 2:]
-
-            for label, score in enumerate(np.transpose(scores)):
-
-                # Skip background label
-                if label == 0:
-                    continue
-
-                # Filter out detections with score < confidence_threshold
-                mask = score > self.confidence_threshold
-                filtered_boxes, filtered_score = boxes[mask, :], score[mask]
-                if filtered_score.size == 0:
-                    continue
-
-                # Transform to format (x_min, y_min, x_max, y_max)
-                x_mins = (filtered_boxes[:, 0] - 0.5 * filtered_boxes[:, 2])
-                y_mins = (filtered_boxes[:, 1] - 0.5 * filtered_boxes[:, 3])
-                x_maxs = (filtered_boxes[:, 0] + 0.5 * filtered_boxes[:, 2])
-                y_maxs = (filtered_boxes[:, 1] + 0.5 * filtered_boxes[:, 3])
-
-                # Apply NMS
-                keep = NMS.nms(x_mins, y_mins, x_maxs, y_maxs, filtered_score, self.nms_threshold,
-                               include_boundaries=False, keep_top_k=self.keep_top_k)
-
-                filtered_score = filtered_score[keep]
-                x_mins = x_mins[keep]
-                y_mins = y_mins[keep]
-                x_maxs = x_maxs[keep]
-                y_maxs = y_maxs[keep]
-
-                # Keep topK
-                # Applied just after NMS - no additional sorting is required for filtered_score array
-                filtered_score = filtered_score[:self.keep_top_k]
-                x_mins = x_mins[:self.keep_top_k]
-                y_mins = y_mins[:self.keep_top_k]
-                x_maxs = x_maxs[:self.keep_top_k]
-                y_maxs = y_maxs[:self.keep_top_k]
-
-                # Save detections
-                labels = np.full_like(filtered_score, label)
-                detections['labels'].extend(labels)
-                detections['scores'].extend(filtered_score)
-                detections['x_mins'].extend(x_mins)
-                detections['y_mins'].extend(y_mins)
-                detections['x_maxs'].extend(x_maxs)
-                detections['y_maxs'].extend(y_maxs)
-
-            result.append(
-                DetectionPrediction(
-                    identifier, detections['labels'], detections['scores'], detections['x_mins'],
-                    detections['y_mins'], detections['x_maxs'], detections['y_maxs']
-                )
-            )
-
-        return result
-
-
-class FacePersonDetectionAdapterConfig(ConfigValidator):
-    type = StringField()
-    face_out = StringField()
-    person_out = StringField()
-
-
-class FacePersonAdapter(Adapter):
-    __provider__ = 'face_person_detection'
-
-    def validate_config(self):
-        face_person_detection_adapter_config = FacePersonDetectionAdapterConfig(
-            'FacePersonDetection_Config', on_extra_argument=FacePersonDetectionAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
-        face_person_detection_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.face_detection_out = self.launcher_config['face_out']
-        self.person_detection_out = self.launcher_config['person_out']
-        self.face_adapter = SSDAdapter(self.launcher_config, self.label_map, self.face_detection_out)
-        self.person_adapter = SSDAdapter(self.launcher_config, self.label_map, self.person_detection_out)
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        face_batch_result = self.face_adapter.process(raw, identifiers)
-        person_batch_result = self.person_adapter.process(raw, identifiers)
-        result = [ContainerPrediction({self.face_detection_out: face_result, self.person_detection_out: person_result})
-                  for face_result, person_result in zip(face_batch_result, person_batch_result)]
-
-        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/dummy_adapters.py b/tools/accuracy_checker/accuracy_checker/adapters/dummy_adapters.py
deleted file mode 100644 (file)
index 300dec9..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..representation import DetectionPrediction
-from ..adapters import Adapter
-
-
-class XML2DetectionAdapter(Adapter):
-    """
-    Class for converting xml detection results in OpenCV FileStorage format to DetectionPrediction representation.
-    """
-
-    __provider__ = 'xml_detection'
-
-    def process(self, tree, identifiers=None, frame_meta=None):
-        class_to_ind = dict(zip(self.label_map.values(), range(len(self.label_map.values()))))
-
-        result = {}
-        for frames in tree.getroot():
-            for frame in frames:
-                identifier = frame.tag + '.png'
-                labels, scores, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
-                for prediction in frame:
-                    if prediction.find('is_ignored') is not None:
-                        continue
-
-                    label = prediction.find('type')
-                    if label is None:
-                        raise ValueError('Detection predictions contains detection without "{}"'.format('type'))
-                    label = class_to_ind[label.text]
-
-                    confidence = prediction.find('confidence')
-                    if confidence is None:
-                        raise ValueError('Detection predictions contains detection without "{}"'.format('confidence'))
-                    confidence = float(confidence.text)
-
-                    box = prediction.find('roi')
-                    if box is None:
-                        raise ValueError('Detection predictions contains detection without "{}"'.format('roi'))
-                    box = list(map(float, box.text.split()))
-
-                    labels.append(label)
-                    scores.append(confidence)
-                    x_mins.append(box[0])
-                    y_mins.append(box[1])
-                    x_maxs.append(box[0] + box[2])
-                    y_maxs.append(box[1] + box[3])
-
-                    result[identifier] = DetectionPrediction(identifier, labels, scores, x_mins, y_mins, x_maxs, y_maxs)
-
-        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/hit_ratio.py b/tools/accuracy_checker/accuracy_checker/adapters/hit_ratio.py
deleted file mode 100644 (file)
index f28b84f..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..representation import HitRatioPrediction
-
-
-class HitRatioAdapter(Adapter):
-    """
-    Class for converting output of NCF model to HitRatioPrediction representation.
-    """
-
-    __provider__ = 'hit_ratio_adapter'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            raw: output of model.
-            identifiers: list of input data identifiers.
-            frame_meta: metadata for frame.
-        Returns:
-            list of HitRatioPrediction objects.
-        """
-
-        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
-        prediction = np.reshape(prediction, -1)
-
-        result = []
-        for identifier, output in zip(identifiers, prediction):
-            result.append(HitRatioPrediction(identifier, output))
-
-        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/image_processing.py b/tools/accuracy_checker/accuracy_checker/adapters/image_processing.py
deleted file mode 100644 (file)
index 21ecec3..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..representation import SuperResolutionPrediction
-
-
-class SuperResolutionAdapter(Adapter):
-    __provider__ = 'super_resolution'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-        for identifier, img_sr in zip(identifiers, raw_outputs[self.output_blob]):
-            img_sr *= 255
-            img_sr = np.clip(img_sr, 0., 255.)
-            img_sr = img_sr.transpose((1, 2, 0)).astype(np.uint8)
-            result.append(SuperResolutionPrediction(identifier, img_sr))
-
-        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/pose_estimation.py b/tools/accuracy_checker/accuracy_checker/adapters/pose_estimation.py
deleted file mode 100644 (file)
index 25350f5..0000000
+++ /dev/null
@@ -1,331 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import math
-from operator import itemgetter
-
-import cv2
-import numpy as np
-
-from ..adapters import Adapter
-from ..config import ConfigValidator, StringField
-from ..representation import PoseEstimationPrediction
-
-
-class HumanPoseAdapterConfig(ConfigValidator):
-    type = StringField()
-    part_affinity_fields_out = StringField()
-    keypoints_heatmap_out = StringField()
-
-
-class HumanPoseAdapter(Adapter):
-    __provider__ = 'human_pose_estimation'
-
-    limb_seq = [
-        [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [10, 11], [2, 12], [12, 13],
-        [13, 14], [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]
-    ]
-    map_idx = [
-        [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], [23, 24], [25, 26],
-        [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], [55, 56], [37, 38], [45, 46]
-    ]
-
-    def validate_config(self):
-        human_pose_estimation_config = HumanPoseAdapterConfig('HumanPose_Config')
-        human_pose_estimation_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.part_affinity_fields = self.launcher_config['part_affinity_fields_out']
-        self.keypoints_heatmap = self.launcher_config['keypoints_heatmap_out']
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-        raw_output = zip(
-            identifiers, raw_outputs[self.keypoints_heatmap],
-            raw_outputs[self.part_affinity_fields], frame_meta
-        )
-        for identifier, heatmap, paf, meta in raw_output:
-            height, width, _ = meta['image_size']
-            heatmap_avg = np.zeros((height, width, 19), dtype=np.float32)
-            paf_avg = np.zeros((height, width, 38), dtype=np.float32)
-            pad = meta.get('padding', [0, 0, 0, 0])
-            heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0))
-            heatmap = cv2.resize(heatmap, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
-            heatmap = heatmap[pad[0]:heatmap.shape[0] - pad[2], pad[1]:heatmap.shape[1] - pad[3]:, :]
-            heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
-            heatmap_avg = heatmap_avg + heatmap
-
-            paf = np.transpose(np.squeeze(paf), (1, 2, 0))
-            paf = cv2.resize(paf, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
-            paf = paf[pad[0]:paf.shape[0] - pad[2], pad[1]:paf.shape[1] - pad[3], :]
-            paf = cv2.resize(paf, (width, height), interpolation=cv2.INTER_CUBIC)
-            paf_avg = paf_avg + paf
-
-            peak_counter = 0
-            all_peaks = []
-            for part in range(0, 18):  # 19th for bg
-                peak_counter += self.find_peaks(heatmap_avg[:, :, part], all_peaks, peak_counter)
-
-            subset, candidate = self.group_peaks(all_peaks, paf_avg)
-            result.append(PoseEstimationPrediction(identifier, *self.get_poses(subset, candidate)))
-
-        return result
-
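-    # find_peaks extracts local maxima of a single keypoint heatmap by comparing each
-    # pixel with its four shifted neighbours, suppresses peaks that lie within 6 px of an
-    # already accepted one, and assigns every surviving peak a globally unique id.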
-    @staticmethod
-    def find_peaks(heatmap, all_peaks, prev_peak_counter):
-        heatmap[heatmap < 0.1] = 0
-        map_aug = np.zeros((heatmap.shape[0] + 2, heatmap.shape[1] + 2))
-        map_left = np.zeros(map_aug.shape)
-        map_right = np.zeros(map_aug.shape)
-        map_up = np.zeros(map_aug.shape)
-        map_down = np.zeros(map_aug.shape)
-
-        map_aug[1:map_aug.shape[0] - 1, 1:map_aug.shape[1] - 1] = heatmap
-        map_left[1:map_aug.shape[0] - 1, :map_aug.shape[1] - 2] = heatmap
-        map_right[1:map_aug.shape[0] - 1, 2:map_aug.shape[1]] = heatmap
-        map_up[:map_aug.shape[0] - 2, 1:map_aug.shape[1] - 1] = heatmap
-        map_down[2:map_aug.shape[0], 1:map_aug.shape[1] - 1] = heatmap
-
-        peaks_binary = (map_aug > map_left) & (map_aug > map_right) & (map_aug > map_up) & (map_aug > map_down)
-        peaks_binary = peaks_binary[1:map_aug.shape[0] - 1, 1:map_aug.shape[1] - 1]
-        peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))
-        peaks = sorted(peaks, key=itemgetter(0))  # same order with matlab
-
-        flag = np.ones(len(peaks), np.uint8)
-        peaks_with_score_and_id = []
-        peak_counter = 0
-        for i, _ in enumerate(peaks):
-            if flag[i] != 1:
-                continue
-            for j in range(i + 1, len(peaks)):
-                if math.sqrt((peaks[i][0] - peaks[j][0]) ** 2 + (peaks[i][1] - peaks[j][1]) ** 2) < 6:
-                    flag[j] = 0
-            peak_id = peak_counter + prev_peak_counter
-            peak_counter += 1
-            peaks_with_score_and_id.append([peaks[i][0], peaks[i][1], heatmap[peaks[i][1], peaks[i][0]], peak_id])
-        all_peaks.append(peaks_with_score_and_id)
-
-        return peak_counter
-
-    @staticmethod
-    def _add_pose_single_candidate(subset, candidate, idx_joint, kpt_num=20):
-        for joint in candidate:
-            num = 0
-            for subset_j in subset:  # check if already in some pose, was added as a part of another limb
-                if subset_j[idx_joint] == joint[3]:
-                    num += 1
-                    continue
-            if num == 0:
-                person_keypoints = np.ones(kpt_num) * -1
-                person_keypoints[idx_joint] = joint[3]  # joint idx
-                person_keypoints[-1] = 1  # n joints in pose
-                person_keypoints[-2] = joint[2]  # pose score
-                subset.append(person_keypoints)
-
-        return subset
-
-    @staticmethod
-    def _filter_subset(subset):
-        filtered_subset = []
-        for subset_element in subset:
-            if subset_element[-1] < 3 or (subset_element[-2] / subset_element[-1] < 0.2):
-                continue
-            filtered_subset.append(subset_element)
-
-        return np.asarray(filtered_subset)
-
-    @staticmethod
-    def _add_pose_both_candidates(subset, temp, index_a, index_b, candidates, kpt_num=20):
-        for i, temp_i in enumerate(temp):
-            num = 0
-            for j, subset_j in enumerate(subset):
-                if subset_j[index_a] == temp_i[0]:
-                    subset[j][index_b] = temp[i][1]
-                    num += 1
-                    subset[j][-1] += 1
-                    subset[j][-2] += candidates[temp_i[1], 2] + temp_i[2]
-            if num == 0:
-                person_keypoints = np.ones(kpt_num) * -1
-                person_keypoints[index_a] = temp[i][0]
-                person_keypoints[index_b] = temp[i][1]
-                person_keypoints[-1] = 2
-                person_keypoints[-2] = np.sum(candidates[temp_i[0:2], 2]) + temp_i[2]
-                subset.append(person_keypoints)
-
-        return subset
-
-    @staticmethod
-    def _copy_temperature_to_subset(subset, temp, index_a, index_b):
-        for _, temp_i in enumerate(temp):
-            for j, subset_j in enumerate(subset):
-                check_subset_a = subset_j[index_a] == temp_i[0] and subset_j[index_b] == -1
-                check_subset_b = subset_j[index_b] == temp_i[1] and subset_j[index_a] == -1
-                if check_subset_a:
-                    subset[j][index_b] = temp_i[1]
-                    continue
-                if check_subset_b:
-                    subset[j][index_a] = temp_i[0]
-
-        return subset
-
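-    # _get_temperature scores candidate connections between joints of types A and B by
-    # sampling the part affinity field at several points along the joining segment; only
-    # connections whose affinity is consistently high enough are kept, sorted by score.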
-    @staticmethod
-    def _get_temperature(cand_a_, cand_b_, score_mid, pafs, threshold=0.05):
-        temp_ = []
-        for index_a_, cand_a_element in enumerate(cand_a_):
-            for index_b_, cand_b_element in enumerate(cand_b_):
-                mid_point = [(
-                    int(round((cand_a_element[0] + cand_b_element[0]) * 0.5)),
-                    int(round((cand_a_element[1] + cand_b_element[1]) * 0.5))
-                )] * 2
-                vec = [cand_b_element[0] - cand_a_element[0], cand_b_element[1] - cand_a_element[1]]
-                norm_vec = math.sqrt(vec[0] ** 2 + vec[1] ** 2)
-                if norm_vec == 0:
-                    continue
-                vec[0] /= norm_vec
-                vec[1] /= norm_vec
-                score_mid_a = score_mid[mid_point[0][1], mid_point[0][0], 0]
-                score_mid_b = score_mid[mid_point[1][1], mid_point[1][0], 1]
-                score = vec[0] * score_mid_a + vec[1] * score_mid_b
-
-                height_n = pafs.shape[0] // 2
-                suc_ratio = 0
-                mid_score = 0
-                mid_num = 10  # n points for integral over paf
-
-                if score > -100:
-                    p_sum = 0
-                    p_count = 0
-
-                    x = np.linspace(cand_a_element[0], cand_b_element[0], mid_num)
-                    y = np.linspace(cand_a_element[1], cand_b_element[1], mid_num)
-                    for point_idx in range(0, mid_num):
-                        px = int(round(x[point_idx]))
-                        py = int(round(y[point_idx]))
-                        pred = score_mid[py, px, 0:2]
-                        score = vec[0] * pred[0] + vec[1] * pred[1]
-                        if score > threshold:
-                            p_sum += score
-                            p_count += 1
-                    suc_ratio = p_count / mid_num
-                    ratio = 0
-                    if p_count > 0:
-                        ratio = p_sum / p_count
-                    mid_score = ratio + min(height_n / norm_vec - 1, 0)
-                if mid_score > 0 and suc_ratio > 0.8:
-                    score = mid_score
-                    score_all = score + cand_a_element[2] + cand_b_element[2]
-                    temp_.append([index_a_, index_b_, score, score_all])
-        if temp_:
-            temp_ = sorted(temp_, key=itemgetter(2), reverse=True)
-
-        return temp_
-
-    def _get_connections(self, cand_a, cand_b, score_mid, pafs, thresh):
-        temp_ = self._get_temperature(cand_a, cand_b, score_mid, pafs, thresh)
-        num_limbs = min(len(cand_a), len(cand_b))
-        cnt = 0
-        occur_a = np.zeros(len(cand_a), dtype=np.int32)
-        occur_b = np.zeros(len(cand_b), dtype=np.int32)
-        connections = []
-        for row_temp in temp_:
-            if cnt == num_limbs:
-                break
-            i, j, score = row_temp[0:3]
-            if occur_a[i] == 0 and occur_b[j] == 0:
-                connections.append([cand_a[i][3], cand_b[j][3], score])
-                cnt += 1
-                occur_a[i] = 1
-                occur_b[j] = 1
-        return connections
-
-    def group_peaks(self, peaks, pafs, kpt_num=20, threshold=0.05):
-        subset = []
-        candidates = np.array([item for sublist in peaks for item in sublist])
-        for keypoint_id, maped_keypoints in enumerate(self.map_idx):
-            score_mid = pafs[:, :, [x - 19 for x in maped_keypoints]]
-            candidate_a = peaks[self.limb_seq[keypoint_id][0] - 1]
-            candidate_b = peaks[self.limb_seq[keypoint_id][1] - 1]
-            idx_joint_a = self.limb_seq[keypoint_id][0] - 1
-            idx_joint_b = self.limb_seq[keypoint_id][1] - 1
-
-            if not candidate_a and not candidate_b:  # no such limb
-                continue
-            if not candidate_a:  # limb has just B joint
-                subset = self._add_pose_single_candidate(subset, candidate_b, idx_joint_b, kpt_num)
-                continue
-            if not candidate_b:  # limb has just A joint
-                subset = self._add_pose_single_candidate(subset, candidate_a, idx_joint_a, kpt_num)
-                continue
-
-            temp = self._get_connections(candidate_a, candidate_b, score_mid, pafs, threshold)
-            if not temp:
-                continue
-
-            if keypoint_id == 0:
-                subset = [np.ones(kpt_num) * -1 for _ in temp]
-                for i, temp_i in enumerate(temp):
-                    subset[i][self.limb_seq[0][0] - 1] = temp_i[0]
-                    subset[i][self.limb_seq[0][1] - 1] = temp_i[1]
-                    subset[i][-1] = 2
-                    subset[i][-2] = np.sum(candidates[temp_i[0:2], 2]) + temp_i[2]
-            else:
-                index_a = self.limb_seq[keypoint_id][0] - 1
-                index_b = self.limb_seq[keypoint_id][1] - 1
-                if keypoint_id in (17, 18):
-                    subset = self._copy_temperature_to_subset(subset, temp, index_a, index_b)
-                    continue
-                subset = self._add_pose_both_candidates(subset, temp, index_a, index_b, candidates, kpt_num)
-
-        return self._filter_subset(subset), candidates
-
-    @staticmethod
-    def get_poses(subset, candidate):
-        persons_keypoints_x, persons_keypoints_y, persons_keypoints_v = [], [], []
-        scores = []
-        for subset_element in subset:
-            if subset_element.size == 0:
-                continue
-            keypoints_x, keypoints_y, keypoints_v = [0] * 17, [0] * 17, [0] * 17
-            to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
-            person_score = subset_element[-2]
-            position_id = -1
-            for keypoint_id in subset_element[:-2]:
-                position_id += 1
-                if position_id == 1:  # No 'Neck' in COCO
-                    continue
-
-                cx, cy, visibility = 0, 0, 0  # Keypoint not found
-                if keypoint_id != -1:
-                    cx, cy = candidate[keypoint_id.astype(int), 0:2]
-                    cx = cx - 0.5 + 1  # +1 for matlab consistency, coords start from 1
-                    cy = cy - 0.5 + 1
-                    visibility = 1
-                keypoints_x[to_coco_map[position_id]] = cx
-                keypoints_y[to_coco_map[position_id]] = cy
-                keypoints_v[to_coco_map[position_id]] = visibility
-
-            scores.append(person_score * max(0, (subset_element[-1] - 1)))  # -1 for Neck
-            persons_keypoints_x.append(keypoints_x)
-            persons_keypoints_y.append(keypoints_y)
-            persons_keypoints_v.append(keypoints_v)
-
-        persons_keypoints_x = np.array(persons_keypoints_x)
-        persons_keypoints_y = np.array(persons_keypoints_y)
-        persons_keypoints_v = np.array(persons_keypoints_v)
-        scores = np.array(scores)
-
-        return persons_keypoints_x, persons_keypoints_y, persons_keypoints_v, scores
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/reidentification.py b/tools/accuracy_checker/accuracy_checker/adapters/reidentification.py
deleted file mode 100644 (file)
index f2fed25..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..adapters import Adapter
-from ..representation import ReIdentificationPrediction
-
-
-class ReidAdapter(Adapter):
-    """
-    Class for converting output of Reid model to ReIdentificationPrediction representation
-    """
-    __provider__ = 'reid'
-
-    def configure(self):
-        """
-        Specifies parameters of config entry
-        """
-        self.grn_workaround = self.launcher_config.get("grn_workaround", True)
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        """
-        Args:
-            raw: output of model
-            identifiers: list of input data identifiers
-            frame_meta: list of metadata for input frames
-        Returns:
-            list of ReIdentificationPrediction objects
-        """
-        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
-
-        if self.grn_workaround:
-            # workaround: GRN layer
-            prediction = self._grn_layer(prediction)
-
-        return [ReIdentificationPrediction(identifier, embedding.reshape(-1))
-                for identifier, embedding in zip(identifiers, prediction)]
-
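-    # _grn_layer emulates the effect of a GRN layer by L2-normalizing every embedding,
-    # with a small bias added under the square root for numerical stability.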
-    @staticmethod
-    def _grn_layer(prediction):
-        GRN_BIAS = 0.000001
-        sum_ = np.sum(prediction ** 2, axis=1)
-        prediction = prediction / np.sqrt(sum_[:, np.newaxis] + GRN_BIAS)
-
-        return prediction
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/segmentation.py b/tools/accuracy_checker/accuracy_checker/adapters/segmentation.py
deleted file mode 100644 (file)
index fcb26c3..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import numpy as np
-from ..adapters import Adapter
-from ..representation import SegmentationPrediction, BrainTumorSegmentationPrediction
-
-
-class SegmentationAdapter(Adapter):
-    __provider__ = 'segmentation'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        frame_meta = frame_meta or [None] * len(identifiers)
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-        for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
-            result.append(SegmentationPrediction(identifier, output))
-
-        return result
-
-    def _extract_predictions(self, outputs_list, meta):
-        if 'tiles_shape' not in (meta[-1] or {}):
-            return outputs_list[0]
-        tiles_shapes = [meta['tiles_shape'] for meta in meta]
-        restore_output = []
-        offset = 0
-        for _, image_tiles_shape in enumerate(tiles_shapes):
-            next_offset = offset + image_tiles_shape[0] * image_tiles_shape[1]
-            image_tiles = [network_output[self.output_blob] for network_output in outputs_list[offset:next_offset]]
-            tiles_columns = image_tiles[::image_tiles_shape[0]]
-            image = tiles_columns[0]
-            for tile_column in tiles_columns[1:]:
-                image = np.concatenate((image, tile_column), axis=3)
-            restore_output.append(image.squeeze())
-            offset = next_offset
-
-        return {self.output_blob: restore_output}
-
-
-class BrainTumorSegmentationAdapter(Adapter):
-    __provider__ = 'brain_tumor_segmentation'
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        result = []
-        frame_meta = frame_meta or [None] * len(identifiers)
-        raw_outputs = self._extract_predictions(raw, frame_meta)
-        for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
-            result.append(BrainTumorSegmentationPrediction(identifier, output))
-
-        return result
-
-    def _extract_predictions(self, outputs_list, meta):
-        if not (meta[-1] or {}).get('multi_infer', False):
-            return outputs_list[0]
-
-        output_keys = list(outputs_list[0].keys())
-        output_map = {}
-        for output_key in output_keys:
-            output_data = [[output[output_key] for output in outputs_list]]
-            output_map[output_key] = output_data
-
-        return output_map
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/text_detection.py b/tools/accuracy_checker/accuracy_checker/adapters/text_detection.py
deleted file mode 100644 (file)
index d90ebfc..0000000
+++ /dev/null
@@ -1,309 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import defaultdict
-
-import cv2
-import numpy as np
-
-
-from ..adapters import Adapter
-from ..config import ConfigValidator, StringField, NumberField, BoolField, ConfigError
-from ..representation import TextDetectionPrediction, CharacterRecognitionPrediction
-
-
-class TextDetectionAdapterConfig(ConfigValidator):
-    type = StringField()
-    pixel_link_out = StringField()
-    pixel_class_out = StringField()
-
-
-class TextDetectionAdapter(Adapter):
-    __provider__ = 'text_detection'
-
-    def validate_config(self):
-        text_detection_adapter_config = TextDetectionAdapterConfig('TextDetectionAdapter_Config')
-        text_detection_adapter_config.validate(self.launcher_config)
-
-    def configure(self):
-        self.pixel_link_out = self.launcher_config['pixel_link_out']
-        self.pixel_class_out = self.launcher_config['pixel_class_out']
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        results = []
-        predictions = self._extract_predictions(raw, frame_meta)
-        raw_output = zip(identifiers, frame_meta, predictions[self.pixel_link_out], predictions[self.pixel_class_out])
-        for identifier, current_frame_meta, link_data, cls_data in raw_output:
-            link_data = link_data.reshape((1, *link_data.shape))
-            cls_data = cls_data.reshape((1, *cls_data.shape))
-            link_data_shape = link_data.shape
-            new_link_data_shape = (link_data_shape[0], link_data_shape[2], link_data_shape[3], link_data_shape[1] // 2)
-            cls_data_shape = cls_data.shape
-            new_cls_data_shape = (cls_data_shape[0], cls_data_shape[2], cls_data_shape[3], cls_data_shape[1] // 2)
-            link_data = self.softmax(link_data.transpose((0, 2, 3, 1)).reshape(-1))[1::2]
-            cls_data = self.softmax(cls_data.transpose((0, 2, 3, 1)).reshape(-1))[1::2]
-            mask = self.decode_image_by_join(cls_data, new_cls_data_shape, link_data, new_link_data_shape)
-            rects = self.mask_to_boxes(mask, current_frame_meta['image_size'])
-            results.append(TextDetectionPrediction(identifier, rects))
-
-        return results
-
-    @staticmethod
-    def softmax(data):
-        for i in np.arange(start=0, stop=data.size, step=2, dtype=int):
-            maximum = max(data[i], data[i + 1])
-            data[i] = np.exp(data[i] - maximum)
-            data[i + 1] = np.exp(data[i + 1] - maximum)
-            sum_data = data[i] + data[i + 1]
-            data[i] /= sum_data
-            data[i + 1] /= sum_data
-
-        return data
-
-    def decode_image_by_join(self, cls_data, cls_data_shape, link_data, link_data_shape):
-        k_cls_conf_threshold = 0.7
-        k_link_conf_threshold = 0.7
-        height = cls_data_shape[1]
-        width = cls_data_shape[2]
-        id_pixel_mask = np.argwhere(cls_data >= k_cls_conf_threshold).reshape(-1)
-        pixel_mask = cls_data >= k_cls_conf_threshold
-        group_mask = {}
-        pixel_mask[id_pixel_mask] = True
-        points = []
-        for i in id_pixel_mask:
-            points.append((i % width, i // width))
-            group_mask[i] = -1
-        link_mask = link_data >= k_link_conf_threshold
-        neighbours = link_data_shape[3]
-        for point in points:
-            neighbour = 0
-            point_x, point_y = point
-            x_neighbours = [point_x - 1, point_x, point_x + 1]
-            y_neighbours = [point_y - 1, point_y, point_y + 1]
-            for neighbour_y in y_neighbours:
-                for neighbour_x in x_neighbours:
-                    if neighbour_x == point_x and neighbour_y == point_y:
-                        continue
-
-                    if neighbour_x < 0 or neighbour_x >= width or neighbour_y < 0 or neighbour_y >= height:
-                        continue
-
-                    pixel_value = np.uint8(pixel_mask[neighbour_y * width + neighbour_x])
-                    link_value = np.uint8(
-                        link_mask[int(point_y * width * neighbours + point_x * neighbours + neighbour)]
-                    )
-
-                    if pixel_value and link_value:
-                        group_mask = self.join(point_x + point_y * width, neighbour_x + neighbour_y * width, group_mask)
-
-                    neighbour += 1
-
-        return self.get_all(points, width, height, group_mask)
-
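-    # join/find_root implement a minimal union-find (disjoint-set) structure over pixel
-    # indices: linked pixels end up sharing a common root, and get_all then maps each
-    # root to a separate instance id in the output mask.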
-    def join(self, point1, point2, group_mask):
-        root1 = self.find_root(point1, group_mask)
-        root2 = self.find_root(point2, group_mask)
-        if root1 != root2:
-            group_mask[root1] = root2
-
-        return group_mask
-
-    def get_all(self, points, width, height, group_mask):
-        root_map = {}
-        mask = np.zeros((height, width))
-
-        for point in points:
-            point_x, point_y = point
-            point_root = self.find_root(point_x + point_y * width, group_mask)
-            if not root_map.get(point_root):
-                root_map[point_root] = int(len(root_map) + 1)
-            mask[point_y, point_x] = root_map[point_root]
-
-        return mask
-
-    @staticmethod
-    def find_root(point, group_mask):
-        root = point
-        update_parent = False
-        while group_mask[root] != -1:
-            root = group_mask[root]
-            update_parent = True
-
-        if update_parent:
-            group_mask[point] = root
-
-        return root
-
-    @staticmethod
-    def mask_to_boxes(mask, image_size):
-        max_val = np.max(mask).astype(int)
-        resized_mask = cv2.resize(
-            mask.astype(np.float32), (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST
-        )
-        bboxes = []
-        for i in range(int(max_val + 1)):
-            bbox_mask = resized_mask == i
-            contours_tuple = cv2.findContours(bbox_mask.astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
-            contours = contours_tuple[1] if len(contours_tuple) > 2 else contours_tuple[0]
-            if not contours:
-                continue
-            rect = cv2.minAreaRect(contours[0])
-            _, hw, _ = rect
-            ignored_height = hw[0] >= image_size[0] - 1
-            ignored_width = hw[1] >= image_size[1] - 1
-            if ignored_height or ignored_width:
-                continue
-            box = cv2.boxPoints(rect)
-            bboxes.append(box)
-
-        return bboxes
-
-
-class LPRAdapter(Adapter):
-    __provider__ = 'lpr'
-
-    def configure(self):
-        if not self.label_map:
-            raise ConfigError('LPR adapter requires dataset label map for correct decoding.')
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        raw_output = self._extract_predictions(raw, frame_meta)
-        predictions = raw_output[self.output_blob]
-        result = []
-        for identifier, output in zip(identifiers, predictions):
-            decoded_out = self.decode(output.reshape(-1))
-            result.append(CharacterRecognitionPrediction(identifier, decoded_out))
-
-        return result
-
-    def decode(self, outputs):
-        decode_out = str()
-        for output in outputs:
-            if output == -1:
-                break
-            decode_out += str(self.label_map[output])
-
-        return decode_out
-
-
-class BeamSearchDecoderConfig(ConfigValidator):
-    beam_size = NumberField(optional=True, floats=False, min_value=1)
-    blank_label = NumberField(optional=True, floats=False, min_value=0)
-    softmaxed_probabilities = BoolField(optional=True)
-
-
-class BeamSearchDecoder(Adapter):
-    __provider__ = 'beam_search_decoder'
-
-    def validate_config(self):
-        beam_search_decoder_config = BeamSearchDecoderConfig(
-            'BeamSearchDecoder_Config',
-            BeamSearchDecoderConfig.IGNORE_ON_EXTRA_ARGUMENT
-        )
-        beam_search_decoder_config.validate(self.launcher_config)
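-    # default_boxes generates the SSD prior (anchor) boxes for every feature-map cell:
-    # two square boxes per feature level plus a pair of boxes for each aspect ratio,
-    # with centres and sizes normalized to the [0, 1] range.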
-
-    def configure(self):
-        if not self.label_map:
-            raise ConfigError('Beam Search Decoder requires dataset label map for correct decoding.')
-
-        self.beam_size = self.launcher_config.get('beam_size', 10)
-        self.blank_label = self.launcher_config.get('blank_label', len(self.label_map))
-        self.softmaxed_probabilities = self.launcher_config.get('softmaxed_probabilities', False)
-
-    def process(self, raw, identifiers=None, frame_meta=None):
-        raw_output = self._extract_predictions(raw, frame_meta)
-        output = raw_output[self.output_blob]
-        output = np.swapaxes(output, 0, 1)
-
-        result = []
-        for identifier, data in zip(identifiers, output):
-            if self.softmaxed_probabilities:
-                data = np.log(data)
-            seq = self.decode(data, self.beam_size, self.blank_label)
-            decoded = ''.join(str(self.label_map[char]) for char in seq)
-            result.append(CharacterRecognitionPrediction(identifier, decoded))
-        return result
-
-    @staticmethod
-    def decode(probabilities, beam_size=10, blank_id=None):
-        """
-         Decode given output probabilities to sequence of labels.
-        Arguments:
-            probabilities: The output log probabilities for each time step.
-            Should be an array of shape (time x output dim).
-            beam_size (int): Size of the beam to use during decoding.
-            blank_id (int): Index of the CTC blank label.
-        Returns the output label sequence.
-        """
-        def make_new_beam():
-            return defaultdict(lambda: (-np.inf, -np.inf))
-
-        def log_sum_exp(*args):
-            if all(a == -np.inf for a in args):
-                return -np.inf
-            a_max = np.max(args)
-            lsp = np.log(np.sum([np.exp(a - a_max) for a in args]))
-
-            return a_max + lsp
-
-        times, symbols = probabilities.shape
-        # Initialize the beam with the empty sequence, a probability of 1 for ending in blank
-        # and zero for ending in non-blank (in log space).
-        beam = [(tuple(), (0.0, -np.inf))]
-
-        for time in range(times):
-            # A default dictionary to store the next step candidates.
-            next_beam = make_new_beam()
-
-            for symbol_id in range(symbols):
-                current_prob = probabilities[time, symbol_id]
-
-                for prefix, (prob_blank, prob_non_blank) in beam:
-                    # If propose a blank the prefix doesn't change.
-                    # Only the probability of ending in blank gets updated.
-                    if symbol_id == blank_id:
-                        next_prob_blank, next_prob_non_blank = next_beam[prefix]
-                        next_prob_blank = log_sum_exp(
-                            next_prob_blank, prob_blank + current_prob, prob_non_blank + current_prob
-                        )
-                        next_beam[prefix] = (next_prob_blank, next_prob_non_blank)
-                        continue
-                    # Extend the prefix by the new character symbol and add it to the beam.
-                    # Only the probability of not ending in blank gets updated.
-                    end_t = prefix[-1] if prefix else None
-                    next_prefix = prefix + (symbol_id,)
-                    next_prob_blank, next_prob_non_blank = next_beam[next_prefix]
-                    if symbol_id != end_t:
-                        next_prob_non_blank = log_sum_exp(
-                            next_prob_non_blank, prob_blank + current_prob, prob_non_blank + current_prob
-                        )
-                    else:
-                        # Don't include the previous probability of not ending in blank (prob_non_blank) if symbol
-                        #  is repeated at the end. The CTC algorithm merges characters not separated by a blank.
-                        next_prob_non_blank = log_sum_exp(next_prob_non_blank, prob_blank + current_prob)
-
-                    next_beam[next_prefix] = (next_prob_blank, next_prob_non_blank)
-                    # If symbol is repeated at the end also update the unchanged prefix. This is the merging case.
-                    if symbol_id == end_t:
-                        next_prob_blank, next_prob_non_blank = next_beam[prefix]
-                        next_prob_non_blank = log_sum_exp(next_prob_non_blank, prob_non_blank + current_prob)
-                        next_beam[prefix] = (next_prob_blank, next_prob_non_blank)
-
-            beam = sorted(next_beam.items(), key=lambda x: log_sum_exp(*x[1]), reverse=True)[:beam_size]
-
-        best = beam[0]
-
-        return best[0]
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/README.md b/tools/accuracy_checker/accuracy_checker/annotation_converters/README.md
deleted file mode 100644 (file)
index ee13679..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-# Annotation Converters
-
-An annotation converter is a function which converts an annotation file into a format suitable for metric evaluation.
-Each annotation converter expects a specific annotation file format or data structure, which depends on the original dataset.
-If a converter for your data format is not supported by Accuracy Checker, you can provide your own annotation converter.
-Each annotation converter has parameters available for configuration.
-
-The conversion process can be performed in two ways:
-* via configuration file
-* via command line
-
-### Describing annotation conversion in a configuration file
-
-Annotation conversion can be provided in the `dataset` section of your configuration file to convert the annotation in place before every evaluation.
-Each conversion configuration should contain a `converter` field filled with the selected converter name and provide converter-specific parameters (more details in the supported converters section). All paths can be prefixed via the command line with the `-s, --source` argument.
-
-You can additionally use optional parameters like:
-* `subsample_size` - dataset subsample size. You can specify the number of ground truth objects or a dataset ratio in percent. Please be careful with this option, as some datasets do not support subsampling.
-* `annotation` - path to store the converted annotation pickle file. You can use this parameter if you need to reuse the converted annotation and avoid subsequent conversions.
-* `meta` - path to store meta information about the converted annotation, if it is provided.
-
-Example of usage:
-```yaml
-   annotation_conversion:
-     converter: sample
-     data_dir: sample/sample_dataset
-```
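-
-The optional parameters described above can be combined with the conversion section; a possible sketch (the file names and subsample value are placeholders, assuming these fields sit next to `annotation_conversion` in the `dataset` section) is:
-
-```yaml
-   annotation_conversion:
-     converter: sample
-     data_dir: sample/sample_dataset
-   annotation: sample_annotation.pickle
-   meta: sample_annotation_meta.json
-   subsample_size: 1000
-```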
-
-
-### Converting annotations via the command line
-
-The command line for annotation conversion looks like:
-
-```bash
-python3 convert_annotation.py <converter_name> <converter_specific parameters>
-```
-All converter-specific options should have the format `--<parameter_name> <parameter_value>`.
-You may refer to `-h, --help` for the full list of command line options. Some optional arguments are (a concrete example follows the list):
-
-* `-o, --output_dir` - directory to save converted annotation and meta info.
-* `-a, --annotation_name` - annotation file name.
-* `-m, --meta_name` - meta info file name.
-
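-For example, converting the sample dataset with the `sample` converter described below and storing the result under a custom name might look like this (all paths and names here are placeholders):
-
-```bash
-python3 convert_annotation.py sample --data_dir sample/sample_dataset -o converted_annotations -a sample_annotation.pickle
-```
-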
-### Supported converters 
-
-Accuracy Checker supports the following annotation converters and their specific parameters:
-* `wider` - converts the Wider Face dataset to `DetectionAnnotation`.
-  * `annotation_file` - path to a txt file which contains ground truth data in the WiderFace dataset format.
-  * `label_start` - specifies the face label index in the label map. The default value is 1. You can provide another value if you want to use this dataset for separate label validation,
-  for example when your network predicts a different class for faces.
-* `sample` - converts annotation for SampleNet to `ClassificationAnnotation`.
-  * `data_dir` - path to sample dataset root directory.
-* `voc07` - converts Pascal VOC 2007 annotation for detection task to `DetectionAnnotation`.
-   * `image_set_file` - path to file with validation image list (for example VOCdevkit/ImageSets/Main/val.txt).
-   * `annotations_dir` - path to directory with annotation files.
-   * `images_dir` - path to directory with images related to devkit root (default JPEGImages).
-  * `has_background` - allows converting the dataset with or without adding background_label. Accepted values are True or False (default is True).
-* `voc_segmentation` - converts Pascal VOC annotation for semantic segmentation task to `SegmentationAnnotation`.
-  * `image_set_file` - path to file with validation image list (for example VOCdevkit/ImageSets/Segmentation/val.txt).
-  * `images_dir` - path to directory with images related to devkit root (default JPEGImages).
-  * `mask_dir` - path to directory with ground truth segmentation masks related to devkit root (default SegmentationClass).
-* `mars` - converts MARS person reidentification dataset to `ReidentificationAnnotation`.
-  * `data_dir` - path to data directory, where gallery (`bbox_test`) and `query` subdirectories are located.
-* `market1501` - converts Market1501 person reidentification dataset to `ReidentificationAnnotation`.
-  * `data_dir` - path to data directory, where gallery (`bounding_box_test`) and `query` subdirectories are located.
-* `detection_opencv_storage` - converts detection annotation stored in Detection OpenCV storage format to `DetectionAnnotation`.
-  * `annotation_file` - path to annotation in xml format.
-  * `image_names_file` - path to txt file, which contains image name list for dataset.
-  * `label_start` - specifies the starting label index in the label map. The default value is 1. You can provide another value if you want to use this dataset for separate label validation.
-  * `background_label` - specifies which index will be used for the background label. You can omit this parameter if your dataset has no background label.
-* `face_reid_pairwise` - converts Labeled Faces in the Wild dataset for face reidentification to `ReidentificationClassificationAnnotation`.
-  * `pairs_file` - path to file with annotated positive and negative pairs.
-  * `train_file` - path to file with annotated positive and negative pairs used for network training (optional parameter).
-  * `landmarks_file` - path to file with facial landmarks coordinates for annotation images (optional parameter).
-* `landmarks_regression` - converts VGG Face 2 dataset for facial landmarks regression task to `FacialLandmarksAnnotation`.
-  * `landmarks_csv_file` - path to csv file with coordinates of landmark points.
-  * `bbox_csv_file` - path to csv file which contains bounding box coordinates for faces (optional parameter).
-* `mapillary_20` - converts the Mapillary dataset containing 20 classes to `SegmentationAnnotation`.
-  * `data_dir` - path to the dataset root folder. Relative paths to the images and masks directories default to `imgs` and `masks` respectively. If images and masks are located in non-default directories, you can use the parameters described below.
-  * `images_dir` - path to images folder.
-  * `mask_dir` - path to ground truth mask folder.
-* `mighty` - converts Mighty AI dataset for road segmentation task to `SegmentationAnnotation`.
-  * `annotation_file` -  txt file with paths to images and masks.
-* `cityscapes` - converts CityScapes Dataset to `SegmentationAnnotation`.
-  * `dataset_root_dir` - path to dataset root.
-  * `images_subfolder` - path from dataset root to directory with validation images (Optional, default `imgsFine/leftImg8bit/val`).
-  * `masks_subfolder` - path from dataset root to directory with ground truth masks (Optional, `gtFine/val`).
-  * `masks_suffix` - suffix for mask file names (Optional, default `_gtFine_labelTrainIds`).
-  * `images_suffix` - suffix for image file names (Optional, default `_leftImg8bit`).
-  * `use_full_label_map` - allows using the full label map with 33 classes instead of the train label map with 18 classes (Optional, default `False`).
-* `super_resolution` - converts dataset for super resolution task to `SuperResolutionAnnotation`.
-  * `data_dir` - path to folder, where images in low and high resolution are located.
-  * `lr_suffix` - low resolution file name's suffix (default lr).
-  * `hr_suffix` - high resolution file name's suffix (default hr).
-* `icdar15_detection` - converts the ICDAR15 dataset for the text detection task to `TextDetectionAnnotation`.
-  * `data_dir` - path to the folder with annotations in txt format.
-* `icdar13_recognition` - converts the ICDAR13 dataset for the text recognition task to `CharecterRecognitionAnnotation`.
-  * `annotation_file` - path to annotation file in txt format.
-* `mscoco_detection` - converts the MS COCO dataset for the object detection task to `DetectionAnnotation`.
-  * `annotation_file` - path to the annotation file in json format.
-  * `has_background` - allows converting the dataset with or without adding background_label. Accepted values are True or False (default is False).
-  * `use_full_label_map` - allows using the original label map from the paper (with 91 object categories) instead of the publicly available one (80 categories).
-* `mscoco_keypoints` - converts the MS COCO dataset for the keypoint localization task to `PoseEstimationAnnotation`.
-  * `annotation_file` - path to the annotation file in json format.
-* `imagenet` - converts the ImageNet dataset for the image classification task to `ClassificationAnnotation`.
-  * `annotation_file` - path to the annotation in txt format.
-  * `labels_file` - path to the file with word descriptions of labels (synset words).
-  * `has_background` - allows adding a background label to the original labels and converting the dataset to 1001 classes instead of 1000 (default value is False).
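-
-As a further illustration, the `voc07` converter from the list above could be configured as follows (a sketch; the paths are placeholders to be resolved against the `-s, --source` prefix):
-
-```yaml
-   annotation_conversion:
-     converter: voc07
-     image_set_file: VOCdevkit/ImageSets/Main/val.txt
-     annotations_dir: VOCdevkit/VOC2007/Annotations
-     images_dir: JPEGImages
-     has_background: True
-```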
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/__init__.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/__init__.py
deleted file mode 100644 (file)
index d14cb62..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Copyright (c) 2018 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from .format_converter import BaseFormatConverter
-from .convert import make_subset, save_annotation
-from .market1501 import Market1501Converter
-from .mars import MARSConverter
-from .pascal_voc import PascalVOCDetectionConverter
-from .sample_converter import SampleConverter
-from .wider import WiderFormatConverter
-from .detection_opencv_storage import DetectionOpenCVStorageFormatConverter
-from .bitvehicle import BITVehicle, BITVehicleJSON
-from .lfw import FaceReidPairwiseConverter
-from .vgg_face_regression import LandmarksRegression
-from .mighty import MightyFormatConverter
-from .super_resolution_converter import SRConverter
-from .mapillary_20 import Mapillary20Converters
-from .imagenet import ImageNetFormatConverter
-from .icdar import ICDAR13RecognitionDatasetConverter, ICDAR15DetectionDatasetConverter
-from .ms_coco import MSCocoDetectionConverter, MSCocoKeypointsConverter
-from .cityscapes import CityscapesConverter
-from .ncf_converter import NCFConverter
-from .brats import BratsConverter
-
-__all__ = [
-    'BaseFormatConverter',
-    'make_subset',
-    'save_annotation',
-
-    'ImageNetFormatConverter',
-    'Market1501Converter',
-    'SampleConverter',
-    'PascalVOCDetectionConverter',
-    'WiderFormatConverter',
-    'MARSConverter',
-    'DetectionOpenCVStorageFormatConverter',
-    'BITVehicle',
-    'BITVehicleJSON',
-    'FaceReidPairwiseConverter',
-    'LandmarksRegression',
-    'MightyFormatConverter',
-    'SRConverter',
-    'Mapillary20Converters',
-    'ICDAR13RecognitionDatasetConverter',
-    'ICDAR15DetectionDatasetConverter',
-    'MSCocoKeypointsConverter',
-    'MSCocoDetectionConverter',
-    'CityscapesConverter',
-    'NCFConverter',
-    'BratsConverter',
-]
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/_reid_common.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/_reid_common.py
deleted file mode 100644 (file)
index 8bcce97..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from pathlib import Path
-
-from ..representation import ReIdentificationAnnotation
-
-
-def read_directory(directory, query, image_pattern):
-    pids = set()
-    images = []
-    for image in directory.glob("*.jpg"):
-        pid, camid = map(int, image_pattern.search(image.name).groups())
-        if pid == -1:
-            continue
-
-        camid -= 1
-        pids.add(pid)
-
-        identifier = str(Path(directory.name) / image.name)
-        images.append(ReIdentificationAnnotation(identifier, camid, pid, query))
-
-    return images, pids
-
-
-def check_dirs(dirs, parent_dir, arg_name='data_dir'):
-    for directory in dirs:
-        if directory.is_dir():
-            continue
-
-        message_pattern = "{directory} not found in {parent_dir}. Check that {arg_name} points to the correct directory"
-        raise FileNotFoundError(message_pattern.format(directory=directory, parent_dir=parent_dir, arg_name=arg_name))
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/bitvehicle.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/bitvehicle.py
deleted file mode 100644 (file)
index 45f61e2..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from pathlib import Path
-
-from ..representation import DetectionAnnotation
-from ..utils import get_key_by_value, read_json, read_xml
-
-from .format_converter import FileBasedAnnotationConverter
-
-
-class BITVehicleJSON(FileBasedAnnotationConverter):
-    __provider__ = 'bitvehicle_json'
-
-    def convert(self):
-        annotations = []
-        for annotation_image in read_json(self.annotation_file):
-            labels, x_mins, y_mins, x_maxs, y_maxs, is_ignored, occluded = [], [], [], [], [], [], []
-            for detection in annotation_image['objects']:
-                x_min, y_min, x_max, y_max = detection['bbox']
-                label = detection['label']
-
-                if label == 'ignored':
-                    for class_ in _CLASS_TO_IND.values():
-                        is_ignored.append(len(labels))
-                        labels.append(class_)
-                        x_mins.append(x_min)
-                        y_mins.append(y_min)
-                        x_maxs.append(x_max)
-                        y_maxs.append(y_max)
-                else:
-                    is_occluded = detection.get('is_occluded', False) or detection.get('occluded', False)
-                    is_difficult = detection.get('difficult', False)
-                    if is_occluded or is_difficult:
-                        occluded.append(len(labels))
-
-                    labels.append(_CLASS_TO_IND[label])
-                    x_mins.append(x_min)
-                    y_mins.append(y_min)
-                    x_maxs.append(x_max)
-                    y_maxs.append(y_max)
-
-            identifier = Path(annotation_image['image']).name
-            annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
-            annotation.metadata['is_occluded'] = occluded
-            annotation.metadata['difficult_boxes'] = is_ignored
-
-            annotations.append(annotation)
-
-        return annotations, get_meta()
-
-
-class BITVehicle(FileBasedAnnotationConverter):
-    __provider__ = 'bitvehicle'
-
-    def convert(self):
-        annotations = []
-        for annotation_image in read_xml(self.annotation_file):
-            if annotation_image.tag != 'image':
-                continue
-
-            identifier = annotation_image.get('name')
-            labels, x_mins, y_mins, x_maxs, y_maxs, occluded = [], [], [], [], [], []
-            for roi in annotation_image.findall('box'):
-                label = roi.get("label")
-                x_left = int(roi.get('xtl'))
-                x_right = int(roi.get('xbr'))
-                y_top = int(roi.get('ytl'))
-                y_bottom = int(roi.get('ybr'))
-                x_min, y_min, x_max, y_max = x_left, y_top, x_right, y_bottom
-                is_occluded = bool(int(roi.get('occluded')))
-
-                labels.append(_CLASS_TO_IND[label])
-                x_mins.append(x_min)
-                y_mins.append(y_min)
-                x_maxs.append(x_max)
-                y_maxs.append(y_max)
-                if is_occluded:
-                    occluded.append(len(labels) - 1)
-
-            annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
-            annotation.metadata['is_occluded'] = occluded
-
-            annotations.append(annotation)
-
-        return annotations, get_meta()
-
-
-_CLASSES = (
-    '__background__',  # always index 0
-    'vehicle',
-    'plate'
-)
-
-_CLASS_TO_IND = dict(zip(_CLASSES, list(range(len(_CLASSES)))))
-
-
-def get_meta():
-    labels = dict(enumerate(_CLASSES))
-    labels[-1] = 'ignored'
-
-    return {'label_map': labels, 'background_label': get_key_by_value(labels, '__background__')}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/brats.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/brats.py
deleted file mode 100644 (file)
index e91033d..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from pathlib import Path
-import warnings
-
-from ..representation import BrainTumorSegmentationAnnotation
-from ..utils import get_path
-from ..config import StringField
-from .format_converter import BaseFormatConverter, DirectoryBasedAnnotationConverterConfig
-
-
-class BratsConverterConfig(DirectoryBasedAnnotationConverterConfig):
-    image_folder = StringField(optional=True)
-    mask_folder = StringField(optional=True)
-
-
-class BratsConverter(BaseFormatConverter):
-    __provider__ = 'brats'
-
-    _config_validator_type = BratsConverterConfig
-
-    def configure(self):
-        self.data_dir = self.config['data_dir']
-        self.image_folder = self.config.get('image_folder', 'imagesTr')
-        self.mask_folder = self.config.get('mask_folder', 'labelsTr')
-
-    def convert(self):
-        mask_folder = Path(self.mask_folder)
-        image_folder = Path(self.image_folder)
-        image_dir = get_path(self.data_dir / image_folder, is_directory=True)
-        mask_dir = get_path(self.data_dir / mask_folder, is_directory=True)
-
-        annotations = []
-        for file_in_dir in image_dir.iterdir():
-            file_name = file_in_dir.parts[-1]
-            mask = mask_dir / file_name
-            if not mask.exists():
-                warnings.warn('Annotation mask for {} does not exist. File will be ignored.'.format(file_name))
-                continue
-            annotation = BrainTumorSegmentationAnnotation(
-                str(image_folder / file_name),
-                str(mask_folder / file_name),
-            )
-
-            annotations.append(annotation)
-
-        return annotations, None
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/cityscapes.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/cityscapes.py
deleted file mode 100644 (file)
index 3bda89a..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-from pathlib import Path
-from ..representation import SegmentationAnnotation
-from ..representation.segmentation_representation import GTMaskLoader
-from ..config import PathField, StringField, BoolField
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-train_meta = {
-    'label_map': {
-        0: 'road', 1: 'sidewalk', 2: 'building', 3: 'wall', 4: 'fence', 5: 'pole', 6: 'traffic light',
-        7: 'traffic sign', 8: 'vegetation', 9: 'terrain', 10: 'sky', 11: 'person', 12: 'rider', 13: 'car',
-        14: 'truck', 15: 'bus', 16: 'train', 17: 'motorcycle', 18: 'bicycle'
-    },
-    'segmentation_colors': (
-        (128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153),
-        (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0),
-        (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)
-    ),
-}
-
-full_dataset_meta = {
-    'segmentation_colors': (
-        (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (111, 74, 0), (81, 0, 81), (128, 64, 128),
-        (244, 35, 232), (250, 170, 160), (230, 150, 140), (70, 70, 70), (102, 102, 156), (190, 153, 153),
-        (180, 165, 180), (150, 100, 100), (150, 120, 90), (153, 153, 153), (153, 153, 153), (250, 170, 30),
-        (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0), (0, 0, 142),
-        (0, 0, 70), (0, 60, 100), (0, 0, 90), (0, 0, 110), (0, 80, 100), (0, 0, 230), (119, 11, 32)
-    ),
-    'label_map': {
-        0: 'unlabeled', 1:  'ego vehicle', 2: 'rectification border', 3: 'out of roi', 4: 'static', 5: 'dynamic',
-        6: 'ground', 7: 'road', 8: 'sidewalk', 9: 'parking', 10: 'rail track', 11: 'building', 12: 'wall',
-        13: 'fence', 14: 'guard rail', 15: 'bridge', 16: 'tunnel', 17: 'pole', 18: 'polegroup', 19: 'traffic light',
-        20: 'traffic sign', 21: 'vegetation', 22: 'terrain', 23: 'sky', 24: 'person', 25: 'rider', 26: 'car',
-        27: 'truck', 28: 'bus', 29: 'caravan', 30: 'trailer', 31: 'train', 32: 'motorcycle', 33: 'bicycle',
-        -1: 'license plate'
-    }
-}
-
-
-class CityscapesConverterConfig(BaseFormatConverterConfig):
-    dataset_root_dir = PathField(is_directory=True)
-    images_subfolder = StringField(optional=True)
-    masks_subfolder = StringField(optional=True)
-    masks_suffix = StringField(optional=True)
-    images_suffix = StringField(optional=True)
-    use_full_label_map = BoolField(optional=True)
-
-
-class CityscapesConverter(BaseFormatConverter):
-    __provider__ = 'cityscapes'
-
-    _config_validator_type = CityscapesConverterConfig
-
-    def configure(self):
-        self.dataset_root = self.config['dataset_root_dir']
-        self.images_dir = self.config.get('images_subfolder', 'imgsFine/leftImg8bit/val')
-        self.masks_dir = self.config.get('masks_subfolder', 'gtFine/val')
-        self.masks_suffix = self.config.get('masks_suffix', '_gtFine_labelTrainIds')
-        self.images_suffix = self.config.get('images_suffix', '_leftImg8bit')
-        self.use_full_label_map = self.config.get('use_full_label_map', False)
-
-
-    def convert(self):
-        images = list(self.dataset_root.rglob(r'{}/*/*{}.png'.format(self.images_dir, self.images_suffix)))
-        annotations = []
-        for image in images:
-            identifier = str(Path(self.images_dir).joinpath(*image.parts[-2:]))
-            mask = Path(self.masks_dir) / image.parts[-2] / self.masks_suffix.join(
-                str(image.name).split(self.images_suffix)
-            )
-            annotations.append(SegmentationAnnotation(identifier, mask, mask_loader=GTMaskLoader.PILLOW))
-
-        return annotations, full_dataset_meta if self.use_full_label_map else train_meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py
deleted file mode 100644 (file)
index 830f73a..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import warnings
-import json
-from pathlib import Path
-from argparse import ArgumentParser
-from functools import partial
-
-import numpy as np
-
-from ..utils import get_path
-from ..representation import ReIdentificationClassificationAnnotation
-from .format_converter import BaseFormatConverter
-
-
-def build_argparser():
-    parser = ArgumentParser(
-        description="Converts annotation form a arbitrary format to accuracy-checker specific format", add_help=False
-    )
-    parser.add_argument(
-        "converter",
-        help="Specific converter to run",
-        choices=list(BaseFormatConverter.providers.keys())
-    )
-    parser.add_argument(
-        "-o", "--output_dir",
-        help="Directory to save converted annotation and meta info",
-        required=False,
-        type=partial(get_path, is_directory=True)
-    )
-    parser.add_argument("-m", "--meta_name", help="Meta info file name", required=False)
-    parser.add_argument("-a", "--annotation_name", help="Annotation file name", required=False)
-    parser.add_argument("-ss", "--subsample", help="Dataset subsample size", required=False)
-    parser.add_argument("--subsample_seed", help="Seed for generation dataset subsample", type=int, required=False)
-
-    return parser
-
-
-def make_subset(annotation, size, seed=666):
-    def make_subset_pairwise(annotation, size):
-        def get_pairs(pairs_list):
-            pairs_set = set()
-            for identifier in pairs_list:
-                next_annotation = next(
-                    pair_annotation for pair_annotation in annotation if pair_annotation.identifier == identifier
-                )
-                positive_pairs = get_pairs(next_annotation.positive_pairs)
-                negative_pairs = get_pairs(next_annotation.negative_pairs)
-                pairs_set.add(next_annotation)
-                pairs_set.update(positive_pairs)
-                pairs_set.update(negative_pairs)
-            return pairs_set
-
-        subsample_set = set()
-        while len(subsample_set) < size:
-            ann_ind = np.random.choice(len(annotation), 1)
-            annotation_for_subset = annotation[ann_ind[0]]
-            positive_pairs = annotation_for_subset.positive_pairs
-            negative_pairs = annotation_for_subset.negative_pairs
-            if len(positive_pairs) + len(negative_pairs) == 0:
-                continue
-            updated_pairs = set()
-            updated_pairs.add(annotation_for_subset)
-            updated_pairs.update(get_pairs(positive_pairs))
-            updated_pairs.update(get_pairs(negative_pairs))
-            subsample_set.update(updated_pairs)
-        return list(subsample_set)
-
-    np.random.seed(seed)
-    dataset_size = len(annotation)
-    if dataset_size < size:
-        warnings.warn('Dataset size {} is less than subset size {}'.format(dataset_size, size))
-        return annotation
-    if isinstance(annotation[-1], ReIdentificationClassificationAnnotation):
-        return make_subset_pairwise(annotation, size)
-
-
-    return list(np.random.choice(annotation, size=size, replace=False))
-
-
-def main():
-    main_argparser = build_argparser()
-    args, _ = main_argparser.parse_known_args()
-    converter, converter_argparser, converter_args = get_converter_arguments(args)
-
-    main_argparser = ArgumentParser(parents=[main_argparser, converter_argparser])
-    args = main_argparser.parse_args()
-
-    converter = configure_converter(converter_args, args, converter)
-    out_dir = args.output_dir or Path.cwd()
-
-    result, meta = converter.convert()
-
-    subsample = args.subsample
-    if subsample:
-        if subsample.endswith('%'):
-            subsample_ratio = float(subsample[:-1]) / 100
-            subsample_size = int(len(result) * subsample_ratio)
-        else:
-            subsample_size = int(args.subsample)
-
-        result = make_subset(result, subsample_size)
-
-    converter_name = converter.get_name()
-    annotation_name = args.annotation_name or "{}.pickle".format(converter_name)
-    meta_name = args.meta_name or "{}.json".format(converter_name)
-
-    annotation_file = out_dir / annotation_name
-    meta_file = out_dir / meta_name
-
-    save_annotation(result, meta, annotation_file, meta_file)
-
-
-def save_annotation(annotation, meta, annotation_file, meta_file):
-    if annotation_file:
-        with annotation_file.open('wb') as file:
-            for representation in annotation:
-                representation.dump(file)
-    if meta_file and meta:
-        with meta_file.open('wt') as file:
-            json.dump(meta, file)
-
-
-def configure_converter(converter_options, args, converter):
-    args_dict, converter_options_dict = vars(args), vars(converter_options)
-    converter_config = {
-        option_name: option_value for option_name, option_value in args_dict.items()
-        if option_name in converter_options_dict and option_value is not None
-    }
-    converter_config['converter'] = args.converter
-    converter.config = converter_config
-    converter.validate_config()
-    converter.configure()
-
-    return converter
-
-
-def get_converter_arguments(arguments):
-    converter = BaseFormatConverter.provide(arguments.converter)
-    converter_argparser = converter.get_argparser()
-    converter_options, _ = converter_argparser.parse_known_args()
-    return converter, converter_argparser, converter_options
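`convert.py` above doubles as the CLI entry point and as a small library (`make_subset`, `save_annotation`). A minimal sketch of the subsetting helper, using stand-in annotations instead of a real dataset (the module is removed by this change, so the import path reflects the pre-R3 layout):

```python
# Minimal sketch: drawing a reproducible random subset with make_subset().
# The annotations are stand-ins; a real run would use a converter's output.
from accuracy_checker.annotation_converters import make_subset
from accuracy_checker.representation import ClassificationAnnotation

annotations = [ClassificationAnnotation('img_{:04d}.jpg'.format(i), i % 10) for i in range(100)]
subset = make_subset(annotations, size=20, seed=42)  # sampled without replacement
print(len(subset))  # 20
```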
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/detection_opencv_storage.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/detection_opencv_storage.py
deleted file mode 100644 (file)
index dfe461a..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from ..config import PathField, NumberField
-from ..representation import DetectionAnnotation
-from ..utils import convert_bboxes_xywh_to_x1y1x2y2, read_xml, read_txt
-
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class DetectionOpenCVConverterConfig(BaseFormatConverterConfig):
-    annotation_file = PathField()
-    image_names_file = PathField(optional=True)
-    label_start = NumberField(floats=False, optional=True)
-    background_label = NumberField(floats=False, optional=True)
-
-
-class DetectionOpenCVStorageFormatConverter(BaseFormatConverter):
-    __provider__ = 'detection_opencv_storage'
-
-    _config_validator_type = DetectionOpenCVConverterConfig
-
-    def configure(self):
-        self.annotation_file = self.config['annotation_file']
-        self.image_names_file = self.config.get('image_names_file')
-        self.label_start = self.config.get('label_start', 1)
-        self.background_label = self.config.get('background_label')
-
-    def convert(self):
-        root = read_xml(self.annotation_file)
-
-        labels_set = self.get_label_set(root)
-
-        labels_set = sorted(labels_set)
-        class_to_ind = dict(zip(labels_set, list(range(self.label_start, len(labels_set) + self.label_start + 1))))
-        label_map = {}
-        for class_label, ind in class_to_ind.items():
-            label_map[ind] = class_label
-
-        annotations = []
-        for frames in root:
-            for frame in frames:
-                identifier = '{}.png'.format(frame.tag)
-                labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
-                difficult_indices = []
-                for annotation in frame:
-                    label = annotation.findtext('type')
-                    if not label:
-                        raise ValueError('"{}" contains detection without "{}"'.format(self.annotation_file, 'type'))
-
-                    box = annotation.findtext('roi')
-                    if not box:
-                        raise ValueError('"{}" contains detection without "{}"'.format(self.annotation_file, 'roi'))
-                    box = list(map(float, box.split()))
-
-                    is_ignored = annotation.findtext('is_ignored', 0)
-                    if int(is_ignored) == 1:
-                        difficult_indices.append(len(labels))
-
-                    labels.append(class_to_ind[label])
-                    x_min, y_min, x_max, y_max = convert_bboxes_xywh_to_x1y1x2y2(*box)
-                    x_mins.append(x_min)
-                    y_mins.append(y_min)
-                    x_maxs.append(x_max)
-                    y_maxs.append(y_max)
-
-                detection_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
-                detection_annotation.metadata['difficult_boxes'] = difficult_indices
-                annotations.append(detection_annotation)
-
-        if self.image_names_file:
-            self.rename_identifiers(annotations, self.image_names_file)
-
-        meta = {}
-        if self.background_label:
-            label_map[self.background_label] = '__background__'
-            meta['background_label'] = self.background_label
-        meta['label_map'] = label_map
-
-        return annotations, meta
-
-    @staticmethod
-    def rename_identifiers(annotation_list, images_file):
-        for annotation, image in zip(annotation_list, read_txt(images_file)):
-            annotation.identifier = image
-
-        return annotation_list
-
-
-    @staticmethod
-    def get_label_set(xml_root):
-        labels_set = set()
-        for frames in xml_root:
-            for frame in frames:
-                for annotation in frame:
-                    label = annotation.findtext('type')
-                    if not label:
-                        raise ValueError('annotation contains detection without label')
-
-                    labels_set.add(label)
-
-        return labels_set
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/format_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/format_converter.py
deleted file mode 100644 (file)
index 20d3381..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-"""
-Copyright (c) 2018 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from argparse import ArgumentParser
-
-from ..config import ConfigValidator, StringField, PathField
-from ..dependency import ClassProvider
-from ..utils import format_key
-
-
-class BaseFormatConverterConfig(ConfigValidator):
-    converter = StringField()
-
-
-class BaseFormatConverter(ClassProvider):
-    __provider_type__ = 'converter'
-
-    _config_validator_type = BaseFormatConverterConfig
-
-    @property
-    def config_validator(self):
-        return self._config_validator_type(
-            '{}_converter_config'.format(self.get_name()),
-            on_extra_argument=self._config_validator_type.ERROR_ON_EXTRA_ARGUMENT
-        )
-
-    def __init__(self, config=None):
-        self.config = config
-        if config:
-            self.validate_config()
-            self.configure()
-
-    def convert(self, *args, **kwargs):
-        """
-        Converts specific annotation format to the ResultRepresentation specific for current dataset/task.
-
-        Returns:
-            annotation: list of ResultRepresentations.
-            meta: meta-data map for the current dataset.
-        """
-        raise NotImplementedError
-
-    @classmethod
-    def get_name(cls):
-        return cls.__provider__
-
-    def get_argparser(self):
-        parser = ArgumentParser(add_help=False)
-        config_validator = self.config_validator
-        fields = config_validator.fields
-        for field_name, field in fields.items():
-            if field_name == 'converter':
-                # 'converter' is the base argument; the main argparser already uses it to pick the specific converter's argparser.
-                # The converter argparser should contain only converter-specific arguments.
-                continue
-
-            required = not field.optional
-            parser.add_argument(
-                format_key(field_name), required=required, type=field.type
-            )
-
-        return parser
-
-    def validate_config(self):
-        self.config_validator.validate(self.config)
-
-    def configure(self):
-        pass
-
-
-class FileBasedAnnotationConverterConfig(BaseFormatConverterConfig):
-    annotation_file = PathField()
-
-
-class FileBasedAnnotationConverter(BaseFormatConverter):
-    _config_validator_type = FileBasedAnnotationConverterConfig
-
-    def configure(self):
-        self.annotation_file = self.config['annotation_file']
-
-    def convert(self, *args, **kwargs):
-        pass
-
-
-class DirectoryBasedAnnotationConverterConfig(BaseFormatConverterConfig):
-    data_dir = PathField(is_directory=True)
-
-
-class DirectoryBasedAnnotationConverter(BaseFormatConverter):
-    _config_validator_type = DirectoryBasedAnnotationConverterConfig
-
-    def configure(self):
-        self.data_dir = self.config['data_dir']
-
-    def convert(self, *args, **kwargs):
-        pass
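`format_converter.py` above defines the pattern every converter in this package follows: declare `__provider__`, pick a config validator, then implement `configure()` and `convert()`. A hypothetical illustration of that pattern (the provider name and the input format are made up, not part of the tool):

```python
# Illustrative only: a hypothetical converter following the pattern above.
# FileBasedAnnotationConverter already supplies the `annotation_file` config field.
from accuracy_checker.annotation_converters.format_converter import FileBasedAnnotationConverter
from accuracy_checker.representation import ClassificationAnnotation
from accuracy_checker.utils import read_txt


class TxtClassificationConverter(FileBasedAnnotationConverter):
    __provider__ = 'txt_classification'  # hypothetical provider name

    def convert(self):
        annotations = []
        for line in read_txt(self.annotation_file):
            identifier, label = line.split()
            annotations.append(ClassificationAnnotation(identifier, int(label)))
        # the second element is the dataset meta; None when there is nothing to report
        return annotations, None
```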
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/icdar.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/icdar.py
deleted file mode 100644 (file)
index 184ade3..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from ..representation import TextDetectionAnnotation, CharacterRecognitionAnnotation
-from ..utils import read_txt
-from .format_converter import FileBasedAnnotationConverter, DirectoryBasedAnnotationConverter
-
-
-class ICDAR15DetectionDatasetConverter(DirectoryBasedAnnotationConverter):
-    __provider__ = 'icdar15_detection'
-
-    def convert(self):
-        annotations = []
-
-        for gt_file in self.data_dir.iterdir():
-            gt_file_name = str(gt_file.parts[-1])
-            identifier = '{}.jpg'.format(gt_file_name.split('gt_')[-1].split('.txt')[0])
-            all_points, transcriptions, difficult = [], [], []
-
-            for text_area in read_txt(gt_file):
-                text_annotation = text_area.split(',')
-                transcription = text_annotation[-1]
-                points = np.reshape(list(map(float, text_annotation[:8])), (-1, 2))
-                if transcription == '###':
-                    difficult.append(len(transcriptions))
-                all_points.append(points)
-                transcriptions.append(transcription)
-            annotation = TextDetectionAnnotation(identifier, all_points, transcriptions)
-            annotation.metadata['difficult_boxes'] = difficult
-            annotations.append(annotation)
-
-        return annotations, None
-
-
-class ICDAR13RecognitionDatasetConverter(FileBasedAnnotationConverter):
-    __provider__ = 'icdar13_recognition'
-
-    supported_symbols = '0123456789abcdefghijklmnopqrstuvwxyz'
-
-    def convert(self):
-        annotations = []
-
-        for line in read_txt(self.annotation_file):
-            identifier, text = line.strip().split(' ')
-            annotations.append(CharacterRecognitionAnnotation(identifier, text))
-
-        label_map = {ind: str(key) for ind, key in enumerate(self.supported_symbols)}
-
-        return annotations, {'label_map': label_map, 'blank_label': len(label_map)}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/imagenet.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/imagenet.py
deleted file mode 100644 (file)
index 88df08a..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-import numpy as np
-
-from ..config import PathField, BoolField
-from ..representation import ClassificationAnnotation
-from ..utils import read_txt, get_path
-
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class ImageNetFormatConverterConfig(BaseFormatConverterConfig):
-    annotation_file = PathField()
-    labels_file = PathField(optional=True)
-    has_background = BoolField(optional=True)
-
-
-class ImageNetFormatConverter(BaseFormatConverter):
-    __provider__ = 'imagenet'
-
-    _config_validator_type = ImageNetFormatConverterConfig
-
-    def configure(self):
-        self.annotation_file = self.config['annotation_file']
-        self.labels_file = self.config.get('labels_file')
-        self.has_background = self.config.get('has_background', False)
-
-    def convert(self):
-        annotation = []
-        for image in read_txt(get_path(self.annotation_file)):
-            image_name, label = image.split()
-            label = np.int64(label) if not self.has_background else np.int64(label) + 1
-            annotation.append(ClassificationAnnotation(image_name, label))
-        meta = self._create_meta(self.labels_file, self.has_background) if self.labels_file else None
-
-        return annotation, meta
-
-    @staticmethod
-    def _create_meta(labels_file, has_background=False):
-        meta = {}
-        labels = {}
-        for i, line in enumerate(read_txt(get_path(labels_file))):
-            index_for_label = i if not has_background else i + 1
-            line = line.strip()
-            label = line[line.find(' ') + 1:]
-            labels[index_for_label] = label
-
-        if has_background:
-            labels[0] = 'background'
-            meta['background_label'] = 0
-
-        meta['label_map'] = labels
-
-        return meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/lfw.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/lfw.py
deleted file mode 100644 (file)
index 1002daf..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import defaultdict
-from pathlib import Path
-
-from ..config import PathField
-from ..representation import ReIdentificationClassificationAnnotation
-from ..utils import read_txt
-
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class FaceReidPairwiseConverterConfig(BaseFormatConverterConfig):
-    pairs_file = PathField()
-    train_file = PathField(optional=True)
-    landmarks_file = PathField(optional=True)
-
-
-class FaceReidPairwiseConverter(BaseFormatConverter):
-    __provider__ = 'face_reid_pairwise'
-
-    _config_validator_type = FaceReidPairwiseConverterConfig
-
-    def configure(self):
-        self.pairs_file = self.config['pairs_file']
-        self.train_file = self.config.get('train_file')
-        self.landmarks_file = self.config.get('landmarks_file')
-
-    def convert(self):
-        landmarks_map = {}
-        if self.landmarks_file:
-            for landmark_line in read_txt(self.landmarks_file):
-                landmark_line = landmark_line.split('\t')
-                landmarks_map[landmark_line[0]] = [int(point) for point in landmark_line[1:]]
-
-        test_annotations = self.prepare_annotation(self.pairs_file, True, landmarks_map)
-        if self.train_file:
-            train_annotations = self.prepare_annotation(self.train_file, True, landmarks_map)
-            test_annotations += train_annotations
-
-        return test_annotations, None
-
-    @staticmethod
-    def get_image_name(person, image_id):
-        image_path_pattern = '{}/{}_{}{}.jpg'
-        return image_path_pattern.format(person, person, '0' * (4 - len(image_id)), image_id)
-
-    def convert_positive(self, pairs, all_images):
-        positives = defaultdict(set)
-        for data in pairs:
-            image1 = self.get_image_name(data[0], data[1])
-            image2 = self.get_image_name(data[0], data[2])
-            positives[image1].add(image2)
-            all_images.add(image1)
-            all_images.add(image2)
-
-        return positives, all_images
-
-    def convert_negative(self, pairs, all_images):
-        negatives = defaultdict(set)
-        for data in pairs:
-            image1 = self.get_image_name(data[0], data[1])
-            image2 = self.get_image_name(data[2], data[3])
-            negatives[image1].add(image2)
-            all_images.add(image1)
-            all_images.add(image2)
-
-        return negatives, all_images
-
-    def prepare_annotation(self, ann_file: Path, train=False, landmarks_map=None):
-        positive_pairs, negative_pairs = [], []
-        ann_lines = read_txt(ann_file)
-        for line in ann_lines[1:]:  # skip header
-            pair = line.strip().split()
-            if len(pair) == 3:
-                positive_pairs.append(pair)
-            elif len(pair) == 4:
-                negative_pairs.append(pair)
-
-        all_images = set()
-        positive_data, all_images = self.convert_positive(positive_pairs, all_images)
-        negative_data, all_images = self.convert_negative(negative_pairs, all_images)
-
-        annotations = []
-        for image in all_images:
-            annotation = ReIdentificationClassificationAnnotation(image, positive_data[image], negative_data[image])
-
-            if landmarks_map:
-                image_landmarks = landmarks_map.get(image)
-                annotation.metadata['keypoints'] = image_landmarks
-
-            if train:
-                annotation.metadata['train'] = True
-
-            annotations.append(annotation)
-
-        return annotations
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/mapillary_20.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/mapillary_20.py
deleted file mode 100644 (file)
index a089b5a..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from pathlib import Path
-from ..config import PathField
-from ..representation import SegmentationAnnotation
-from ..representation.segmentation_representation import GTMaskLoader
-from ..utils import get_path
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class Mapillary20ConverterConfig(BaseFormatConverterConfig):
-    data_dir = PathField(is_directory=True, optional=True)
-    images_dir = PathField(optional=True, is_directory=True)
-    mask_dir = PathField(optional=True, is_directory=True)
-
-
-class Mapillary20Converters(BaseFormatConverter):
-    __provider__ = 'mapillary_20'
-
-    label_map = {
-        0: 'Road',
-        1: 'Sidewalk',
-        2: 'Building',
-        3: 'Wall',
-        4: 'Fence',
-        5: 'Pole',
-        6: 'Traffic Light',
-        7: 'Traffic Sign',
-        8: 'Vegetation',
-        9: 'Terrain',
-        10: 'Sky',
-        11: 'Person',
-        12: 'Rider',
-        13: 'Car',
-        14: 'Truck',
-        15: 'Bus',
-        16: 'Train',
-        17: 'Motorcycle',
-        18: 'Bicycle',
-        19: 'Ego-Vehicle'
-    }
-
-    _config_validator_type = Mapillary20ConverterConfig
-
-    def configure(self):
-        data_dir = self.config.get('data_dir')
-        image_folder = self.config.get('images_dir', 'imgs')
-        mask_folder = self.config.get('mask_dir', 'masks')
-        if data_dir:
-            image_folder = data_dir / image_folder
-            mask_folder = data_dir / mask_folder
-        self.images_dir = get_path(image_folder, is_directory=True)
-        self.mask_dir = get_path(mask_folder, is_directory=True)
-
-    def convert(self):
-        annotations = []
-        for file_in_dir in self.images_dir.iterdir():
-            annotation = SegmentationAnnotation(
-                str(Path(self.images_dir.name) / file_in_dir.name),
-                str(Path(self.mask_dir.name) / file_in_dir.name),
-                mask_loader=GTMaskLoader.PILLOW
-            )
-
-            annotations.append(annotation)
-
-        return annotations, {'label_map': self.label_map}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/market1501.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/market1501.py
deleted file mode 100644 (file)
index 3d45cc2..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from __future__ import absolute_import, print_function
-
-import re
-
-from ._reid_common import check_dirs, read_directory
-from .format_converter import DirectoryBasedAnnotationConverter
-
-MARKET_IMAGE_PATTERN = re.compile(r'([-\d]+)_c(\d)')
-
-
-class Market1501Converter(DirectoryBasedAnnotationConverter):
-    __provider__ = 'market1501'
-
-    def convert(self):
-        gallery = self.data_dir / 'bounding_box_test'
-        query = self.data_dir / 'query'
-
-        check_dirs((gallery, query), self.data_dir)
-        gallery_images, gallery_pids = read_directory(gallery, query=False, image_pattern=MARKET_IMAGE_PATTERN)
-        query_images, query_pids = read_directory(query, query=True, image_pattern=MARKET_IMAGE_PATTERN)
-        annotation = gallery_images + query_images
-
-        meta = {'num_identities': len(gallery_pids | query_pids)}
-
-        return annotation, meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/mars.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/mars.py
deleted file mode 100644 (file)
index bb8de49..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from __future__ import absolute_import, print_function
-
-import re
-
-from ._reid_common import check_dirs, read_directory
-from .format_converter import DirectoryBasedAnnotationConverter
-
-MARS_IMAGE_PATTERN = re.compile(r'([\d]+)C(\d)')
-
-
-class MARSConverter(DirectoryBasedAnnotationConverter):
-    __provider__ = 'mars'
-
-    def convert(self):
-        gallery = self.data_dir / 'bbox_test'
-        query = self.data_dir / 'query'
-
-        check_dirs((gallery, query), self.data_dir)
-        gallery_images, gallery_pids = read_directory(gallery, query=False, image_pattern=MARS_IMAGE_PATTERN)
-        query_images, query_pids = read_directory(query, query=True, image_pattern=MARS_IMAGE_PATTERN)
-
-        return gallery_images + query_images, {'num_identities': len(gallery_pids | query_pids)}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/mighty.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/mighty.py
deleted file mode 100644 (file)
index c0ae9f2..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..representation import SegmentationAnnotation
-from ..representation.segmentation_representation import GTMaskLoader
-from ..utils import read_txt
-from .format_converter import FileBasedAnnotationConverter
-
-
-class MightyFormatConverter(FileBasedAnnotationConverter):
-    __provider__ = 'mighty'
-
-    label_map = {0: 'BG', 1: 'road', 2: 'curbs', 3: 'marks'}
-
-    def convert(self):
-        annotations = []
-        for line in read_txt(self.annotation_file):
-            identifier, mask = line.split()
-            annotations.append(SegmentationAnnotation(identifier, mask, mask_loader=GTMaskLoader.PILLOW))
-
-        return annotations, {'label_map': self.label_map}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/ms_coco.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/ms_coco.py
deleted file mode 100644 (file)
index f1e41be..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from tqdm import tqdm
-import numpy as np
-
-from ..config import BoolField
-from ..utils import read_json, convert_bboxes_xywh_to_x1y1x2y2
-from ..representation import DetectionAnnotation, PoseEstimationAnnotation
-from .format_converter import BaseFormatConverter, FileBasedAnnotationConverter, FileBasedAnnotationConverterConfig
-
-
-def get_image_annotation(image_id, annotations_):
-    return list(filter(lambda x: x['image_id'] == image_id, annotations_))
-
-
-def get_label_map(full_annotation, use_full_label_map=False, has_background=False):
-    labels = full_annotation['categories']
-
-    if not use_full_label_map:
-        label_offset = 1 if has_background else 0
-        label_id_to_label = {label['id']: label_id + label_offset for label_id, label in enumerate(labels)}
-        label_map = {label_id + label_offset: label['name'] for label_id, label in enumerate(labels)}
-    else:
-        label_id_to_label = {label['id']: label['id'] for label in labels}
-        label_map = {label['id']: label['name'] for label in labels}
-
-    return label_map, label_id_to_label
-
-
-class MSCocoDetectionConverterConfig(FileBasedAnnotationConverterConfig):
-    has_background = BoolField(optional=True)
-    use_full_label_map = BoolField(optional=True)
-
-
-class MSCocoDetectionConverter(BaseFormatConverter):
-    __provider__ = 'mscoco_detection'
-
-    _config_validator_type = MSCocoDetectionConverterConfig
-
-    def configure(self):
-        self.annotation_file = self.config['annotation_file']
-        self.has_background = self.config.get('has_background', False)
-        self.use_full_label_map = self.config.get('use_full_label_map', False)
-
-    def convert(self):
-        detection_annotations = []
-        full_annotation = read_json(self.annotation_file)
-        image_info = full_annotation['images']
-        annotations = full_annotation['annotations']
-
-        label_map, label_id_to_label = get_label_map(full_annotation, self.use_full_label_map, self.has_background)
-
-        meta = {}
-        if self.has_background:
-            label_map[0] = 'background'
-            meta['background_label'] = 0
-
-        meta.update({'label_map': label_map})
-
-        for image in tqdm(image_info):
-            identifier = image['file_name']
-            image_annotation = get_image_annotation(image['id'], annotations)
-            image_labels = [label_id_to_label[annotation['category_id']] for annotation in image_annotation]
-            xmins = [annotation['bbox'][0] for annotation in image_annotation]
-            ymins = [annotation['bbox'][1] for annotation in image_annotation]
-            widths = [annotation['bbox'][2] for annotation in image_annotation]
-            heights = [annotation['bbox'][3] for annotation in image_annotation]
-            xmaxs = np.add(xmins, widths)
-            ymaxs = np.add(ymins, heights)
-            is_crowd = [annotation['iscrowd'] for annotation in image_annotation]
-            detection_annotation = DetectionAnnotation(identifier, image_labels, xmins, ymins, xmaxs, ymaxs)
-            detection_annotation.metadata['iscrowd'] = is_crowd
-            detection_annotations.append(detection_annotation)
-
-        return detection_annotations, meta
-
-
-class MSCocoKeypointsConverter(FileBasedAnnotationConverter):
-    __provider__ = 'mscoco_keypoints'
-
-    def convert(self):
-        keypoints_annotations = []
-
-        full_annotation = read_json(self.annotation_file)
-        image_info = full_annotation['images']
-        annotations = full_annotation['annotations']
-        label_map, _ = get_label_map(full_annotation, True)
-        for image in image_info:
-            identifier = image['file_name']
-            image_annotation = get_image_annotation(image['id'], annotations)
-            if not image_annotation:
-                continue
-            x_vals, y_vals, visibility, labels, areas, is_crowd, bboxes, difficult = [], [], [], [], [], [], [], []
-            for target in image_annotation:
-                if target['num_keypoints'] == 0:
-                    difficult.append(len(x_vals))
-                labels.append(target['category_id'])
-                keypoints = target['keypoints']
-                x_vals.append(keypoints[::3])
-                y_vals.append(keypoints[1::3])
-                visibility.append(keypoints[2::3])
-                areas.append(target['area'])
-                bboxes.append(convert_bboxes_xywh_to_x1y1x2y2(*target['bbox']))
-                is_crowd.append(target['iscrowd'])
-            keypoints_annotation = PoseEstimationAnnotation(
-                identifier, np.array(x_vals), np.array(y_vals), np.array(visibility), np.array(labels)
-            )
-            keypoints_annotation.metadata['areas'] = areas
-            keypoints_annotation.metadata['rects'] = bboxes
-            keypoints_annotation.metadata['iscrowd'] = is_crowd
-            keypoints_annotation.metadata['difficult_boxes'] = difficult
-
-            keypoints_annotations.append(keypoints_annotation)
-
-        return keypoints_annotations, {'label_map': label_map}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/ncf_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/ncf_converter.py
deleted file mode 100644 (file)
index 86d4cb1..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-
-from ..representation import HitRatioAnnotation
-from ..utils import read_txt, get_path
-from ..config import PathField, NumberField
-
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class NCFDatasetConverterConfig(BaseFormatConverterConfig):
-    raiting_file = PathField()
-    negative_file = PathField()
-    users_max_number = NumberField(optional=True)
-
-
-class NCFConverter(BaseFormatConverter):
-    __provider__ = "ncf_converter"
-
-    _config_validator_type = NCFDatasetConverterConfig
-
-    def configure(self):
-        self.raiting_file = self.config['raiting_file']
-        self.negative_file = self.config['negative_file']
-        if 'users_max_number' in self.config:
-            self.users_max_number = self.config['users_max_number']
-        else:
-            self.users_max_number = -1
-
-    def convert(self):
-        annotations = []
-        users = []
-
-        for file_row in read_txt(self.raiting_file):
-            user_id, item_id, _ = file_row.split()
-            users.append(user_id)
-            identifier = ['u:' + user_id, 'i:' + item_id]
-            annotations.append(HitRatioAnnotation(identifier))
-            if self.users_max_number > 0 and len(users) >= self.users_max_number:
-                break
-
-        item_numbers = 1
-
-        items_neg = []
-        with get_path(self.negative_file).open() as content:
-            for file_row in content:
-                items = file_row.split()
-                items_neg.append(items)
-                if self.users_max_number > 0 and len(items_neg) >= self.users_max_number:
-                    break
-
-        if items_neg:
-            iterations = len(items_neg[0])
-            item_numbers += iterations
-            for i in range(iterations):
-                for user in users:
-                    item = items_neg[int(user)][i]
-                    identifier = ['u:' + user, 'i:' + item]
-                    annotations.append(HitRatioAnnotation(identifier, False))
-
-        return annotations, {'users_number': len(users), 'item_numbers': item_numbers}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py
deleted file mode 100644 (file)
index b30a72a..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from tqdm import tqdm
-from pathlib import Path
-
-from ..config import PathField, BoolField
-from ..representation import DetectionAnnotation, SegmentationAnnotation
-from ..representation.segmentation_representation import GTMaskLoader
-from ..utils import get_path, read_txt, read_xml
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-_VOC_CLASSES_DETECTION = (
-    'aeroplane', 'bicycle', 'bird', 'boat',
-    'bottle', 'bus', 'car', 'cat', 'chair',
-    'cow', 'diningtable', 'dog', 'horse',
-    'motorbike', 'person', 'pottedplant',
-    'sheep', 'sofa', 'train', 'tvmonitor'
-)
-
-_VOC_CLASSES_SEGMENTATION = tuple(['__background__']) + _VOC_CLASSES_DETECTION
-_SEGMENTATION_COLORS = ((
-    (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0),
-    (0, 0, 128), (128, 0, 128), (0, 128, 128), (128, 128, 128),
-    (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0),
-    (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
-    (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
-    (0, 64, 128)
-))
-
-
-def prepare_detection_labels(has_background=True):
-    num_classes = len(_VOC_CLASSES_DETECTION)
-    labels_shift = 1 if has_background else 0
-    reversed_label_map = dict(zip(_VOC_CLASSES_DETECTION, list(range(labels_shift, num_classes + labels_shift))))
-    if has_background:
-        reversed_label_map['__background__'] = 0
-
-    return reversed_label_map
-
-
-def reverse_label_map(label_map):
-    return {value: key for key, value in label_map.items()}
-
-
-class PascalVOCSegmentationConverterConfig(BaseFormatConverterConfig):
-    image_set_file = PathField()
-    images_dir = PathField(optional=True, is_directory=True)
-    mask_dir = PathField(optional=True, is_directory=True)
-
-
-class PascalVOCSegmentationConverter(BaseFormatConverter):
-    __provider__ = 'voc_segmentation'
-
-    _config_validator_type = PascalVOCSegmentationConverterConfig
-
-    def configure(self):
-        self.image_set_file = self.config['image_set_file']
-        self.image_dir = self.config.get('images_dir')
-        if not self.image_dir:
-            self.image_dir = get_path(self.image_set_file.parents[-2] / 'JPEGImages', is_directory=True)
-
-        self.mask_dir = self.config.get('mask_dir')
-        if not self.mask_dir:
-            self.mask_dir = get_path(self.image_set_file.parents[-2] / 'SegmentationClass', is_directory=True)
-
-    def convert(self):
-
-        annotations = []
-        for image in read_txt(self.image_set_file):
-            annotation = SegmentationAnnotation(
-                str(Path(self.image_dir.name) / '{}.jpg'.format(image)),
-                str(Path(self.mask_dir.name) / '{}.png'.format(image)),
-                mask_loader=GTMaskLoader.SCIPY
-            )
-
-            annotations.append(annotation)
-
-        meta = {
-            'label_map': dict(enumerate(_VOC_CLASSES_SEGMENTATION)),
-            'background_label': 0,
-            'segmentation_colors': _SEGMENTATION_COLORS
-        }
-
-        return annotations, meta
-
-
-class PascalVOCDetectionConverterConfig(BaseFormatConverterConfig):
-    image_set_file = PathField()
-    annotations_dir = PathField(is_directory=True)
-    images_dir = PathField(optional=True, is_directory=True)
-    has_background = BoolField(optional=True)
-
-
-class PascalVOCDetectionConverter(BaseFormatConverter):
-    __provider__ = 'voc07'
-
-    _config_validator_type = PascalVOCDetectionConverterConfig
-
-    def configure(self):
-        self.image_set_file = self.config['image_set_file']
-        self.image_dir = self.config.get('images_dir')
-        if not self.image_dir:
-            self.image_dir = get_path(self.image_set_file.parents[-2] / 'JPEGImages', is_directory=True)
-        self.annotations_dir = self.config['annotations_dir']
-        self.has_background = self.config.get('has_background', True)
-
-    def convert(self):
-        class_to_ind = prepare_detection_labels(self.has_background)
-
-        detections = []
-        for image in tqdm(read_txt(self.image_set_file, sep=None)):
-            root = read_xml(self.annotations_dir / '{}.xml'.format(image))
-
-            identifier = root.find('.//filename').text
-            get_path(self.image_dir / identifier)
-
-            labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
-            difficult_indices = []
-            for entry in root:
-                if not entry.tag.startswith('object'):
-                    continue
-
-                bbox = entry.find('bndbox')
-                difficult = int(entry.find('difficult').text)
-
-                if difficult == 1:
-                    difficult_indices.append(len(labels))
-
-                labels.append(class_to_ind[entry.find('name').text])
-                x_mins.append(float(bbox.find('xmin').text) - 1)
-                y_mins.append(float(bbox.find('ymin').text) - 1)
-                x_maxs.append(float(bbox.find('xmax').text) - 1)
-                y_maxs.append(float(bbox.find('ymax').text) - 1)
-
-            image_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
-            image_annotation.metadata['difficult_boxes'] = difficult_indices
-
-            detections.append(image_annotation)
-
-        meta = {'label_map': reverse_label_map(class_to_ind)}
-        if self.has_background:
-            meta['background_label'] = 0
-
-        return detections, meta
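
Note: a minimal standalone sketch of the label-map handling implemented by the removed pascal_voc.py converter, kept for reference. The class names are copied from _VOC_CLASSES_DETECTION above; the helpers are re-declared here so the snippet runs on its own and are not part of the remaining accuracy_checker API.

    VOC_DETECTION_CLASSES = (
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
    )

    def prepare_detection_labels(has_background=True):
        # class indices are shifted by one when a dedicated background label is used
        shift = 1 if has_background else 0
        label_map = {name: index + shift for index, name in enumerate(VOC_DETECTION_CLASSES)}
        if has_background:
            label_map['__background__'] = 0
        return label_map

    class_to_ind = prepare_detection_labels()
    ind_to_class = {value: key for key, value in class_to_ind.items()}
    assert class_to_ind['aeroplane'] == 1 and ind_to_class[0] == '__background__'
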
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/sample_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/sample_converter.py
deleted file mode 100644 (file)
index 88fb713..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import re
-
-from ..config import PathField
-from ..representation import ClassificationAnnotation
-from ..utils import get_path, read_txt
-
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class SampleConverterConfig(BaseFormatConverterConfig):
-    data_dir = PathField(is_directory=True)
-
-
-class SampleConverter(BaseFormatConverter):
-    """
-    Sample dataset converter. All annotation converters should be derived from the BaseFormatConverter class.
-    """
-
-    # register name for this converter
-    # this name will be used for converter class lookup
-    __provider__ = 'sample'
-
-    _config_validator_type = SampleConverterConfig
-
-    def configure(self):
-        self.data_dir = self.config['data_dir']
-
-    def convert(self):
-        """
-        This method is executed automatically when convert.py is started.
-        All arguments are automatically forwarded from command line arguments.
-
-        Returns:
-            annotations: list of annotation representation objects.
-            meta: dictionary with additional dataset level metadata.
-        """
-
-        dataset_directory = get_path(self.data_dir, is_directory=True)
-
-        # read and convert annotation
-        labels = self._read_labels(dataset_directory / 'labels.txt')
-        annotations = self._convert_annotations(dataset_directory / 'test', labels)
-
-        # convert label list to label map
-        label_map = {i: labels[i] for i in range(len(labels))}
-        metadata = {'label_map': label_map}
-
-        return annotations, metadata
-
-    @staticmethod
-    def _read_labels(labels_file):
-        """
-        Extract label names from labels.txt file.
-        """
-
-        return read_txt(labels_file)
-
-    @staticmethod
-    def _convert_annotations(test_dir, labels):
-        """
-        Create annotation representations list.
-        """
-
-        # the test directory contains files named XXXX_class.png
-        # we use a regular expression to extract class names
-        file_pattern_regex = re.compile(r'\d+_(\w+)\.png')
-
-        annotations = []
-        # iterate over all png images in test directory
-        for image in test_dir.glob('*.png'):
-            # get file name (e.g. from /foo/bar/image.png we get image.png)
-            image_base = str(image.parts[-1])
-
-            # extract class name from file name
-            regex_match = re.match(file_pattern_regex, image_base)
-            image_label = regex_match.group(1)
-
-            # look up class index in label list
-            class_id = labels.index(image_label)
-
-            # create annotation representation object
-            annotations.append(ClassificationAnnotation(image_base, class_id))
-
-        return annotations
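
Note: the removed SampleConverter documented the converter contract (convert() returns a list of annotation representations plus a metadata dictionary). Below is a standalone sketch of its filename-to-label logic, with illustrative paths and without the accuracy_checker imports.

    import re
    from pathlib import Path

    FILE_PATTERN = re.compile(r'\d+_(\w+)\.png')   # test images are named <number>_<class>.png

    def convert_annotations(test_dir, labels):
        annotations = []
        for image in Path(test_dir).glob('*.png'):
            match = FILE_PATTERN.match(image.name)
            if not match:
                continue  # ignore files that do not follow the naming scheme
            annotations.append((image.name, labels.index(match.group(1))))
        return annotations

    # e.g. convert_annotations('sample_dataset/test', ['cat', 'dog'])
    # -> [('0001_cat.png', 0), ('0002_dog.png', 1), ...]
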
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/super_resolution_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/super_resolution_converter.py
deleted file mode 100644 (file)
index 4c053f9..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from ..config import PathField, StringField, BoolField
-from ..representation import SuperResolutionAnnotation
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class SRConverterConfig(BaseFormatConverterConfig):
-    data_dir = PathField(is_directory=True)
-    lr_suffix = StringField(optional=True)
-    hr_suffix = StringField(optional=True)
-    two_streams = BoolField(optional=True)
-
-
-class SRConverter(BaseFormatConverter):
-    __provider__ = 'super_resolution'
-
-    _config_validator_type = SRConverterConfig
-
-    def configure(self):
-        self.data_dir = self.config['data_dir']
-        self.lr_suffix = self.config.get('lr_suffix', 'lr')
-        self.hr_suffix = self.config.get('hr_suffix', 'hr')
-        self.two_streams = self.config.get('two_streams', False)
-
-    def convert(self):
-        file_list_lr = []
-        for file_in_dir in self.data_dir.iterdir():
-            if self.lr_suffix in file_in_dir.parts[-1]:
-                file_list_lr.append(file_in_dir)
-
-        annotation = []
-        for lr_file in file_list_lr:
-            lr_file_name = lr_file.parts[-1]
-            hr_file_name = self.hr_suffix.join(lr_file_name.split(self.lr_suffix))
-            identifier = [lr_file_name, hr_file_name] if self.two_streams else lr_file_name
-            annotation.append(SuperResolutionAnnotation(identifier, hr_file_name))
-
-        return annotation, None
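
Note: the removed SRConverter pairs low- and high-resolution files by suffix substitution. A sketch of that pairing with the default suffixes from the code above ('lr'/'hr'); the file name is illustrative.

    def pair_lr_hr(lr_file_name, lr_suffix='lr', hr_suffix='hr'):
        # every occurrence of the LR suffix is replaced by the HR suffix,
        # exactly as in SRConverter.convert() above
        return hr_suffix.join(lr_file_name.split(lr_suffix))

    assert pair_lr_hr('img_001_lr.png') == 'img_001_hr.png'
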
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/vgg_face_regression.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/vgg_face_regression.py
deleted file mode 100644 (file)
index 53c7c57..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..config import PathField
-from ..representation import FacialLandmarksAnnotation
-from ..utils import convert_bboxes_xywh_to_x1y1x2y2, read_csv
-from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
-
-
-class LandmarksRegressionConfig(BaseFormatConverterConfig):
-    landmarks_csv_file = PathField()
-    bbox_csv_file = PathField(optional=True)
-
-
-class LandmarksRegression(BaseFormatConverter):
-    __provider__ = 'landmarks_regression'
-
-    _config_validator_type = LandmarksRegressionConfig
-
-    def configure(self):
-        self.landmarks_csv = self.config['landmarks_csv_file']
-        self.bbox_csv = self.config.get('bbox_csv_file')
-
-    def convert(self):
-        annotations = []
-        for row in read_csv(self.landmarks_csv):
-            identifier = row['NAME_ID'] + '.jpg'
-            x_values = np.array(
-                [float(row["P1X"]), float(row["P2X"]), float(row["P3X"]), float(row["P4X"]), float(row["P5X"])]
-            )
-            y_values = np.array(
-                [float(row["P1Y"]), float(row["P2Y"]), float(row["P3Y"]), float(row["P4Y"]), float(row["P5Y"])]
-            )
-
-            annotation = FacialLandmarksAnnotation(identifier, x_values, y_values)
-            annotation.metadata['left_eye'] = 0
-            annotation.metadata['right_eye'] = 1
-            annotations.append(annotation)
-
-        if self.bbox_csv:
-            for index, row in enumerate(read_csv(self.bbox_csv)):
-                annotations[index].metadata['rect'] = convert_bboxes_xywh_to_x1y1x2y2(
-                    int(row["X"]), int(row["Y"]), int(row["W"]), int(row["H"])
-                )
-
-        meta = {
-            'label_map': {0: 'Left Eye', 1: 'Right Eye', 2: 'Nose', 3: 'Left Mouth Corner', 4: 'Right Mouth Corner'}
-        }
-        return annotations, meta
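
Note: a sketch of how one landmarks CSV row was turned into the x/y arrays used by the removed LandmarksRegression converter. The column names (NAME_ID, P1X..P5Y) come from the code above; the row values below are made up.

    import numpy as np

    row = {'NAME_ID': 'n000001/0001_01',
           'P1X': '75.1', 'P2X': '113.4', 'P3X': '94.0', 'P4X': '80.3', 'P5X': '110.2',
           'P1Y': '95.5', 'P2Y': '95.8', 'P3Y': '115.0', 'P4Y': '135.1', 'P5Y': '135.7'}

    identifier = row['NAME_ID'] + '.jpg'
    x_values = np.array([float(row['P{}X'.format(i)]) for i in range(1, 6)])
    y_values = np.array([float(row['P{}Y'.format(i)]) for i in range(1, 6)])
    # point order: left eye, right eye, nose, left mouth corner, right mouth corner
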
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/wider.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/wider.py
deleted file mode 100644 (file)
index 672aa3a..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..config import NumberField
-from ..representation import DetectionAnnotation
-from ..utils import convert_bboxes_xywh_to_x1y1x2y2, read_txt
-
-from .format_converter import BaseFormatConverter, FileBasedAnnotationConverterConfig
-
-
-class WiderConverterConfig(FileBasedAnnotationConverterConfig):
-    label_start = NumberField(floats=False, optional=True)
-
-
-class WiderFormatConverter(BaseFormatConverter):
-    __provider__ = 'wider'
-
-    _config_validator_type = WiderConverterConfig
-
-    def configure(self):
-        self.annotation_file = self.config['annotation_file']
-        self.label_start = self.config.get('label_start', 1)
-
-    def convert(self):
-        image_annotations = read_txt(self.annotation_file)
-        image_ids = []
-        for image_id, line in enumerate(image_annotations):
-            if '.jpg' in line:
-                image_ids.append(image_id)
-
-        annotations = []
-        for image_id in image_ids:
-            identifier = image_annotations[image_id]
-            bbox_count = image_annotations[image_id + 1]
-            bbox_lines = image_annotations[image_id + 2:image_id + 2 + int(bbox_count)]
-
-            x_mins, y_mins, x_maxs, y_maxs = [], [], [], []
-            for bbox in bbox_lines:
-                x_min, y_min, x_max, y_max = convert_bboxes_xywh_to_x1y1x2y2(*(map(float, (bbox.split(' ')[0:4]))))
-                x_mins.append(x_min)
-                y_mins.append(y_min)
-                x_maxs.append(x_max)
-                y_maxs.append(y_max)
-
-            annotations.append(DetectionAnnotation(
-                identifier, [self.label_start] * len(x_mins),
-                x_mins, y_mins, x_maxs, y_maxs
-            ))
-
-        return annotations, {'label_map': {0: '__background__', self.label_start: 'face'}, 'background_label': 0}
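
Note: a standalone sketch of the WIDER FACE annotation layout parsed by the removed WiderFormatConverter: an image path line, a box-count line, then one 'x y w h ...' line per box. The helper re-declares a plain xywh-to-corners conversion (an assumption standing in for utils.convert_bboxes_xywh_to_x1y1x2y2), and the sample lines are illustrative.

    def xywh_to_x1y1x2y2(x, y, w, h):
        return x, y, x + w, y + h

    lines = [
        '0--Parade/0_Parade_marchingband_1_849.jpg',
        '1',
        '449 330 122 149 0 0 0 0 0 0',
    ]

    boxes = []
    for i, line in enumerate(lines):
        if '.jpg' not in line:
            continue
        count = int(lines[i + 1])
        for bbox_line in lines[i + 2:i + 2 + count]:
            x, y, w, h = map(float, bbox_line.split(' ')[0:4])
            boxes.append(xywh_to_x1y1x2y2(x, y, w, h))

    assert boxes == [(449.0, 330.0, 571.0, 479.0)]
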
diff --git a/tools/accuracy_checker/accuracy_checker/config/__init__.py b/tools/accuracy_checker/accuracy_checker/config/__init__.py
deleted file mode 100644 (file)
index a32b29a..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .config_validator import (
-    BaseField,
-    StringField,
-    ListField,
-    BoolField,
-    PathField,
-    NumberField,
-    DictField,
-
-    BaseValidator,
-    ConfigError,
-    ConfigValidator
-)
-
-
-from .config_reader import ConfigReader
-
-__all__ = [
-    'BaseField',
-    'StringField',
-    'ListField',
-    'BoolField',
-    'PathField',
-    'NumberField',
-    'DictField',
-
-    'BaseValidator',
-    'ConfigError',
-    'ConfigValidator',
-
-    'ConfigReader'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/config/config_reader.py b/tools/accuracy_checker/accuracy_checker/config/config_reader.py
deleted file mode 100644 (file)
index a37686f..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import copy
-from pathlib import Path
-
-import warnings
-
-from ..utils import read_yaml, to_lower_register, contains_any
-from .config_validator import ConfigError
-
-
-class ConfigReader:
-    """
-    Class for parsing input config.
-    """
-
-    @staticmethod
-    def merge(arguments):
-        """
-        Args:
-            arguments: command-line arguments.
-        Returns:
-            dictionary containing configuration.
-        """
-
-        global_config, local_config = ConfigReader._read_configs(arguments)
-        if not local_config:
-            raise ConfigError('Missing local config')
-
-        mode = ConfigReader._check_local_config(local_config)
-        ConfigReader._prepare_global_configs(global_config)
-
-        config = ConfigReader._merge_configs(global_config, local_config, mode)
-
-        ConfigReader._provide_cmd_arguments(arguments, config, mode)
-        ConfigReader._merge_paths_with_prefixes(arguments, config, mode)
-        ConfigReader._filter_launchers(config, arguments, mode)
-
-        return config, mode
-
-    @staticmethod
-    def _read_configs(arguments):
-        global_config = read_yaml(arguments.definitions) if arguments.definitions else None
-        local_config = read_yaml(arguments.config)
-
-        return global_config, local_config
-
-    @staticmethod
-    def _check_local_config(config):
-        def _is_requirements_missed(target, requirements):
-            return list(filter(lambda entry: not target.get(entry), requirements))
-
-        def _check_models_config(config):
-            models = config.get('models')
-            if not models:
-                raise ConfigError('Missing "{}" in local config'.format('models'))
-
-            required_model_entries = ['name', 'launchers', 'datasets']
-            required_dataset_entries = ['name']
-            required_dataset_error = 'Model {} must specify {} for each dataset'
-            for model in models:
-                if _is_requirements_missed(model, required_model_entries):
-                    raise ConfigError('Each model must specify {}'.format(', '.join(required_model_entries)))
-
-                if list(filter(lambda entry: _is_requirements_missed(entry, required_dataset_entries),
-                               model['datasets'])):
-                    raise ConfigError(required_dataset_error.format(model['name'], ', '.join(required_dataset_entries)))
-
-        def _check_pipelines_config(config):
-            def _count_entry(stages, entry):
-                count = 0
-                for stage in stages:
-                    if entry in stage:
-                        count += 1
-                return count
-            required_pipeline_entries = ['name', 'device_info', 'stages']
-            pipelines = config['pipelines']
-            if not pipelines:
-                raise ConfigError('Missing "{}" in local config'.format('pipelines'))
-            for pipeline in pipelines:
-                if _is_requirements_missed(pipeline, required_pipeline_entries):
-                    raise ConfigError('Each pipeline must specify {}'.format(', '.join(required_pipeline_entries)))
-                stages = pipeline['stages']
-                first_stage = stages[0]
-                dataset = first_stage.get('dataset')
-                if not dataset:
-                    raise ConfigError('First stage should contain dataset')
-                count_datasets = _count_entry(stages, 'dataset')
-                if count_datasets != 1:
-                    raise ConfigError('Exactly one dataset per pipeline is supported')
-                count_launchers = _count_entry(stages, 'launcher')
-                if not count_launchers:
-                    raise ConfigError('Launchers are not specified')
-                count_metrics = _count_entry(stages, 'metrics')
-                if not count_metrics:
-                    raise ConfigError('Metrics are not specified')
-
-        if 'pipelines' in config:
-            _check_pipelines_config(config)
-            return 'pipelines'
-
-        _check_models_config(config)
-        return 'models'
-
-    @staticmethod
-    def _prepare_global_configs(global_configs):
-        if not global_configs or 'datasets' not in global_configs:
-            return
-
-        datasets = global_configs['datasets']
-
-        def merge(local_entries, global_entries, identifier):
-            if not local_entries or not global_entries:
-                return
-
-            for i, local in enumerate(local_entries):
-                local_identifier = local.get(identifier)
-                if not local_identifier:
-                    continue
-
-                local_entries[i] = ConfigReader._merge_configs_by_identifier(global_entries, local, identifier)
-
-        for dataset in datasets:
-            merge(dataset.get('preprocessing'), global_configs.get('preprocessing'), 'type')
-            merge(dataset.get('metrics'), global_configs.get('metrics'), 'type')
-            merge(dataset.get('postprocessing'), global_configs.get('postprocessing'), 'type')
-
-    @staticmethod
-    def _merge_configs(global_configs, local_config, mode='models'):
-        def _merge_models_config(global_configs, local_config):
-            config = copy.deepcopy(local_config)
-            if not global_configs:
-                return config
-
-            models = config.get('models')
-            for model in models:
-                for i, launcher_entry in enumerate(model['launchers']):
-                    model['launchers'][i] = ConfigReader._merge_configs_by_identifier(
-                        global_configs['launchers'], launcher_entry, 'framework'
-                    )
-
-                for i, dataset in enumerate(model['datasets']):
-                    model['datasets'][i] = ConfigReader._merge_configs_by_identifier(
-                        global_configs['datasets'], dataset, 'name'
-                    )
-
-            return config
-
-        def _merge_pipelines_config(global_config, local_config):
-            config = copy.deepcopy(local_config)
-            pipelines = []
-            raw_pipelines = local_config['pipelines']
-            for pipeline in raw_pipelines:
-                device_infos = pipeline['device_info']
-                per_device_pipelines = []
-                for device_info in device_infos:
-                    copy_pipeline = copy.deepcopy(pipeline)
-                    for stage in copy_pipeline['stages']:
-                        if 'launcher' in stage:
-                            stage['launcher'].update(device_info)
-                    per_device_pipelines.append(copy_pipeline)
-                pipelines.extend(per_device_pipelines)
-            config['pipelines'] = pipelines
-
-            return config
-
-        functors_by_mode = {
-            'models': _merge_models_config,
-            'pipelines': _merge_pipelines_config
-        }
-
-        return functors_by_mode[mode](global_configs, local_config)
-
-    @staticmethod
-    def _merge_configs_by_identifier(global_config, local_config, identifier):
-        local_identifier = local_config.get(identifier)
-        if local_identifier is None:
-            return local_config
-
-        matched = []
-        for config in global_config:
-            global_identifier = config.get(identifier)
-            if global_identifier is None:
-                continue
-
-            if global_identifier != local_identifier:
-                continue
-
-            matched.append(config)
-
-        config = copy.deepcopy(matched[0] if matched else {})
-        for key, value in local_config.items():
-            config[key] = value
-
-        return config
-
-    @staticmethod
-    def _merge_paths_with_prefixes(arguments, config, mode='models'):
-        args = arguments if isinstance(arguments, dict) else vars(arguments)
-        entries_paths = {
-            'launchers': {
-                'model': 'models',
-                'weights': 'models',
-                'caffe_model': 'models',
-                'caffe_weights': 'models',
-                'tf_model': 'models',
-                'tf_meta': 'models',
-                'mxnet_weights': 'models',
-                'onnx_model': 'models',
-                'kaldi_model': 'models',
-                'cpu_extensions': 'extensions',
-                'gpu_extensions': 'extensions',
-                'bitstream': 'bitstreams',
-                'affinity_map': 'affinity_map'
-            },
-            'datasets': {
-                'segmentation_masks_source': 'source',
-                'annotation': 'annotations',
-                'dataset_meta': 'annotations',
-                'data_source': 'source',
-            },
-        }
-
-        def merge_entry_paths(keys, value):
-            for field, argument in keys.items():
-                if field not in value:
-                    continue
-
-                config_path = Path(value[field])
-                if config_path.is_absolute():
-                    value[field] = Path(value[field])
-                    continue
-
-                if argument not in args or not args[argument]:
-                    continue
-
-                value[field] = args[argument] / config_path
-
-        def create_command_line_for_conversion(config):
-            mapping = {}
-            value = 'source'
-            for key in config:
-                if key.endswith('file') or key.endswith('dir'):
-                    mapping[key] = value
-            return mapping
-
-        def process_config(config_item, entries_paths, dataset_identifier='datasets', identifiers_mapping=None):
-            for entry, command_line_arg in entries_paths.items():
-                entry_id = entry if not identifiers_mapping else identifiers_mapping[entry]
-                if entry_id not in config_item:
-                    continue
-
-                if entry_id == dataset_identifier:
-                    datasets_configs = config_item[entry_id]
-                    if not isinstance(datasets_configs, list):
-                        datasets_configs = [datasets_configs]
-                    for datasets_config in datasets_configs:
-                        annotation_conversion_config = datasets_config.get('annotation_conversion')
-                        if annotation_conversion_config:
-                            command_line_conversion = (create_command_line_for_conversion(annotation_conversion_config))
-                            merge_entry_paths(command_line_conversion, annotation_conversion_config)
-
-                config_entries = config_item[entry_id]
-                if not isinstance(config_entries, list):
-                    config_entries = [config_entries]
-                for config_entry in config_entries:
-                    merge_entry_paths(command_line_arg, config_entry)
-
-        def process_models(config, entries_paths):
-            for model in config['models']:
-                process_config(model, entries_paths)
-
-        def process_pipelines(config, entries_paths):
-            identifiers_mapping = {'datasets': 'dataset', 'launchers': 'launcher', 'reader': 'reader'}
-            entries_paths.update({'reader': {'data_source': 'source'}})
-            for pipeline in config['pipelines']:
-                for stage in pipeline['stages']:
-                    process_config(stage, entries_paths, 'dataset', identifiers_mapping)
-
-        functors_by_mode = {
-            'models': process_models,
-            'pipelines': process_pipelines
-        }
-
-        processing_func = functors_by_mode[mode]
-        processing_func(config, entries_paths)
-
-    @staticmethod
-    def _provide_cmd_arguments(arguments, config, mode):
-        def merge_converted_model_path(converted_models_dir, mo_output_dir):
-            if mo_output_dir:
-                mo_output_dir = Path(mo_output_dir)
-                if mo_output_dir.is_absolute():
-                    return mo_output_dir
-                return converted_models_dir / mo_output_dir
-            return converted_models_dir
-
-        def merge_dlsdk_launcher_args(arguments, launcher_entry, update_launcher_entry):
-            if launcher_entry['framework'].lower() != 'dlsdk':
-                return launcher_entry
-
-            launcher_entry.update(update_launcher_entry)
-            models_prefix = arguments.models
-            if models_prefix:
-                launcher_entry['_models_prefix'] = models_prefix
-
-            if not arguments.converted_models:
-                return launcher_entry
-
-            mo_params = launcher_entry.get('mo_params', {})
-
-            mo_params.update({
-                'output_dir': merge_converted_model_path(arguments.converted_models, mo_params.get('output_dir'))
-            })
-
-            launcher_entry['mo_params'] = mo_params
-
-            if arguments.aocl:
-                launcher_entry['_aocl'] = arguments.aocl
-
-            return launcher_entry
-
-        def merge_models(config, arguments, update_launcher_entry):
-            for model in config['models']:
-                for launcher_entry in model['launchers']:
-                    merge_dlsdk_launcher_args(arguments, launcher_entry, update_launcher_entry)
-
-        def merge_pipelines(config, arguments, update_launcher_entry):
-            for pipeline in config['pipelines']:
-                for stage in pipeline['stages']:
-                    if 'launcher' in stage:
-                        merge_dlsdk_launcher_args(arguments, stage['launcher'], update_launcher_entry)
-        functors_by_mode = {
-            'models': merge_models,
-            'pipelines': merge_pipelines
-        }
-
-        additional_keys = [
-            'model_optimizer', 'tf_custom_op_config_dir',
-            'tf_obj_detection_api_pipeline_config_path',
-            'cpu_extensions_mode', 'vpu_log_level'
-        ]
-        arguments_dict = arguments if isinstance(arguments, dict) else vars(arguments)
-        update_launcher_entry = {}
-
-        for key in additional_keys:
-            value = arguments_dict.get(key)
-            if value:
-                update_launcher_entry['_{}'.format(key)] = value
-
-        return functors_by_mode[mode](config, arguments, update_launcher_entry)
-
-    @staticmethod
-    def _filter_launchers(config, arguments, mode='models'):
-        def filtered(launcher, targets):
-            target_tags = args.get('target_tags') or []
-            if target_tags:
-                if not contains_any(target_tags, launcher.get('tags', [])):
-                    return True
-
-            config_framework = launcher['framework'].lower()
-            target_framework = (args.get('target_framework') or config_framework).lower()
-            if config_framework != target_framework:
-                return True
-
-            return targets and launcher.get('device', '').lower() not in targets
-
-        def filter_models(config, target_devices):
-            for model in config['models']:
-                launchers = model['launchers']
-                launchers = [launcher for launcher in launchers if not filtered(launcher, target_devices)]
-
-                if not launchers:
-                    warnings.warn('Model "{}" has no launchers'.format(model['name']))
-
-                model['launchers'] = launchers
-
-        def filter_pipelines(config, target_devices):
-            saved_pipelines = []
-            for pipeline in config['pipelines']:
-                filtered_pipeline = False
-                for stage in pipeline:
-                    if 'launcher' in stage:
-                        if filtered(stage['launcher'], target_devices):
-                            filtered_pipeline = True
-                        break
-                if filtered_pipeline:
-                    continue
-                saved_pipelines.append(pipeline)
-            config['pipelines'] = saved_pipelines
-
-        functors_by_mode = {
-            'models': filter_models,
-            'pipelines': filter_pipelines
-        }
-
-        args = arguments if isinstance(arguments, dict) else vars(arguments)
-        target_devices = to_lower_register(args.get('target_devices') or [])
-        filtering_mode = functors_by_mode[mode]
-        filtering_mode(config, target_devices)
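
Note: a minimal 'models'-mode configuration of the shape ConfigReader._check_local_config expects: each model must carry 'name', 'launchers' and 'datasets', and each dataset must carry 'name'. The concrete field values below are illustrative, not a tested configuration; relative paths are later resolved against the prefix directories by _merge_paths_with_prefixes.

    local_config = {
        'models': [{
            'name': 'sample_model',
            'launchers': [{
                'framework': 'dlsdk',
                'device': 'CPU',
                'model': 'sample/model.xml',
                'weights': 'sample/model.bin',
                'adapter': 'classification',
            }],
            'datasets': [{
                'name': 'sample_dataset',
                'data_source': 'sample/images',
                'annotation': 'sample_dataset.pickle',
            }],
        }]
    }
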
diff --git a/tools/accuracy_checker/accuracy_checker/config/config_validator.py b/tools/accuracy_checker/accuracy_checker/config/config_validator.py
deleted file mode 100644 (file)
index 5853a5f..0000000
+++ /dev/null
@@ -1,341 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import enum
-import math
-import re
-import warnings
-from collections import OrderedDict
-from copy import copy
-from functools import partial
-from pathlib import Path
-
-from ..utils import get_path, string_to_bool
-
-
-class ConfigError(ValueError):
-    pass
-
-
-class BaseValidator:
-    def __init__(self, on_error=None, additional_validator=None):
-        self.on_error = on_error
-        self.additional_validator = additional_validator
-
-        self.field_uri = None
-
-    def validate(self, entry, field_uri=None):
-        field_uri = field_uri or self.field_uri
-        if self.additional_validator and not self.additional_validator(entry, field_uri):
-            self.raise_error(entry, field_uri)
-
-    def raise_error(self, value, field_uri, reason=None):
-        if self.on_error:
-            self.on_error(value, field_uri, reason)
-
-        error_message = 'Invalid value "{value}" for {field_uri}'.format(value=value, field_uri=field_uri)
-        if reason:
-            error_message = '{error_message}: {reason}'.format(error_message=error_message, reason=reason)
-
-        raise ConfigError(error_message)
-
-
-class _ExtraArgumentBehaviour(enum.Enum):
-    WARN = 'warn'
-    IGNORE = 'ignore'
-    ERROR = 'error'
-
-
-def _is_dict_like(entry):
-    return hasattr(entry, '__iter__') and hasattr(entry, '__getitem__')
-
-
-class ConfigValidator(BaseValidator):
-    WARN_ON_EXTRA_ARGUMENT = _ExtraArgumentBehaviour.WARN
-    ERROR_ON_EXTRA_ARGUMENT = _ExtraArgumentBehaviour.ERROR
-    IGNORE_ON_EXTRA_ARGUMENT = _ExtraArgumentBehaviour.IGNORE
-
-    def __init__(self, config_uri, on_extra_argument=WARN_ON_EXTRA_ARGUMENT, **kwargs):
-        super().__init__(**kwargs)
-        self.on_extra_argument = on_extra_argument
-
-        self.fields = OrderedDict()
-        self.field_uri = config_uri
-        for name in dir(self):
-            value = getattr(self, name)
-            if not isinstance(value, BaseField):
-                continue
-
-            field_copy = copy(value)
-            field_copy.field_uri = "{}.{}".format(config_uri, name)
-            self.fields[name] = field_copy
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        field_uri = field_uri or self.field_uri
-        if not _is_dict_like(entry):
-            raise ConfigError("{} is expected to be dict-like".format(field_uri))
-
-        extra_arguments = []
-        for key in entry:
-            if key not in self.fields:
-                extra_arguments.append(key)
-                continue
-
-            self.fields[key].validate(entry[key])
-
-        required_fields = set(name for name, value in self.fields.items() if not value.optional)
-        missing_arguments = required_fields.difference(entry)
-
-        if missing_arguments:
-            arguments = ', '.join(map(str, missing_arguments))
-            self.raise_error(
-                entry, field_uri, "Invalid config for {}: missing required fields: {}".format(field_uri, arguments)
-            )
-
-        if extra_arguments:
-            unknown_options_error = "specifies unknown options: {}".format(extra_arguments)
-            message = "{} {}".format(field_uri, unknown_options_error)
-
-            if self.on_extra_argument == _ExtraArgumentBehaviour.WARN:
-                warnings.warn(message)
-            if self.on_extra_argument == _ExtraArgumentBehaviour.ERROR:
-                self.raise_error(entry, field_uri, message)
-
-    @property
-    def known_fields(self):
-        return set(self.fields)
-
-    def raise_error(self, value, field_uri, reason=None):
-        if self.on_error:
-            self.on_error(value, field_uri, reason)
-        else:
-            raise ConfigError(reason)
-
-
-class BaseField(BaseValidator):
-    def __init__(self, optional=False, allow_none=False, description=None, **kwargs):
-        super().__init__(**kwargs)
-        self.optional = optional
-        self.allow_none = allow_none
-        self.description = description
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        field_uri = field_uri or self.field_uri
-        if not self.allow_none and entry is None:
-            raise ConfigError("{} is not allowed to be None".format(field_uri))
-
-    @property
-    def type(self):
-        return str
-
-
-class StringField(BaseField):
-    def __init__(self, choices=None, regex=None, case_sensitive=False, **kwargs):
-        super().__init__(**kwargs)
-        self.choices = choices if case_sensitive or not choices else list(map(str.lower, choices))
-        self.regex = re.compile(regex, flags=re.IGNORECASE if not case_sensitive else 0) if regex else None
-        self.case_sensitive = case_sensitive
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        source_entry = entry
-
-        if not isinstance(entry, str):
-            raise ConfigError("{} is expected to be str".format(field_uri))
-
-        if not self.case_sensitive:
-            entry = entry.lower()
-
-        if self.choices and entry not in self.choices:
-            reason = "unsupported option, expected one of: {}".format(', '.join(map(str, self.choices)))
-            self.raise_error(source_entry, field_uri, reason)
-
-        if self.regex and not self.regex.match(entry):
-            self.raise_error(source_entry, field_uri, reason=None)
-
-    @property
-    def type(self):
-        return str
-
-
-class DictField(BaseField):
-    def __init__(self, key_type=None, value_type=None, validate_keys=True, validate_values=True, allow_empty=True,
-                 **kwargs):
-        super().__init__(**kwargs)
-        self.validate_keys = validate_keys if key_type else False
-        self.validate_values = validate_values if value_type else False
-        self.key_type = _get_field_type(key_type)
-        self.value_type = _get_field_type(value_type)
-
-        self.allow_empty = allow_empty
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        if not isinstance(entry, dict):
-            raise ConfigError("{} is expected to be dict".format(field_uri))
-
-        if not entry and not self.allow_empty:
-            self.raise_error(entry, field_uri, "value is empty")
-
-        for k, v in entry.items():
-            if self.validate_keys:
-                uri = "{}.keys.{}".format(field_uri, k)
-                self.key_type.validate(k, uri)
-
-            if self.validate_values:
-                uri = "{}.{}".format(field_uri, k)
-
-                self.value_type.validate(v, uri)
-
-    @property
-    def type(self):
-        return dict
-
-
-class ListField(BaseField):
-    def __init__(self, value_type=None, validate_values=True, allow_empty=True, **kwargs):
-        super().__init__(**kwargs)
-        self.validate_values = validate_values if value_type else False
-        self.value_type = _get_field_type(value_type)
-        self.allow_empty = allow_empty
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        if not isinstance(entry, list):
-            raise ConfigError("{} is expected to be list".format(field_uri))
-
-        if not entry and not self.allow_empty:
-            self.raise_error(entry, field_uri, "value is empty")
-
-        if self.validate_values:
-            for i, val in enumerate(entry):
-                self.value_type.validate(val, "{}[{}]".format(val, i))
-
-    @property
-    def type(self):
-        return list
-
-
-class NumberField(BaseField):
-    def __init__(self, floats=True, min_value=None, max_value=None, allow_inf=False, allow_nan=False, **kwargs):
-        super().__init__(**kwargs)
-        self.floats = floats
-        self.min = min_value
-        self.max = max_value
-        self.allow_inf = allow_inf
-        self.allow_nan = allow_nan
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        if not self.floats and isinstance(entry, float):
-            raise ConfigError("{} is expected to be int".format(field_uri))
-        if not isinstance(entry, int) and not isinstance(entry, float):
-            raise ConfigError("{} is expected to be number".format(field_uri))
-
-        if self.min is not None and entry < self.min:
-            reason = "value is less than the minimum allowed ({})".format(self.min)
-            self.raise_error(entry, field_uri, reason)
-        if self.max is not None and entry > self.max:
-            reason = "value is greater than the maximum allowed ({})".format(self.max)
-            self.raise_error(entry, field_uri, reason)
-
-        if math.isinf(entry) and not self.allow_inf:
-            self.raise_error(entry, field_uri, "value is infinity")
-        if math.isnan(entry) and not self.allow_nan:
-            self.raise_error(entry, field_uri, "value is NaN")
-
-    @property
-    def type(self):
-        return float if self.floats else int
-
-
-class PathField(BaseField):
-    def __init__(self, is_directory=False, check_exists=True, **kwargs):
-        super().__init__(**kwargs)
-        self.is_directory = is_directory
-        self.check_exists = check_exists
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        try:
-            get_path(entry, self.is_directory, self.check_exists)
-        except TypeError:
-            self.raise_error(entry, field_uri, "values is expected to be path-like")
-        except FileNotFoundError:
-            self.raise_error(entry, field_uri, "path does not exist")
-        except NotADirectoryError:
-            self.raise_error(entry, field_uri, "path is not a directory")
-        except IsADirectoryError:
-            self.raise_error(entry, field_uri, "path is a directory, regular file expected")
-
-    @property
-    def type(self):
-        return Path
-
-
-class BoolField(BaseField):
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        if not isinstance(entry, bool):
-            raise ConfigError("{} is expected to be bool".format(field_uri))
-
-    @property
-    def type(self):
-        return string_to_bool
-
-
-def _get_field_type(key_type):
-    if not isinstance(key_type, BaseField):
-        type_ = _TYPE_TO_FIELD_CLASS.get(key_type)
-        if callable(type_):
-            return type_()
-
-    return key_type
-
-
-_TYPE_TO_FIELD_CLASS = {
-    int: partial(NumberField, floats=False),
-    float: partial(NumberField, floats=True),
-    dict: partial(DictField, validate_keys=False, validate_values=False),
-    list: partial(ListField, validate_values=False),
-    Path: PathField,
-    str: StringField,
-    bool: BoolField,
-}
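
Note: a sketch of the declarative validation pattern implemented above: fields are declared as class attributes of a ConfigValidator subclass and checked with validate(). It assumes the config_validator module shown above is still importable; the MyEntryConfig class and its field names are hypothetical.

    class MyEntryConfig(ConfigValidator):
        name = StringField()
        threshold = NumberField(floats=True, min_value=0, max_value=1, optional=True)
        data_dir = PathField(is_directory=True, check_exists=False)

    validator = MyEntryConfig('my_entry', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
    validator.validate({'name': 'demo', 'threshold': 0.5, 'data_dir': 'some/dir'})  # accepted
    # omitting 'name' or 'data_dir', passing a threshold outside [0, 1], or adding an
    # unknown key raises ConfigError
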
diff --git a/tools/accuracy_checker/accuracy_checker/data_readers/__init__.py b/tools/accuracy_checker/accuracy_checker/data_readers/__init__.py
deleted file mode 100644 (file)
index 8906529..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .data_reader import (
-    BaseReader,
-    DataReaderField,
-    ReaderCombiner,
-    JSONReaderConfig,
-    OpenCVFrameReader,
-    OpenCVImageReader,
-    PillowImageReader,
-    ScipyImageReader,
-    NiftiImageReader,
-
-    DataRepresentation,
-    ClipIdentifier,
-    create_reader
-)
-
-__all__ = [
-    'BaseReader',
-    'DataReaderField',
-    'DataRepresentation',
-    'ReaderCombiner',
-    'JSONReaderConfig',
-    'OpenCVFrameReader',
-    'OpenCVImageReader',
-    'PillowImageReader',
-    'ScipyImageReader',
-    'NiftiImageReader',
-
-    'DataRepresentation',
-    'ClipIdentifier',
-    'create_reader'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/data_readers/data_reader.py b/tools/accuracy_checker/accuracy_checker/data_readers/data_reader.py
deleted file mode 100644 (file)
index 66c3c4d..0000000
+++ /dev/null
@@ -1,267 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from pathlib import Path
-from functools import singledispatch
-from collections import OrderedDict, namedtuple
-import re
-import cv2
-from PIL import Image
-import scipy.misc
-import numpy as np
-import nibabel as nib
-
-from ..utils import get_path, read_json, zipped_transform, set_image_metadata
-from ..dependency import ClassProvider
-from ..config import BaseField, StringField, ConfigValidator, ConfigError, DictField
-
-
-class DataRepresentation:
-    def __init__(self, data, meta=None, identifier=''):
-        self.identifier = identifier
-        self.data = data
-        self.metadata = meta or {}
-        if np.isscalar(data):
-            self.metadata['image_size'] = 1
-        elif isinstance(data, list) and np.isscalar(data[0]):
-            self.metadata['image_size'] = len(data)
-        else:
-            self.metadata['image_size'] = data.shape if not isinstance(data, list) else data[0].shape
-
-
-ClipIdentifier = namedtuple('ClipIdentifier', ['video', 'clip_id', 'frames'])
-
-
-def create_reader(config):
-    return BaseReader.provide(config.get('type', 'opencv_imread'), config.get('data_source'), config=config)
-
-
-class DataReaderField(BaseField):
-    def validate(self, entry_, field_uri=None):
-        super().validate(entry_, field_uri)
-
-        if entry_ is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        if isinstance(entry_, str):
-            StringField(choices=BaseReader.providers).validate(entry_, 'reader')
-        elif isinstance(entry_, dict):
-            class DictReaderValidator(ConfigValidator):
-                type = StringField(choices=BaseReader.providers)
-            dict_reader_validator = DictReaderValidator(
-                'reader', on_extra_argument=DictReaderValidator.IGNORE_ON_EXTRA_ARGUMENT
-            )
-            dict_reader_validator.validate(entry_)
-        else:
-            self.raise_error(entry_, field_uri, 'reader must be either string or dictionary')
-
-
-class BaseReader(ClassProvider):
-    __provider_type__ = 'reader'
-
-    def __init__(self, data_source, config=None):
-        self.config = config
-        self.data_source = data_source
-        self.read_dispatcher = singledispatch(self.read)
-        self.read_dispatcher.register(list, self._read_list)
-        self.read_dispatcher.register(ClipIdentifier, self._read_clip)
-
-        self.validate_config()
-        self.configure()
-
-    def __call__(self, context=None, identifier=None, **kwargs):
-        if identifier is not None:
-            return self.read_item(identifier)
-
-        if not context:
-            raise ValueError('identifier or context should be specified')
-
-        read_data = [self.read_item(identifier) for identifier in context.identifiers_batch]
-        context.data_batch = read_data
-        context.annotation_batch, context.data_batch = zipped_transform(
-            set_image_metadata,
-            context.annotation_batch,
-            context.data_batch
-        )
-        return context
-
-    def configure(self):
-        self.data_source = get_path(self.data_source, is_directory=True)
-
-    def validate_config(self):
-        pass
-
-    def read(self, data_id):
-        raise NotImplementedError
-
-    def _read_list(self, data_id):
-        return [self.read(identifier) for identifier in data_id]
-
-    def _read_clip(self, data_id):
-        video = Path(data_id.video)
-        frames_identifiers = [video / frame for frame in data_id.frames]
-        return self.read_dispatcher(frames_identifiers)
-
-    def read_item(self, data_id):
-        return DataRepresentation(self.read_dispatcher(data_id), identifier=data_id)
-
-
-
-class ReaderCombinerConfig(ConfigValidator):
-    type = StringField()
-    scheme = DictField(
-        value_type=DataReaderField(), key_type=StringField(), allow_empty=False
-    )
-
-
-class ReaderCombiner(BaseReader):
-    __provider__ = 'combine_reader'
-
-    def validate_config(self):
-        config_validator = ReaderCombinerConfig('reader_combiner_config')
-        config_validator.validate(self.config)
-
-    def configure(self):
-        scheme = self.config['scheme']
-        reading_scheme = OrderedDict()
-        for pattern, reader_config in scheme.items():
-            reader = BaseReader.provide(
-                reader_config['type'] if isinstance(reader_config, dict) else reader_config,
-                self.data_source, reader_config
-            )
-            pattern = re.compile(pattern)
-            reading_scheme[pattern] = reader
-
-        self.reading_scheme = reading_scheme
-
-    def read(self, data_id):
-        for pattern, reader in self.reading_scheme.items():
-            if pattern.match(str(data_id)):
-                return reader.read(data_id)
-
-        raise ConfigError('suitable data reader for {} not found'.format(data_id))
-
-
-class OpenCVImageReader(BaseReader):
-    __provider__ = 'opencv_imread'
-
-    def read(self, data_id):
-        return cv2.imread(str(get_path(self.data_source / data_id)))
-
-
-class PillowImageReader(BaseReader):
-    __provider__ = 'pillow_imread'
-
-    def __init__(self, data_source, config=None):
-        super().__init__(data_source, config)
-        self.convert_to_rgb = True
-
-    def read(self, data_id):
-        with open(str(self.data_source / data_id), 'rb') as f:
-            img = Image.open(f)
-
-            return np.array(img.convert('RGB') if self.convert_to_rgb else img)
-
-
-class ScipyImageReader(BaseReader):
-    __provider__ = 'scipy_imread'
-
-    def read(self, data_id):
-        return np.array(scipy.misc.imread(str(get_path(self.data_source / data_id))))
-
-
-class OpenCVFrameReader(BaseReader):
-    __provider__ = 'opencv_capture'
-
-    def __init__(self, data_source, config=None):
-        super().__init__(data_source, config)
-        self.current = -1
-
-    def read(self, data_id):
-        if data_id < 0:
-            raise IndexError('frame with {} index cannot be grabbed, non-negative index is expected'.format(data_id))
-        if data_id < self.current:
-            self.videocap.set(cv2.CAP_PROP_POS_FRAMES, data_id)
-            self.current = data_id - 1
-
-        return self._read_sequence(data_id)
-
-    def _read_sequence(self, data_id):
-        frame = None
-        while self.current != data_id:
-            success, frame = self.videocap.read()
-            self.current += 1
-            if not success:
-                raise EOFError('frame with {} index does not exist in {}'.format(self.current, self.data_source))
-
-        return frame
-
-    def configure(self):
-        self.data_source = get_path(self.data_source)
-        self.videocap = cv2.VideoCapture(str(self.data_source))
-
-
-class JSONReaderConfig(ConfigValidator):
-    type = StringField()
-    key = StringField(optional=True, case_sensitive=True)
-
-
-class JSONReader(BaseReader):
-    __provider__ = 'json_reader'
-
-    def validate_config(self):
-        config_validator = JSONReaderConfig('json_reader_config')
-        config_validator.validate(self.config)
-
-    def configure(self):
-        self.key = self.config.get('key')
-
-    def read(self, data_id):
-        data = read_json(str(self.data_source / data_id))
-        if self.key:
-            data = data.get(self.key)
-
-            if not data:
-                raise ConfigError('{} does not contain {}'.format(data_id, self.key))
-
-        return np.array(data).astype(np.float32)
-
-
-
-class NCF_DataReader(BaseReader):
-    __provider__ = 'ncf_data_reader'
-
-    def configure(self):
-        pass
-
-    def read(self, data_id):
-        if not isinstance(data_id, str):
-            raise IndexError('Data identifier must be a string')
-
-        return float(data_id.split(":")[1])
-
-
-class NiftiImageReader(BaseReader):
-    __provider__ = 'nifti_reader'
-
-    def read(self, data_id):
-        nib_image = nib.load(str(get_path(self.data_source / data_id)))
-        image = np.array(nib_image.dataobj)
-        if len(image.shape) != 4:  # Make sure 4D
-            image = np.expand_dims(image, -1)
-        image = np.swapaxes(np.array(image), 0, -2)
-
-        return image
diff --git a/tools/accuracy_checker/accuracy_checker/dataset.py b/tools/accuracy_checker/accuracy_checker/dataset.py
deleted file mode 100644 (file)
index f02add1..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from pathlib import Path
-
-from .annotation_converters import BaseFormatConverter, save_annotation, make_subset
-from .config import ConfigValidator, StringField, PathField, ListField, DictField, BaseField, NumberField, ConfigError
-from .utils import JSONDecoderWithAutoConversion, read_json, get_path, contains_all
-from .representation import BaseRepresentation
-from .data_readers import DataReaderField
-
-
-class DatasetConfig(ConfigValidator):
-    """
-    Specifies configuration structure for dataset
-    """
-    name = StringField()
-    annotation = PathField(optional=True, check_exists=False)
-    data_source = PathField(optional=True, check_exists=False)
-    dataset_meta = PathField(optional=True, check_exists=False)
-    metrics = ListField(allow_empty=False, optional=True)
-    postprocessing = ListField(allow_empty=False, optional=True)
-    preprocessing = ListField(allow_empty=False, optional=True)
-    reader = DataReaderField(optional=True)
-    annotation_conversion = DictField(optional=True)
-    subsample_size = BaseField(optional=True)
-    subsample_seed = NumberField(floats=False, min_value=0, optional=True)
-
-
-class Dataset:
-    def __init__(self, config_entry):
-        self._config = config_entry
-        self.batch = 1
-        self.iteration = 0
-        dataset_config = DatasetConfig('Dataset')
-        dataset_config.validate(self._config)
-        annotation, meta = None, None
-        use_converted_annotation = True
-        self._images_dir = Path(self._config.get('data_source', ''))
-        if 'annotation' in self._config:
-            annotation_file = Path(self._config['annotation'])
-            if annotation_file.exists():
-                annotation = read_annotation(get_path(annotation_file))
-                meta = self._load_meta()
-                use_converted_annotation = False
-        if not annotation and 'annotation_conversion' in self._config:
-            annotation, meta = self._convert_annotation()
-
-        if not annotation:
-            raise ConfigError('path to converted annotation or data for conversion should be specified')
-
-        subsample_size = self._config.get('subsample_size')
-        if subsample_size:
-            subsample_seed = self._config.get('subsample_seed', 666)
-            if isinstance(subsample_size, str):
-                if subsample_size.endswith('%'):
-                    subsample_size = float(subsample_size[:-1]) / 100 * len(annotation)
-            subsample_size = int(subsample_size)
-            annotation = make_subset(annotation, subsample_size, subsample_seed)
-
-        if use_converted_annotation and contains_all(self._config, ['annotation', 'annotation_conversion']):
-            annotation_name = self._config['annotation']
-            meta_name = self._config.get('dataset_meta')
-            if meta_name:
-                meta_name = Path(meta_name)
-            save_annotation(annotation, meta, Path(annotation_name), meta_name)
-
-        self._annotation = annotation
-        self._meta = meta
-        self.size = len(self._annotation)
-        self.name = self._config.get('name')
-
-    @property
-    def annotation(self):
-        return self._annotation
-
-    def __len__(self):
-        return self.size
-
-    @property
-    def metadata(self):
-        return self._meta
-
-    @property
-    def labels(self):
-        return self._meta.get('label_map', {})
-
-    def __call__(self, context, *args, **kwargs):
-        batch_annotation = self.__getitem__(self.iteration)
-        self.iteration += 1
-        context.annotation_batch = batch_annotation
-        context.identifiers_batch = [annotation.identifier for annotation in batch_annotation]
-
-    def __getitem__(self, item):
-        if self.size <= item * self.batch:
-            raise IndexError
-
-        batch_start = item * self.batch
-        batch_end = min(self.size, batch_start + self.batch)
-        batch_annotation = self._annotation[batch_start:batch_end]
-
-        return batch_annotation
-
-    @staticmethod
-    def set_image_metadata(annotation, images):
-        image_sizes = []
-        data = images.data
-        if not isinstance(data, list):
-            data = [data]
-        for image in data:
-            image_sizes.append(image.shape)
-        annotation.set_image_size(image_sizes)
-
-    def set_annotation_metadata(self, annotation, image, data_source):
-        self.set_image_metadata(annotation, image.data)
-        annotation.set_data_source(data_source)
-
-    def _load_meta(self):
-        meta_data_file = self._config.get('dataset_meta')
-        return read_json(meta_data_file, cls=JSONDecoderWithAutoConversion) if meta_data_file else None
-
-    def _convert_annotation(self):
-        conversion_params = self._config.get('annotation_conversion')
-        converter = conversion_params['converter']
-        annotation_converter = BaseFormatConverter.provide(converter, conversion_params)
-        annotation, meta = annotation_converter.convert()
-
-        return annotation, meta
-
-
-def read_annotation(annotation_file: Path):
-    annotation_file = get_path(annotation_file)
-
-    result = []
-    with annotation_file.open('rb') as file:
-        while True:
-            try:
-                result.append(BaseRepresentation.load(file))
-            except EOFError:
-                break
-
-    return result
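
For reference, the fields validated by `DatasetConfig` map onto a plain dictionary entry like the sketch below. All values are placeholders for illustration only, and the converter name is an assumption rather than one taken from this file:

```python
# Hypothetical dataset entry mirroring the DatasetConfig fields above.
dataset_config = {
    'name': 'my_dataset',
    'data_source': 'datasets/my_dataset/images',
    'annotation': 'annotations/my_dataset.pickle',                 # reused if the file already exists
    'annotation_conversion': {'converter': 'example_converter'},   # used when the annotation is absent
    'dataset_meta': 'annotations/my_dataset_meta.json',
    'subsample_size': '10%',   # percentage strings are supported, plain integers work too
    'subsample_seed': 42,
}

# dataset = Dataset(dataset_config)  # validates the entry, then loads or converts the annotation
```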
diff --git a/tools/accuracy_checker/accuracy_checker/dependency.py b/tools/accuracy_checker/accuracy_checker/dependency.py
deleted file mode 100644 (file)
index 947a3ec..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-# pylint: disable=protected-access
-
-
-class ProvidedWrapper:
-    def __init__(self, provided):
-        self.provided = provided
-
-
-class UnresolvedDependencyException(ValueError):
-
-    def __init__(self, provider, missing_dependencies) -> None:
-        super().__init__()
-        self.provider = provider
-        self.missing_dependencies = missing_dependencies
-        self.message = "Unresolved dependencies ({}) for provider {}".format(
-            ", ".join(self.missing_dependencies), self.provider
-        )
-
-
-def get_opts(options):
-    """
-    Args:
-        options: options object.
-    Returns:
-        args (tuple): positional options.
-        kwargs (map): keyword arguments.
-    """
-
-    if isinstance(options, tuple):
-        if len(options) == 2 and isinstance(options[-1], dict):
-            args, kwargs = options
-        else:
-            args = options
-            kwargs = {}
-    elif isinstance(options, dict):
-        args, kwargs = (), options
-    else:
-        raise ValueError("Options object is expected to be either a pair of (args, kwargs) or args/kwargs only")
-
-    return args, kwargs
-
-
-class BaseProvider:
-    providers = {}
-    __provider_type__ = None
-    __provider__ = None
-
-    @classmethod
-    def provide(cls, provider, *args, **kwargs):
-        root_provider = cls.resolve(provider)
-        return root_provider(*args, **kwargs)
-
-    @classmethod
-    def resolve(cls, name):
-        if name not in cls.providers:
-            raise ValueError("Requested provider not registered")
-        return cls.providers[name]
-
-
-class ClassProviderMeta(type):
-    def __new__(mcs, name, bases, attrs, **kwargs):
-        cls = super().__new__(mcs, name, bases, attrs)
-        # do not create container for abstract provider
-        if '_is_base_provider' in attrs:
-            return cls
-
-        assert issubclass(cls, ClassProvider), "Do not use metaclass directly"
-        if '__provider_type__' in attrs:
-            cls.providers = {}
-        else:
-            cls.register_provider(cls)
-
-        return cls
-
-
-class ClassProvider(BaseProvider, metaclass=ClassProviderMeta):
-    _is_base_provider = True
-
-    @classmethod
-    def get_provider_name(cls):
-        return getattr(cls, '__provider__', cls.__name__)
-
-    @classmethod
-    def register_provider(cls, provider):
-        provider_name = cls.get_provider_name()
-        if not provider_name:
-            return
-        cls.providers[provider_name] = provider
-
-
-def provide(service):
-    return ProvidedWrapper(service)
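
The metaclass above implements a small plugin registry: declaring `__provider_type__` gives a class its own `providers` dictionary, and every concrete subclass that sets `__provider__` is registered into it at class-creation time and can later be instantiated via `provide()`. A self-contained sketch of that mechanism, using invented names:

```python
class Codec(ClassProvider):
    # registry root: the metaclass sees __provider_type__ and creates an empty providers dict
    __provider_type__ = 'codec'


class JsonCodec(Codec):
    # concrete provider: automatically registered under its __provider__ name
    __provider__ = 'json'

    def __init__(self, indent=None):
        self.indent = indent


codec = Codec.provide('json', indent=2)   # resolves 'json' and constructs JsonCodec(indent=2)
assert isinstance(codec, JsonCodec)
assert 'json' in Codec.providers
```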
diff --git a/tools/accuracy_checker/accuracy_checker/evaluators/__init__.py b/tools/accuracy_checker/accuracy_checker/evaluators/__init__.py
deleted file mode 100644 (file)
index 278615c..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-from .model_evaluator import ModelEvaluator
-from .pipeline_evaluator import PipeLineEvaluator, get_processing_info
-
-__all__ = [
-    'ModelEvaluator',
-    'PipeLineEvaluator',
-    'get_processing_info'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/evaluators/model_evaluator.py b/tools/accuracy_checker/accuracy_checker/evaluators/model_evaluator.py
deleted file mode 100644 (file)
index 21620b6..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import copy
-import pickle
-
-from ..utils import get_path, set_image_metadata, extract_image_representations
-from ..dataset import Dataset
-from ..launcher import create_launcher, DummyLauncher, InputFeeder
-from ..launcher.loaders import PickleLoader
-from ..logging import print_info
-from ..metrics import MetricsExecutor
-from ..postprocessor import PostprocessingExecutor
-from ..preprocessor import PreprocessingExecutor
-from ..adapters import create_adapter
-from ..config import ConfigError
-from ..data_readers import BaseReader
-
-
-class ModelEvaluator:
-    def __init__(self, launcher, input_feeder, adapter, reader, preprocessor, postprocessor, dataset, metric):
-        self.launcher = launcher
-        self.input_feeder = input_feeder
-        self.adapter = adapter
-        self.reader = reader
-        self.preprocessor = preprocessor
-        self.postprocessor = postprocessor
-        self.dataset = dataset
-        self.metric_executor = metric
-
-        self._annotations = []
-        self._predictions = []
-
-    @classmethod
-    def from_configs(cls, launcher_config, dataset_config):
-        dataset_name = dataset_config['name']
-        data_reader_config = dataset_config.get('reader', 'opencv_imread')
-        data_source = dataset_config.get('data_source')
-        if isinstance(data_reader_config, str):
-            data_reader = BaseReader.provide(data_reader_config, data_source)
-        elif isinstance(data_reader_config, dict):
-            data_reader = BaseReader.provide(data_reader_config['type'], data_source, data_reader_config)
-        else:
-            raise ConfigError('reader should be dict or string')
-
-        preprocessor = PreprocessingExecutor(dataset_config.get('preprocessing'), dataset_name)
-        dataset = Dataset(dataset_config)
-        launcher = create_launcher(launcher_config)
-        config_adapter = launcher_config.get('adapter')
-        adapter = None if not config_adapter else create_adapter(config_adapter, launcher, dataset)
-        input_feeder = InputFeeder(launcher.config.get('inputs') or [], launcher.get_all_inputs())
-        launcher.const_inputs = input_feeder.const_inputs
-        postprocessor = PostprocessingExecutor(dataset_config.get('postprocessing'), dataset_name, dataset.metadata)
-        metric_dispatcher = MetricsExecutor(dataset_config.get('metrics', []), dataset)
-
-        return cls(
-            launcher, input_feeder, adapter, data_reader, preprocessor, postprocessor, dataset, metric_dispatcher
-        )
-
-    def process_dataset(self, stored_predictions, progress_reporter, *args, **kwargs):
-        if self._is_stored(stored_predictions) or isinstance(self.launcher, DummyLauncher):
-            self._annotations, self._predictions = self.load(stored_predictions, progress_reporter)
-            self._annotations, self._predictions = self.postprocessor.full_process(self._annotations, self._predictions)
-
-            self.metric_executor.update_metrics_on_batch(self._annotations, self._predictions)
-            return self._annotations, self._predictions
-
-        self.dataset.batch = self.launcher.batch
-        predictions_to_store = []
-        for batch_id, batch_annotation in enumerate(self.dataset):
-            batch_identifiers = [annotation.identifier for annotation in batch_annotation]
-            batch_input = [self.reader(identifier=identifier) for identifier in batch_identifiers]
-            for annotation, input_data in zip(batch_annotation, batch_input):
-                set_image_metadata(annotation, input_data)
-                annotation.metadata['data_source'] = self.reader.data_source
-            batch_input = self.preprocessor.process(batch_input, batch_annotation)
-            _, batch_meta = extract_image_representations(batch_input)
-            filled_inputs = self.input_feeder.fill_non_constant_inputs(batch_input)
-            batch_predictions = self.launcher.predict(filled_inputs, batch_meta, *args, **kwargs)
-            if self.adapter:
-                self.adapter.output_blob = self.adapter.output_blob or self.launcher.output_blob
-                batch_predictions = self.adapter.process(batch_predictions, batch_identifiers, batch_meta)
-
-            if stored_predictions:
-                predictions_to_store.extend(copy.deepcopy(batch_predictions))
-
-            annotations, predictions = self.postprocessor.process_batch(batch_annotation, batch_predictions)
-            if not self.postprocessor.has_dataset_processors:
-                self.metric_executor.update_metrics_on_batch(annotations, predictions)
-
-            self._annotations.extend(annotations)
-            self._predictions.extend(predictions)
-
-            if progress_reporter:
-                progress_reporter.update(batch_id, len(batch_predictions))
-
-        if progress_reporter:
-            progress_reporter.finish()
-
-        if stored_predictions:
-            self.store_predictions(stored_predictions, predictions_to_store)
-
-        if self.postprocessor.has_dataset_processors:
-            self.metric_executor.update_metrics_on_batch(self._annotations, self._predictions)
-
-        return self.postprocessor.process_dataset(self._annotations, self._predictions)
-
-    @staticmethod
-    def _is_stored(stored_predictions=None):
-        if not stored_predictions:
-            return False
-
-        try:
-            get_path(stored_predictions)
-            return True
-        except OSError:
-            return False
-
-    def compute_metrics(self, output_callback=None, ignore_results_formatting=False):
-        for result_presenter, evaluated_metric in self.metric_executor.iterate_metrics(
-                self._annotations, self._predictions):
-            result_presenter.write_result(evaluated_metric, output_callback, ignore_results_formatting)
-
-    def load(self, stored_predictions, progress_reporter):
-        self._annotations = self.dataset.annotation
-        launcher = self.launcher
-        if not isinstance(launcher, DummyLauncher):
-            launcher = DummyLauncher({
-                'framework': 'dummy',
-                'loader': PickleLoader.__provider__,
-                'data_path': stored_predictions
-            }, adapter=None)
-
-        predictions = launcher.predict([annotation.identifier for annotation in self._annotations])
-
-        if progress_reporter:
-            progress_reporter.finish(False)
-
-        return self._annotations, predictions
-
-    @staticmethod
-    def store_predictions(stored_predictions, predictions):
-        # the file does not exist on the first run and is only created here, so it cannot be treated as a pathlib.Path object
-        with open(stored_predictions, "wb") as content:
-            pickle.dump(predictions, content)
-            print_info("prediction objects are saved to {}".format(stored_predictions))
-
-    def release(self):
-        self.launcher.release()
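
To tie the pieces together, a rough programmatic use of `ModelEvaluator.from_configs` might look like the sketch below. In practice these dictionaries come from the accuracy checker configuration file; all paths, adapter and metric names here are placeholders:

```python
launcher_config = {
    'framework': 'dlsdk',              # any registered launcher provider
    'device': 'CPU',
    'model': 'model.xml',              # placeholder paths
    'weights': 'model.bin',
    'adapter': 'classification',
}
dataset_config = {
    'name': 'my_dataset',
    'data_source': 'datasets/my_dataset/images',
    'annotation': 'annotations/my_dataset.pickle',
    'metrics': [{'type': 'accuracy'}],   # placeholder metric entry
}

evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
try:
    evaluator.process_dataset(stored_predictions=None, progress_reporter=None)
    evaluator.compute_metrics()
finally:
    evaluator.release()
```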
diff --git a/tools/accuracy_checker/accuracy_checker/evaluators/pipeline_evaluator.py b/tools/accuracy_checker/accuracy_checker/evaluators/pipeline_evaluator.py
deleted file mode 100644 (file)
index e34e701..0000000
+++ /dev/null
@@ -1,229 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import OrderedDict
-import numpy as np
-
-from ..pipeline_connectors import create_connection_description
-from ..utils import get_indexs, find_nearest
-from ..adapters import create_adapter
-from ..data_readers import create_reader
-from ..dataset import Dataset
-from ..launcher import create_launcher, InputFeeder
-from ..metrics import MetricsExecutor
-from ..pipeline_connectors import StageConnectionDescription, Connection
-from ..postprocessor import PostprocessingExecutor
-from ..preprocessor import PreprocessingExecutor
-
-
-def get_processing_info(pipeline_config):
-    name = pipeline_config['name']
-    stages = pipeline_config['stages']
-    dataset_name = stages[0]['dataset']['name']
-    launcher = {}
-    for stage in stages:
-        if 'launcher' in stage:
-            launcher = stage['launcher']
-            break
-    framework = launcher.get('framework')
-    device = launcher.get('device')
-    tags = launcher.get('tags')
-
-    return name, framework, device, tags, dataset_name
-
-
-def create_launcher_attribution(launchers_ids, launchers, datasets_ids, datasets, executors, executor_types):
-    launchers_ids = np.array(launchers_ids)
-    datasets_ids = np.array(datasets_ids)
-    for launcher_id_info, launcher in zip(enumerate(launchers_ids), launchers):
-        iteration, launcher_id = launcher_id_info
-        input_feeder = InputFeeder(
-            launcher.config.get('inputs', []), launcher.get_all_inputs(), launcher.fit_to_input
-        )
-        launchers_ids[iteration:] += 1
-        executors.insert(launcher_id, input_feeder)
-        executor_types.insert(launcher_id, 'input_feeder')
-        adapter_config = launcher.config.get('adapter')
-        dataset_id = find_nearest(datasets_ids, launcher_id, 'less')
-        datasets_ids[dataset_id + 1:] += 1
-        dataset = datasets[dataset_id] if dataset_id != -1 else None
-        launcher_id += 1
-        if adapter_config:
-            adapter = create_adapter(adapter_config, launcher, dataset)
-            executors.insert(launcher_id + 1, adapter)
-            executor_types.insert(launcher_id + 1, 'adapter')
-            if dataset_id != datasets_ids.size - 1:
-                datasets_ids[dataset_id + 1:] += 1
-            if iteration != launchers_ids.size - 1:
-                launchers_ids[iteration + 1:] += 1
-
-
-def set_metrics_dataset(metrics_ids, metrics_executors, datasets_ids, datasets):
-    for metrics_id, metric_executor in zip(metrics_ids, metrics_executors):
-        dataset_id = find_nearest(datasets_ids, metrics_id, 'less')
-        if dataset_id != -1:
-            metric_executor.dataset = datasets[dataset_id].metadata
-
-
-class PipeLineStage:
-    def __init__(self, evaluation_context, executors):
-        self._evaluation_context = evaluation_context
-        self.executors = executors
-
-    def run(self):
-        for executor in self.executors:
-            executor(self.evaluation_context)
-
-    @classmethod
-    def from_configs(cls, stage_name, stage_config):
-        config_mapping = {
-            'dataset': Dataset,
-            'preprocessing': PreprocessingExecutor,
-            'launcher': create_launcher,
-            'postprocessing': PostprocessingExecutor,
-            'metrics': MetricsExecutor,
-            'reader': create_reader,
-        }
-
-        executor_types = []
-        executors = []
-        for key, config in stage_config.items():
-            if key in config_mapping:
-                connection = create_connection_description(config, stage_name)
-                if connection:
-                    executors.append(connection)
-                    executor_types.append('connection')
-                executor_creator = config_mapping[key]
-                executor = executor_creator(config)
-                executor_types.append(key)
-                executors.append(executor)
-
-        dataset_ids = get_indexs(executor_types, 'dataset')
-        datasets = [executors[idx] for idx in dataset_ids]
-        launcher_ids = get_indexs(executor_types, 'launcher')
-        launchers = [executors[idx] for idx in launcher_ids]
-        create_launcher_attribution(launcher_ids, launchers, dataset_ids, datasets, executors, executor_types)
-
-        metrics_executors_id = get_indexs(executor_types, 'metrics')
-        dataset_ids = get_indexs(executor_types, 'dataset')
-        metrics_executors = [executors[idx] for idx in metrics_executors_id]
-        set_metrics_dataset(metrics_executors_id, metrics_executors, dataset_ids, datasets)
-        dataset = datasets[0] if datasets else None
-        eval_context = EvaluationContext(dataset, metrics_executors, launchers)
-
-        return cls(eval_context, executors)
-
-    @property
-    def evaluation_context(self):
-        return self._evaluation_context
-
-    @evaluation_context.setter
-    def evaluation_context(self, new_context):
-        _shared_context = new_context.shared_context
-        for field, value in _shared_context.items():
-            if value:
-                setattr(self._evaluation_context, field, value)
-
-
-class EvaluationContext:
-    def __init__(self, dataset, metric_executor=None, launcher=None):
-        self.annotations = []
-        self.predictions = []
-        self.annotation_batch = []
-        self.prediction_batch = []
-        self.data_batch = []
-        self.metrics_results = []
-        self.identifiers_batch = []
-        self.metrics_executor = metric_executor
-        self.dataset_size = dataset.size if dataset else 0
-        self.launcher = launcher
-        self.dataset = dataset
-
-    @property
-    def shared_context(self):
-        _shared_context = {
-            'annotations': self.annotations,
-            'predictions': self.predictions,
-            'annotation_batch': self.annotation_batch,
-            'prediction_batch': self.prediction_batch,
-            'data_batch': self.data_batch,
-            'identifiers_batch': self.identifiers_batch
-        }
-        return _shared_context
-
-
-class PipeLineEvaluator:
-    def __init__(self, stages):
-        self.stages = stages
-        self.create_connectors()
-        self.context = next(iter(stages.values())).evaluation_context
-
-    @classmethod
-    def from_configs(cls, pipeline_config):
-        stages = OrderedDict()
-        for stage_config in pipeline_config:
-            stage_name = stage_config['stage']
-            evaluation_stage = PipeLineStage.from_configs(stage_name, stage_config)
-            stages[stage_name] = evaluation_stage
-        return cls(stages)
-
-    def create_connectors(self):
-        def make_connection(stages, connection_template):
-            return Connection(stages, connection_template)
-
-        def replace_connections(stage, all_stages):
-            for executor_id, executor in enumerate(stage.executors):
-                if isinstance(executor, StageConnectionDescription):
-                    connector = make_connection(all_stages, executor)
-                    stage.executors[executor_id] = connector
-
-        for _, stage in self.stages.items():
-            replace_connections(stage, self.stages)
-
-    def process_dataset(self, stored_predictions, progress_reporter, *args, **kwargs):
-        self.progress_reporter = progress_reporter
-        dataset_size = self.context.dataset_size
-        dataset_size = dataset_size if dataset_size else 0
-        self.progress_reporter.reset(dataset_size)
-        iteration = 0
-        previous_context = self.context
-        while self.progress_reporter.progress != 100:
-            for _, stage in self.stages.items():
-                stage.evaluation_context = previous_context
-                stage.run()
-                previous_context = stage.evaluation_context
-            iteration += 1
-            progress_reporter.update(iteration, len(previous_context.data_batch))
-        self.context = previous_context
-
-        if progress_reporter:
-            progress_reporter.finish()
-
-    def compute_metrics(self, output_callback=None, ignore_results_formatting=False):
-        def eval_metrics(metrics_executor, annotations, predictions):
-            for result_presenter, evaluated_metric in metrics_executor.iterate_metrics(annotations, predictions):
-                result_presenter.write_result(evaluated_metric, output_callback, ignore_results_formatting)
-
-        for _, stage in self.stages.items():
-            metrics_executors = stage.evaluation_context.metrics_executor
-            for metrics_executor in metrics_executors:
-                eval_context = stage.evaluation_context
-                eval_metrics(metrics_executor, eval_context.annotations, eval_context.predictions)
-
-    def release(self):
-        for _, stage in self.stages.items():
-            for launcher in stage.evaluation_context.launcher:
-                launcher.release()
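
`PipeLineEvaluator.from_configs` expects a list of stage entries, each named by a `stage` key and carrying any of the sections handled by `PipeLineStage.from_configs` (`dataset`, `reader`, `preprocessing`, `launcher`, `postprocessing`, `metrics`). A hedged sketch of such a description, with placeholder names and paths:

```python
pipeline_config = [
    {
        'stage': 'classification',                    # placeholder stage name
        'dataset': {'name': 'my_dataset',
                    'data_source': 'datasets/my_dataset/images',
                    'annotation': 'annotations/my_dataset.pickle'},
        'launcher': {'framework': 'dlsdk', 'device': 'CPU',
                     'model': 'model.xml', 'weights': 'model.bin',
                     'adapter': 'classification'},
        'metrics': [{'type': 'accuracy'}],            # placeholder metric entry
    },
]

evaluator = PipeLineEvaluator.from_configs(pipeline_config)
```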
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/__init__.py b/tools/accuracy_checker/accuracy_checker/launcher/__init__.py
deleted file mode 100644 (file)
index 5ab09b0..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .dummy_launcher import DummyLauncher
-from .launcher import Launcher, create_launcher, unsupported_launcher
-from .input_feeder import InputFeeder
-
-try:
-    from .caffe_launcher import CaffeLauncher
-except ImportError as import_error:
-    CaffeLauncher = unsupported_launcher(
-        'caffe', "Caffe isn't installed. Please install it before use. \n{}".format(import_error.msg)
-    )
-
-try:
-    from .dlsdk_launcher import DLSDKLauncher
-except ImportError as import_error:
-    DLSDKLauncher = unsupported_launcher(
-        'dlsdk', "IE Python isn't installed. Please install it before use. \n{}".format(import_error.msg)
-    )
-
-__all__ = ['create_launcher', 'Launcher', 'CaffeLauncher', 'DLSDKLauncher', 'DummyLauncher', 'InputFeeder']
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/caffe_installation_readme.md b/tools/accuracy_checker/accuracy_checker/launcher/caffe_installation_readme.md
deleted file mode 100644 (file)
index 8118dcd..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-# Caffe Installation Tips
-
-## Install OpenCV 3.3 or later with Python3 bindings
-
-Accuracy Checker uses the OpenCV library for image processing. You can skip this step if you are using OpenCV from the [OpenVINO toolkit][openvino-get-started].
-
-```bash
-sudo apt-get install libopencv-dev
-pip install opencv-python
-```
-
-## Install Caffe with Python3 bindings
-
-* Clone repository:
-
-```bash
-git clone https://github.com/BVLC/caffe.git
-cd caffe
-```
-
-* Install Caffe dependencies:
-
-```bash
-sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev
-sudo apt-get install --no-install-recommends libboost-all-dev
-pip install -r python/requirements.txt
-pip install matplotlib
-```
-
-* Build
-
-If you need a CPU-only version of Caffe, add `-DCPU_ONLY=ON` to the cmake command.
-
-```bash
-mkdir build && cd build
-cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=<caffe/install/dir> -Dpython_version=3 -DBLAS=open ..
-make
-sudo make install
-```
-
-* Copy the Python library to your Python installation.
-
-```bash
-cp -r ../python/caffe $VIRTUAL_ENV/lib/python3.5/site-packages
-cp --remove-destination lib/_caffe.so $VIRTUAL_ENV/lib/python3.5/site-packages/caffe
-```
-
-## Check your installation
-
-You can test the prerequisites with the following command. If it does not fail, you have installed the prerequisites correctly:
-
-```bash
-python3 -c 'import caffe, cv2'
-```
-
-[openvino-get-started]: https://software.intel.com/en-us/openvino-toolkit/documentation/get-started
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher.py
deleted file mode 100644 (file)
index b9c28c5..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import re
-
-import caffe
-
-from ..config import PathField, StringField, NumberField, BoolField
-from .launcher import Launcher, LauncherConfig
-
-DEVICE_REGEX = r'(?P<device>cpu$|gpu)(_(?P<identifier>\d+))?'
-
-
-class CaffeLauncherConfig(LauncherConfig):
-    """
-    Specifies configuration structure for Caffe launcher.
-    """
-
-    model = PathField()
-    weights = PathField()
-    device = StringField(regex=DEVICE_REGEX)
-    batch = NumberField(floats=False, min_value=1, optional=True)
-    output_name = StringField(optional=True)
-    allow_reshape_input = BoolField(optional=True)
-
-
-class CaffeLauncher(Launcher):
-    """
-    Class for model inference using the Caffe framework.
-    """
-
-    __provider__ = 'caffe'
-
-    def __init__(self, config_entry: dict, *args, **kwargs):
-        super().__init__(config_entry, *args, **kwargs)
-
-        caffe_launcher_config = CaffeLauncherConfig('Caffe_Launcher')
-        caffe_launcher_config.validate(self.config)
-
-        self.model = str(self.config['model'])
-        self.weights = str(self.config['weights'])
-
-        self.network = caffe.Net(self.model, self.weights, caffe.TEST)
-        self.allow_reshape_input = self.config.get('allow_reshape_input', False)
-
-        match = re.match(DEVICE_REGEX, self.config['device'].lower())
-        if match.group('device') == 'gpu':
-            caffe.set_mode_gpu()
-            identifier = match.group('identifier') or 0
-            caffe.set_device(int(identifier))
-        elif match.group('device') == 'cpu':
-            caffe.set_mode_cpu()
-
-        self._batch = self.config.get('batch', 1)
-        self.const_inputs = self.config.get('_list_const_inputs', [])
-
-    @property
-    def inputs(self):
-        """
-        Returns:
-            inputs in NCHW format.
-        """
-        self._inputs_shapes = {}
-
-        for input_blob in self.network.inputs:
-            if input_blob in self.const_inputs:
-                continue
-            channels, height, width = self.network.blobs[input_blob].data.shape[1:]
-            self.network.blobs[input_blob].reshape(self._batch, channels, height, width)
-            self._inputs_shapes[input_blob] = channels, height, width
-
-        return self._inputs_shapes
-
-    @property
-    def batch(self):
-        return self._batch
-
-    @property
-    def output_blob(self):
-        return next(iter(self.network.outputs))
-
-    def predict(self, inputs, metadata, *args, **kwargs):
-        """
-        Args:
-            inputs: dictionary where keys are input layers names and values are data for them.
-            metadata: metadata of input representations
-        Returns:
-            raw data from network.
-        """
-        results = []
-        for infer_input in inputs:
-            for input_blob in self.network.inputs:
-                if input_blob in self.const_inputs:
-                    continue
-
-                data = infer_input[input_blob]
-                if self.allow_reshape_input:
-                    self.network.blobs[input_blob].reshape(*data.shape)
-
-                if data.shape[0] != self._batch:
-                    self.network.blobs[input_blob].reshape(
-                        data.shape[0], *self.network.blobs[input_blob].data.shape[1:]
-                    )
-
-            results.append(self.network.forward(**infer_input))
-            for image_meta in metadata:
-                self._provide_inputs_info_to_meta(image_meta)
-
-        return results
-
-    def get_all_inputs(self):
-        inputs_map = {}
-        for input_blob in self.network.inputs:
-            inputs_map[input_blob] = self.network.blobs[input_blob].data.shape
-
-        return inputs_map
-
-    def release(self):
-        """
-        Releases launcher.
-        """
-        del self.network
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md b/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md
deleted file mode 100644 (file)
index 2ff6013..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-# How to configure Caffe launcher
-
-To enable the Caffe launcher, add `framework: caffe` to the launchers section of your configuration file and provide the following parameters:
-
-* `device` - specifies which device will be used for inference (`cpu`, `gpu_0` and so on).
-* `model` - path to the prototxt file with the Caffe model for your topology.
-* `weights` - path to the caffemodel file with the weights for your topology.
-* `adapter` - the approach used to convert raw network output into the dataset problem representation; some adapters are framework specific. You can find detailed instructions on how to use adapters [here][adapters].
-
-You can also specify the batch size for your model using `batch` and allow reshaping of the input layer to the data shape with the `allow_reshape_input` parameter (default value is `False`).
-
-Caffe launcher config example:
-
-```yml
-launchers:
-  - framework: caffe
-    device: CPU
-    model: path_to_model/alexnet.prototxt
-    weights: path_to_weights/alexnet.caffemodel
-    adapter: classification
-    batch: 4
-```
-
-[adapters]: ./tools/accuracy_checker/accuracy_checker/adapters/README.md
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher.py
deleted file mode 100644 (file)
index ab7a108..0000000
+++ /dev/null
@@ -1,474 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import subprocess
-from pathlib import Path
-import os
-import platform
-import numpy as np
-from cpuinfo import get_cpu_info
-import openvino.inference_engine as ie
-
-from ..config import ConfigError, NumberField, PathField, StringField, DictField, ListField, BoolField
-from ..logging import warning
-from ..utils import read_yaml, contains_all, get_path, contains_any
-from .launcher import Launcher, LauncherConfig
-from .model_conversion import convert_model, FrameworkParameters
-from ..logging import print_info
-
-HETERO_KEYWORD = 'HETERO:'
-FPGA_COMPILER_MODE_VAR = 'CL_CONTEXT_COMPILER_MODE_INTELFPGA'
-DEVICE_REGEX = r"(?:^{hetero}(?P<devices>(?:{devices})(?:,(?:{devices}))*)$)|(?:^(?P<device>{devices})$)".format(
-    hetero=HETERO_KEYWORD, devices="|".join(plugin for plugin in ie.known_plugins)
-)
-VPU_PLUGINS = ('HDDL', "MYRIAD")
-VPU_LOG_LEVELS = ('LOG_NONE', 'LOG_WARNING', 'LOG_INFO', 'LOG_DEBUG')
-
-
-class CPUExtensionPathField(PathField):
-    def __init__(self, **kwargs):
-        super().__init__(is_directory=False, **kwargs)
-
-    def validate(self, entry, field_uri=None):
-        if entry is None:
-            return
-
-        field_uri = field_uri or self.field_uri
-        validation_entry = ''
-        try:
-            validation_entry = Path(entry)
-        except TypeError:
-            self.raise_error(entry, field_uri, "value is expected to be path-like")
-        is_directory = False
-        if validation_entry.parts[-1] == 'AUTO':
-            validation_entry = validation_entry.parent
-            is_directory = True
-        try:
-            get_path(validation_entry, is_directory)
-        except FileNotFoundError:
-            self.raise_error(validation_entry, field_uri, "path does not exist")
-        except NotADirectoryError:
-            self.raise_error(validation_entry, field_uri, "path is not a directory")
-        except IsADirectoryError:
-            self.raise_error(validation_entry, field_uri, "path is a directory, regular file expected")
-
-
-class DLSDKLauncherConfig(LauncherConfig):
-    """
-    Specifies configuration structure for DLSDK launcher.
-    """
-
-    device = StringField(regex=DEVICE_REGEX)
-    model = PathField(optional=True)
-    weights = PathField(optional=True)
-    caffe_model = PathField(optional=True)
-    caffe_weights = PathField(optional=True)
-    mxnet_weights = PathField(optional=True)
-    tf_model = PathField(optional=True)
-    tf_meta = PathField(optional=True)
-    onnx_model = PathField(optional=True)
-    kaldi_model = PathField(optional=True)
-    cpu_extensions = CPUExtensionPathField(optional=True)
-    gpu_extensions = PathField(optional=True)
-    bitstream = PathField(optional=True)
-    mo_params = DictField(optional=True)
-    mo_flags = ListField(optional=True)
-    outputs = ListField(optional=True)
-    allow_reshape_input = BoolField(optional=True)
-    affinity_map = PathField(optional=True)
-    batch = NumberField(floats=False, min_value=1, optional=True)
-
-    _models_prefix = PathField(is_directory=True, optional=True)
-    _model_optimizer = PathField(optional=True, allow_none=True, is_directory=True)
-    _tf_obj_detection_api_config_dir = PathField(optional=True, allow_none=True, is_directory=True)
-    _tf_custom_op_config_dir = PathField(optional=True, allow_none=True, is_directory=True)
-    _cpu_extensions_mode = StringField(optional=True, allow_none=True)
-    _aocl = PathField(optional=True)
-    _vpu_log_level = StringField(optional=True, choices=VPU_LOG_LEVELS)
-
-    def __init__(self, config_uri, **kwargs):
-        super().__init__(config_uri, **kwargs)
-        self.need_conversion = None
-
-    def validate(self, entry, field_uri=None):
-        """
-        Validate that launcher entry meets all configuration structure requirements.
-
-        Args:
-            entry: launcher configuration file entry.
-            field_uri: id of launcher entry.
-        """
-
-        dlsdk_model_options = ['model', 'weights']
-        caffe_model_options = ['caffe_model', 'caffe_weights']
-        mxnet_model_options = ['mxnet_weights']
-        tf_model_options = ['tf_model']
-        tf_meta_options = ['tf_meta']
-        onnx_model_options = ['onnx_model']
-        kaldi_model_options = ['kaldi_model']
-
-        multiple_model_sources_err = (
-            'Either model and weights or caffe_model and caffe_weights or mxnet_weights '
-            'or tf_model or tf_meta or onnx_model or kaldi_model should be specified.'
-        )
-        sources = {
-            FrameworkParameters('dlsdk', False): dlsdk_model_options,
-            FrameworkParameters('caffe', False): caffe_model_options,
-            FrameworkParameters('tf', False): tf_model_options,
-            FrameworkParameters('mxnet', False): mxnet_model_options,
-            FrameworkParameters('onnx', False): onnx_model_options,
-            FrameworkParameters('kaldi', False): kaldi_model_options,
-            FrameworkParameters('tf', True): tf_meta_options
-        }
-
-        specified = []
-        for mo_source_option in sources:
-            if contains_all(entry, sources[mo_source_option]):
-                specified.append(mo_source_option)
-
-        if not specified:
-            raise ConfigError('{} None provided'.format(multiple_model_sources_err))
-        if len(specified) > 1:
-            raise ConfigError('{} Several provided'.format(multiple_model_sources_err))
-
-        self._set_model_source(specified[0])
-        super().validate(entry, field_uri)
-
-    def _set_model_source(self, framework):
-        self.need_conversion = framework.name != 'dlsdk'
-        self.framework = framework
-        self.fields['model'].optional = self.need_conversion
-        self.fields['weights'].optional = self.need_conversion
-        self.fields['caffe_model'].optional = framework.name != 'caffe'
-        self.fields['caffe_weights'].optional = framework.name != 'caffe'
-        self.fields['mxnet_weights'].optional = framework.name != 'mxnet'
-        self.fields['tf_model'].optional = framework != FrameworkParameters('tf', False)
-        self.fields['tf_meta'].optional = framework != FrameworkParameters('tf', True)
-        self.fields['onnx_model'].optional = framework.name != 'onnx'
-        self.fields['kaldi_model'].optional = framework.name != 'kaldi'
-
-
-class DLSDKLauncher(Launcher):
-    """
-    Class for model inference using the DLSDK framework.
-    """
-
-    __provider__ = 'dlsdk'
-
-    def __init__(self, config_entry):
-        super().__init__(config_entry)
-
-        dlsdk_launcher_config = DLSDKLauncherConfig('DLSDK_Launcher')
-        dlsdk_launcher_config.validate(self.config)
-
-        self._device = self.config['device'].upper()
-        self._set_variable = False
-        self._prepare_bitstream_firmware(self.config)
-
-        if dlsdk_launcher_config.need_conversion:
-            self._model, self._weights = DLSDKLauncher.convert_model(self.config, dlsdk_launcher_config.framework)
-        else:
-            self._model = self.config['model']
-            self._weights = self.config['weights']
-
-        self._create_ie_plugin()
-        self.network = ie.IENetwork(model=str(self._model), weights=str(self._weights))
-        self.original_outputs = self.network.outputs
-        outputs = self.config.get('outputs')
-        if outputs:
-            self.network.add_outputs(outputs)
-        self.const_inputs = self.config.get('_list_const_inputs', [])
-        self._batch = self.config.get('batch', self.network.batch_size)
-        if self._batch != self.network.batch_size:
-            self._set_batch_size(self._batch)
-        affinity_map_path = self.config.get('affinity_map')
-        if affinity_map_path and self._is_hetero():
-            self._set_affinity(affinity_map_path)
-        elif affinity_map_path:
-            warning('affinity_map config is applicable only for HETERO device')
-        self.exec_network = self.plugin.load(network=self.network)
-        self.allow_reshape_input = self.config.get('allow_reshape_input', False)
-
-    @property
-    def inputs(self):
-        """
-        Returns:
-            inputs in NCHW format.
-        """
-
-        # omit the N (batch) dimension for each non-constant input
-        return {k: v.shape[1:] for k, v in self.network.inputs.items() if k not in self.const_inputs}
-
-    @property
-    def batch(self):
-        return self._batch
-
-    @property
-    def output_blob(self):
-        return next(iter(self.original_outputs))
-
-    def predict(self, inputs, metadata, *args, **kwargs):
-        """
-        Args:
-            inputs: dictionary where keys are input layers names and values are data for them.
-            metadata: metadata of input representations
-        Returns:
-            raw data from network.
-        """
-        results = []
-        for infer_inputs in inputs:
-            input_shapes = {}
-            do_reshape = False
-            for input_blob in self.network.inputs:
-                if input_blob in self.const_inputs:
-                    input_shapes[input_blob] = self.network.inputs[input_blob].shape
-                    continue
-
-                data = infer_inputs[input_blob]
-                input_shapes[input_blob] = data.shape
-                if self.allow_reshape_input:
-                    if tuple(self.network.inputs[input_blob].shape) != data.shape:
-                        do_reshape = True
-
-            if do_reshape:
-                self._reshape_input(input_shapes)
-
-            for input_blob, data in infer_inputs.items():
-                if input_blob in self.const_inputs:
-                    continue
-                infer_inputs[input_blob] = self._align_data_shape(data, input_blob)
-
-            network_inputs_data = {**infer_inputs}
-
-            benchmark = kwargs.get('benchmark')
-            if benchmark:
-                benchmark(network_inputs_data)
-
-            result = self.exec_network.infer(network_inputs_data)
-
-            raw_outputs_callback = kwargs.get('output_callback')
-            if raw_outputs_callback:
-                raw_outputs_callback(result)
-
-            results.append(result)
-            for meta in metadata:
-                self._provide_inputs_info_to_meta(meta)
-
-        return results
-
-    def _is_hetero(self):
-        return self._device.startswith(HETERO_KEYWORD)
-
-    def _devices_list(self):
-        device = self._device
-        if HETERO_KEYWORD in self._device:
-            device = self._device[len(HETERO_KEYWORD):]
-
-        return [platform_.upper().strip() for platform_ in device.split(',')]
-
-    def _set_affinity(self, affinity_map_path):
-        self.plugin.set_initial_affinity(self.network)
-        layers = self.network.layers
-        for layer, device in read_yaml(affinity_map_path).items():
-            if layer not in layers:
-                raise ConfigError('Layer \'{layer}\' is not present in network'.format(layer=layer))
-            if device not in self._devices_list():
-                raise ConfigError(
-                    'Device \'{device}\' set for \'{layer}\' layer is not present in '
-                    'provided configuration \'{configuration}\''.format(
-                        device=device, layer=layer, configuration=self._device
-                    )
-                )
-            layers[layer].affinity = device
-
-    def _is_fpga(self):
-        return 'FPGA' in self._devices_list()
-
-    def _is_vpu(self):
-        return contains_any(self._devices_list(), VPU_PLUGINS)
-
-    def _prepare_bitstream_firmware(self, config):
-        if not self._is_fpga():
-            return
-
-        compiler_mode = os.environ.get(FPGA_COMPILER_MODE_VAR)
-        if compiler_mode == '3':
-            return
-
-        bitstream = config.get('bitstream')
-        if bitstream:
-            print_info('programming bitstream: {}'.format(bitstream.name))
-            aocl_executable = config.get('_aocl')
-            if aocl_executable:
-                subprocess.run([str(aocl_executable), 'program', 'acl0', str(bitstream)], check=True)
-                os.environ[FPGA_COMPILER_MODE_VAR] = '3'
-                self._set_variable = True
-            else:
-                aocx_variable = 'DLA_AOCX'
-                previous_bitstream = os.environ.get(aocx_variable)
-                if previous_bitstream == str(bitstream):
-                    return
-                os.environ[aocx_variable] = str(bitstream)
-                if not os.environ.get(aocx_variable):
-                    warning('Warning: {} has not been set'.format(aocx_variable))
-
-    @staticmethod
-    def get_cpu_extension(cpu_extensions, selection_mode):
-        cpu_extensions_name = cpu_extensions.parts[-1]
-        if cpu_extensions_name != 'AUTO':
-            return cpu_extensions
-        extensions_path = cpu_extensions.parent
-        file_format = '{}.dll' if platform.system() == 'Windows' else 'lib{}.so'
-        if not selection_mode:
-            default_cpu_extension = file_format.format('cpu_extension')
-            extension_list = list(extensions_path.glob(default_cpu_extension))
-
-            if extension_list:
-                return extension_list[0]
-
-            cpu_info_flags = get_cpu_info()['flags']
-            supported_flags = ['avx512', 'avx2', 'sse4']
-            for flag in supported_flags:
-                selection_mode = flag
-                if selection_mode in cpu_info_flags:
-                    break
-        extension_list = list(extensions_path.glob(file_format.format('cpu_extension_{}'.format(selection_mode))))
-
-        if not extension_list:
-            raise ConfigError('suitable CPU extension lib not found in {}'.format(extensions_path))
-
-        return extension_list[0]
-
-    @staticmethod
-    def convert_model(config, framework=FrameworkParameters('caffe', False)):
-        config_model = config.get('{}_model'.format(framework.name), '')
-        config_weights = config.get('{}_weights'.format(framework.name), '')
-        config_meta = config.get('{}_meta'.format(framework.name), '')
-
-        mo_search_paths = []
-        model_optimizer = config.get('_model_optimizer')
-        if model_optimizer:
-            mo_search_paths.append(model_optimizer)
-
-        model_optimizer_directory_env = os.environ.get('MO_DIR')
-        if model_optimizer_directory_env:
-            mo_search_paths.append(model_optimizer_directory_env)
-
-        model_name = (
-            Path(config_model).name.rsplit('.', 1)[0] or
-            Path(config_weights).name.rsplit('.', 1)[0] or
-            Path(config_meta).name.rsplit('.', 1)[0]
-        )
-
-        return convert_model(
-            model_name,
-            config_model, config_weights, config_meta, framework,
-            mo_search_paths, config.get('mo_params'),
-            config.get('mo_flags'),
-            config.get('_tf_custom_op_config_dir'),
-            config.get('_tf_obj_detection_api_pipeline_config_path')
-        )
-
-    def get_all_inputs(self):
-        return self.network.inputs
-
-    def _reshape_input(self, shapes):
-        self.network.reshape(shapes)
-        del self.exec_network
-        self.exec_network = self.plugin.load(network=self.network)
-
-    def _set_batch_size(self, batch_size):
-        # in some cases the explicit batch_size property cannot be used, so reshape the network instead
-        # save const inputs without changes
-        const_inputs_shapes = {
-            input_name: self.network.inputs[input_name].shape for input_name in self.const_inputs
-        }
-        new_non_const_input_shapes = {}
-        for layer_name, layer in self.network.inputs.items():
-            if layer_name in const_inputs_shapes:
-                continue
-            layer_shape = layer.shape
-            ind_batch = layer.layout.find('N')
-            if ind_batch != -1:
-                layer_shape[ind_batch] = batch_size
-            new_non_const_input_shapes[layer_name] = layer_shape
-
-        self.network.reshape({**const_inputs_shapes, **new_non_const_input_shapes})
-
-    def _align_data_shape(self, data, input_blob):
-        input_shape = self.network.inputs[input_blob].shape
-        data_batch_size = data.shape[0]
-        input_batch_size = input_shape[0]
-
-        if data_batch_size < input_batch_size:
-            warning_message = 'data batch {} is not equal to model input batch_size {}. '.format(
-                data_batch_size, input_batch_size
-            )
-            warning(warning_message)
-            diff_number = input_batch_size - data_batch_size
-            filled_part = [data[-1]] * diff_number
-            data = np.concatenate([data, filled_part])
-
-        if len(data.shape) > 1 and len(input_shape) > 1 and data.shape[1] != input_shape[1]:
-            data = data[:, :input_shape[1]]
-
-        return data.reshape(input_shape)
-
-    def _create_ie_plugin(self, log=True):
-        if hasattr(self, 'plugin'):
-            del self.plugin
-        self.plugin = ie.IEPlugin(self._device)
-        if log:
-            print_info('IE version: {}'.format(ie.get_version()))
-            print_info('Loaded {} plugin version: {}'.format(self.plugin.device, self.plugin.version))
-
-        cpu_extensions = self.config.get('cpu_extensions')
-        if cpu_extensions and 'CPU' in self._devices_list():
-            selection_mode = self.config.get('_cpu_extensions_mode')
-            cpu_extensions = DLSDKLauncher.get_cpu_extension(cpu_extensions, selection_mode)
-            self.plugin.add_cpu_extension(str(cpu_extensions))
-        gpu_extensions = self.config.get('gpu_extensions')
-        if gpu_extensions and 'GPU' in self._devices_list():
-            self.plugin.set_config('CONFIG_FILE', str(gpu_extensions))
-        if self._is_vpu():
-            log_level = self.config.get('_vpu_log_level')
-            if log_level:
-                self.plugin.set_config({'VPU_LOG_LEVEL': log_level})
-
-    @staticmethod
-    def fit_to_input(data, input_layer):
-        shape_len = len(input_layer.shape)
-        if shape_len == 4:
-            if len(np.shape(data)) == 5:
-                data = data[0]
-            return np.transpose(data, [0, 3, 1, 2])
-        if shape_len == 2:
-            if len(np.shape(data)) == 1:
-                return np.transpose([data])
-        return np.array(data)
-
-    def release(self):
-        if 'network' in self.__dict__:
-            del self.network
-        if 'exec_network' in self.__dict__:
-            del self.exec_network
-        if 'plugin' in self.__dict__:
-            del self.plugin
-        if self._set_variable:
-            del os.environ[FPGA_COMPILER_MODE_VAR]
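
`fit_to_input` above is essentially a layout fix-up: 4D data is assumed to arrive as NHWC batches and is transposed to the NCHW layout that `inputs` reports, while 1D vectors are turned into column vectors for 2D input layers. The same transformations in plain NumPy, for illustration only:

```python
import numpy as np

# A batch of two 32x32 RGB images in NHWC layout, as a preprocessor might produce it.
nhwc_batch = np.zeros((2, 32, 32, 3), dtype=np.float32)

# The transpose applied for 4D input layers (NHWC -> NCHW).
nchw_batch = np.transpose(nhwc_batch, [0, 3, 1, 2])
print(nchw_batch.shape)   # (2, 3, 32, 32)

# For 2D input layers, a flat feature vector becomes a column vector.
vector = np.arange(4, dtype=np.float32)
column = np.transpose([vector])
print(column.shape)       # (4, 1)
```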
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md b/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md
deleted file mode 100644 (file)
index 2a060f4..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-# How to configure OpenVINO™ launcher
-
-To enable the OpenVINO™ launcher, add `framework: dlsdk` to the launchers section of your configuration file and provide the following parameters:
-
-* `device` - specifies which device will be used for inference. Supported: `CPU`, `GPU`, `FPGA`, `MYRIAD` and the Heterogeneous plugin as `HETERO:target_device,fallback_device`.
-* `model` - path to the xml file with the Inference Engine model (IR) for your topology.
-* `weights` - path to the bin file with the weights for your topology.
-
-The launcher can optionally accept the model in its source framework format, which will be converted to Inference Engine IR using the Model Optimizer.
-If you want to use the Model Optimizer for model conversion, see the [Model Optimizer Developer Guide][openvino-mo].
-You can provide:
-
-* `caffe_model` and `caffe_weights` for Caffe model and weights (*.prototxt and *.caffemodel).
-* `tf_model` for TensorFlow model (*.pb, *.pb.frozen, *.pbtxt).
-* `tf_meta` for TensorFlow MetaGraph (*.meta).
-* `mxnet_weights` for MXNet params (*.params).
-* `onnx_model` for ONNX model (*.onnx).
-* `kaldi_model` for Kaldi model (*.nnet).
-
-If you want to specify additional parameters for model conversion (data_type, input_shape and so on), use `mo_params` for arguments with values and `mo_flags` for flag-style arguments such as `legacy_mxnet_model`.
-You can find the full list of supported parameters in the Model Optimizer Developer Guide.
-
-The model will be converted before every evaluation.
-You can provide `converted_model_dir` to save the converted model in a specific folder; otherwise, converted models are saved in the path provided via the `-C` command line argument or in the source model directory.
-
-* `adapter` - approach for converting raw network output into the representation expected by the dataset problem; some adapters are framework-specific. Detailed instructions on how to use adapters are available [here][adapters].
-
-The launcher reads the batch size from the model intermediate representation (IR). If you want to infer with a specific batch size, provide a model with the required batch or convert it using the corresponding parameter in `mo_params`.
-
-* `allow_reshape_input` - allows reshaping the input layer to the data shape (default value is `False`).
-
-Additionally, you can provide device-specific parameters:
-
-* `cpu_extensions` (path to the extension *.so file with custom layers for CPU).
-* `gpu_extensions` (path to the extension *.xml file with the OpenCL kernel description for GPU).
-* `bitstream` for running on FPGA.
-
-OpenVINO™ launcher config example:
-
-```yml
-launchers:
-  - framework: dlsdk
-    device: HETERO:FPGA,CPU
-    caffe_model: path_to_model/alexnet.prototxt
-    caffe_weights: path_to_weights/alexnet.caffemodel
-    adapter: classification
-    mo_params:
-      batch: 4
-    mo_flags:
-      - reverse_input_channels
-    cpu_extensions: cpu_extentions_avx512.so
-```
-
-[adapters]: ./tools/accuracy_checker/accuracy_checker/adapters/README.md
-[openvino-mo]: https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/dummy_launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/dummy_launcher.py
deleted file mode 100644 (file)
index ce004af..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..utils import get_path
-from ..logging import print_info
-from ..adapters import Adapter
-from ..config import PathField, StringField
-from .loaders import Loader
-from .launcher import Launcher, LauncherConfig
-
-
-class DummyLauncherConfig(LauncherConfig):
-    """
-    Specifies configuration structure for Dummy launcher.
-    """
-
-    loader = StringField(choices=Loader.providers)
-    data_path = PathField()
-    adapter = StringField(choices=Adapter.providers, optional=True)
-
-
-class DummyLauncher(Launcher):
-    """
-    Class for using predictions from another tool.
-    """
-
-    __provider__ = 'dummy'
-
-    def __init__(self, config_entry: dict, *args, **kwargs):
-        super().__init__(config_entry, *args, **kwargs)
-
-        dummy_launcher_config = DummyLauncherConfig('Dummy_Launcher')
-        dummy_launcher_config.validate(self.config)
-
-        self.data_path = get_path(self.config['data_path'])
-
-        self._loader = Loader.provide(self.config['loader'], self.data_path)
-
-        print_info("{} prediction objects loaded from {}".format(len(self._loader), self.data_path))
-
-    def predict(self, identifiers, *args, **kwargs):
-        return [self._loader[identifier] for identifier in identifiers]
-
-    def release(self):
-        pass
-
-    @property
-    def batch(self):
-        return 1
-
-    @property
-    def inputs(self):
-        return None
-
-    def get_all_inputs(self):
-        return self.inputs
-
-    @property
-    def output_blob(self):
-        return self.data_path
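A minimal usage sketch of the launcher above, assuming the `accuracy_checker` package is installed and that a pickle file with stored predictions exists at the given (hypothetical) path:

```python
from accuracy_checker.launcher.dummy_launcher import DummyLauncher

# Hypothetical configuration entry; 'saved_predictions.pickle' is assumed to exist
# and to hold prediction objects keyed by their identifiers.
config_entry = {
    'framework': 'dummy',
    'loader': 'pickle',
    'data_path': 'saved_predictions.pickle',
}

launcher = DummyLauncher(config_entry)
# Returns the stored prediction objects for the requested identifiers.
predictions = launcher.predict(['ILSVRC2012_val_00000001.JPEG'])
launcher.release()
```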
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py b/tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py
deleted file mode 100644 (file)
index 5a5d8fc..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import re
-import numpy as np
-
-from ..config import ConfigError
-from ..utils import extract_image_representations
-
-
-class InputFeeder:
-    def __init__(self, inputs_config, network_inputs, prepare_input_data=None):
-        def fit_to_input(data, input_layer):
-            if len(np.shape(data)) == 4:
-                return np.transpose(data, [0, 3, 1, 2])
-            return np.array(data)
-
-        self.input_transform_func = prepare_input_data or fit_to_input
-        self.network_inputs = network_inputs
-        self.configure(inputs_config)
-
-    def configure(self, inputs_config):
-        self.const_inputs, self.non_constant_inputs, self.inputs_mapping = self._parse_inputs_config(inputs_config)
-        if not self.non_constant_inputs:
-            raise ConfigError('Network should contain at least one layer for setting variable data.')
-
-    def fill_non_constant_inputs(self, data_representation_batch):
-        filled_inputs = {}
-        for input_layer in self.non_constant_inputs:
-            input_regex = None
-            input_batch = []
-            if self.inputs_mapping:
-                input_regex = self.inputs_mapping[input_layer]
-            for data_representation in data_representation_batch:
-                input_data = None
-                identifiers = data_representation.identifier
-                data = data_representation.data
-                if not isinstance(identifiers, list) and not input_regex:
-                    input_data = data
-                    input_batch.append(input_data)
-                    continue
-
-                if not input_regex:
-                    raise ConfigError('Impossible to choose correct data for layer {}. '
-                                      'Please provide regular expression for matching in config.'.format(input_layer))
-                data = [data] if np.isscalar(identifiers) else data
-                identifiers = [identifiers] if np.isscalar(identifiers) else identifiers
-                for identifier, data_value in zip(identifiers, data):
-                    if input_regex.match(identifier):
-                        input_data = data_value
-                        break
-                if input_data is None:
-                    raise ConfigError('Suitable data for filling layer {} not found'.format(input_layer))
-                input_batch.append(input_data)
-
-            filled_inputs[input_layer] = input_batch
-
-        return self._transform_batch(filled_inputs, extract_image_representations(data_representation_batch)[1])
-
-    def fill_inputs(self, data_representation_batch):
-        inputs = self.fill_non_constant_inputs(data_representation_batch)
-        for infer_inputs in inputs:
-            infer_inputs.update(self.const_inputs)
-        return inputs
-
-    def __call__(self, context, *args, **kwargs):
-        data_batch = context.data_batch
-        _, meta = extract_image_representations(data_batch)
-        context.input_blobs = self.fill_inputs(data_batch)
-        context.batch_meta = meta
-
-    def _parse_inputs_config(self, inputs_entry):
-        constant_inputs = {}
-        non_constant_inputs_mapping = {}
-        non_constant_inputs = []
-        for input_ in inputs_entry:
-            name = input_['name']
-            if name not in self.network_inputs:
-                raise ConfigError('network does not contain input "{}"'.format(name))
-            value = input_['value']
-
-            if input_['type'] == 'CONST_INPUT':
-                if isinstance(value, list):
-                    value = np.array(value)
-                constant_inputs[name] = value
-            else:
-                value = re.compile(value)
-                non_constant_inputs_mapping[name] = value
-
-        non_constant_inputs = list(non_constant_inputs_mapping.keys())
-        not_config_inputs = list(filter(
-            lambda input_layer: input_layer not in non_constant_inputs + list(constant_inputs.keys()),
-            self.network_inputs.keys()
-            ))
-        if non_constant_inputs and not_config_inputs:
-            raise ConfigError('input values for {} are not present in config.'.format(','.join(not_config_inputs)))
-        non_constant_inputs += not_config_inputs
-
-        return constant_inputs, non_constant_inputs, non_constant_inputs_mapping or None
-
-    def _transform_batch(self, batch_data, meta):
-        def calculate_num_splits(layers_data, batch_size):
-            max_split_num = 1
-            for _, data in layers_data.items():
-                total_tiles_num = 0
-                for tiles in data:
-                    total_tiles_num += len(tiles)
-
-                offset = 0 if total_tiles_num % batch_size == 0 else 1
-                splits_for_layer = (total_tiles_num // batch_size) + offset
-                if max_split_num < splits_for_layer:
-                    max_split_num = splits_for_layer
-
-            return max_split_num
-
-        def separate_data(data, num_splits):
-            grouped_data = [[] for _ in range(num_splits)]
-            for data_part in data:
-                for split_id, data_split in enumerate(data_part):
-                    grouped_data[split_id % num_splits].append(data_split)
-            return grouped_data
-
-        batch_size = len(meta)
-        if meta[-1].get('multi_infer', False):
-            num_splits = calculate_num_splits(batch_data, batch_size)
-            infers_data = [{} for _ in range(num_splits)]
-            for layer_name, layer_data in batch_data.items():
-                batch_for_all_infers = separate_data(layer_data, num_splits)
-                for infer_id, on_infer_batch in enumerate(batch_for_all_infers):
-                    infers_data[infer_id][layer_name] = self.input_transform_func(
-                        on_infer_batch, self.network_inputs[layer_name]
-                    )
-            return infers_data
-
-        for layer_name, layer_data in batch_data.items():
-            batch_data[layer_name] = self.input_transform_func(layer_data, self.network_inputs[layer_name])
-
-        return [batch_data]
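To illustrate how an inputs configuration maps onto `InputFeeder`, here is a small hypothetical sketch; the layer names and the regular expression are assumptions rather than values from a real model:

```python
from accuracy_checker.launcher.input_feeder import InputFeeder

# Two network inputs: 'data' is filled from data whose identifiers match a regular
# expression, while 'im_info' is a constant input shared by every infer request.
inputs_config = [
    {'name': 'data', 'type': 'INPUT', 'value': r'.*\.jpg'},
    {'name': 'im_info', 'type': 'CONST_INPUT', 'value': [600, 1024, 1]},
]

# For this sketch network_inputs only needs to expose the layer names used above.
feeder = InputFeeder(inputs_config, network_inputs={'data': None, 'im_info': None})
print(feeder.non_constant_inputs)   # ['data']
print(feeder.const_inputs)          # {'im_info': array([600, 1024, 1])}
```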
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/launcher.py
deleted file mode 100644 (file)
index 2ed64db..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import numpy as np
-from ..config import BaseField
-from ..adapters import AdapterField
-from ..config import ConfigValidator, StringField, ListField
-from ..dependency import ClassProvider
-
-
-class Launcher(ClassProvider):
-    """
-    Interface for inferring model.
-    """
-
-    __provider_type__ = 'launcher'
-
-    def __init__(self, config_entry, *args, **kwargs):
-        self.config = config_entry
-
-    def predict(self, inputs, metadata, *args, **kwargs):
-        """
-        Args:
-            inputs: dictionary where keys are input layers names and values are data for them.
-            metadata: metadata of input representations
-        Returns:
-            raw data from network.
-        """
-
-        raise NotImplementedError
-
-    def __call__(self, context, *args, **kwargs):
-        context.prediction_batch = self.predict(context.input_blobs, context.batch_meta)
-
-
-    def get_all_inputs(self):
-        raise NotImplementedError
-
-    def release(self):
-        raise NotImplementedError
-
-    @property
-    def batch(self):
-        raise NotImplementedError
-
-    @property
-    def output_blob(self):
-        raise NotImplementedError
-
-    @property
-    def inputs(self):
-        raise NotImplementedError
-
-    def _provide_inputs_info_to_meta(self, meta):
-        meta['input_shape'] = self.inputs
-
-        return meta
-
-    @staticmethod
-    def fit_to_input(data, input_layer):
-        if len(np.shape(data)) == 4:
-            return np.transpose(data, [0, 3, 1, 2])
-        return np.array(data)
-
-INPUTS_TYPES = ('CONST_INPUT', 'INPUT')
-
-class InputValidator(ConfigValidator):
-    name = StringField()
-    type = StringField(choices=INPUTS_TYPES)
-    value = BaseField()
-
-
-class ListInputsField(ListField):
-    def __init__(self, **kwargs):
-        super().__init__(allow_empty=False, value_type=InputValidator('Inputs'), **kwargs)
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        names_set = set()
-        for input_layer in entry:
-            input_name = input_layer['name']
-            if input_name not in names_set:
-                names_set.add(input_name)
-            else:
-                self.raise_error(entry, field_uri, '{} repeated name'.format(input_name))
-
-
-class LauncherConfig(ConfigValidator):
-    """
-    Specifies common part of configuration structure for launchers.
-    """
-
-    framework = StringField(choices=Launcher.providers)
-    tags = ListField(allow_empty=False, optional=True)
-    inputs = ListInputsField(optional=True)
-    adapter = AdapterField(optional=True)
-
-    def validate(self, entry, field_uri=None):
-        super().validate(entry, field_uri)
-        inputs = entry.get('inputs')
-        if inputs:
-            inputs_by_type = {input_type: [] for input_type in INPUTS_TYPES}
-            for input_layer in inputs:
-                input_type = input_layer['type']
-                inputs_by_type[input_type].append(input_layer['name'])
-
-            additional_attributes = {
-                '_list_{}s'.format(input_type.lower()): inputs for input_type, inputs in inputs_by_type.items()
-            }
-            for additional_attribute, values in additional_attributes.items():
-                self.fields[additional_attribute] = values
-
-
-def unsupported_launcher(name, error_message=None):
-    class UnsupportedLauncher(Launcher):
-        __provider__ = name
-
-        def __init__(self, config_entry, *args, **kwargs):
-            super().__init__(config_entry, *args, **kwargs)
-
-            msg = "{launcher} launcher is disabled. Please install {launcher} to enable it.".format(launcher=name)
-            raise ValueError(error_message or msg)
-
-        def predict(self, identifiers, data, *args, **kwargs):
-            raise NotImplementedError
-
-        def release(self):
-            raise NotImplementedError
-
-        @property
-        def batch(self):
-            raise NotImplementedError
-
-    return UnsupportedLauncher
-
-
-def create_launcher(launcher_config):
-    """
-    Args:
-        launcher_config: launcher configuration file entry.
-    Returns:
-        framework-specific launcher object.
-    """
-
-    launcher_config_validator = LauncherConfig(
-        'Launcher_validator',
-        on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT
-    )
-    launcher_config_validator.validate(launcher_config)
-    config_framework = launcher_config['framework']
-
-    return Launcher.provide(config_framework, launcher_config)
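A sketch of how `create_launcher` is typically driven from a single configuration entry; the launcher-specific keys below (device, model, weights) mirror the DLSDK launcher documented elsewhere in this tool and are assumptions here:

```python
from accuracy_checker.launcher.launcher import create_launcher

# Hypothetical configuration entry for the 'dlsdk' launcher; the model paths are
# placeholders and must point to an existing IR for the call to succeed.
launcher_config = {
    'framework': 'dlsdk',
    'device': 'CPU',
    'model': 'public/alexnet/alexnet.xml',
    'weights': 'public/alexnet/alexnet.bin',
    'adapter': 'classification',
}

launcher = create_launcher(launcher_config)
print(launcher.batch, launcher.inputs)
launcher.release()
```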
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/__init__.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/__init__.py
deleted file mode 100644 (file)
index 98217dd..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .loader import Loader
-
-from .pickle_loader import PickleLoader
-from .xml_loader import XMLLoader
-
-__all__ = [
-    'Loader',
-    'PickleLoader',
-    'XMLLoader',
-]
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/loader.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/loader.py
deleted file mode 100644 (file)
index 7c07394..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from pathlib import Path
-
-from ...dependency import ClassProvider
-
-
-class Loader(ClassProvider):
-    """
-    Interface that describes loading output from another tool.
-    """
-
-    __provider_type__ = 'loader'
-
-    def __init__(self, data_path: Path):
-        self._data_path = data_path
-
-    def __len__(self):
-        raise NotImplementedError
-
-    def __getitem__(self, item):
-        raise NotImplementedError
-
-
-class DictLoaderMixin:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.data = self.load()
-
-    def __len__(self):
-        return len(self.data)
-
-    def __getitem__(self, item):
-        if item not in self.data:
-            raise IndexError('There is no prediction object for "{}" input data'.format(item))
-
-        return self.data[item]
-
-    def load(self):
-        raise NotImplementedError
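`DictLoaderMixin` reduces adding a new storage format to a single `load()` implementation; a hypothetical JSON-based loader following the same pattern as the pickle and XML loaders might look like this (the `json` provider is an illustration, not part of the tool):

```python
import json
from pathlib import Path

from accuracy_checker.launcher.loaders.loader import DictLoaderMixin, Loader


class JSONLoader(DictLoaderMixin, Loader):
    """Hypothetical loader reading {identifier: prediction} pairs from a JSON file."""

    __provider__ = 'json'

    def load(self):
        # self._data_path is set by the Loader base class.
        with Path(self._data_path).open() as content:
            return json.load(content)

# Usage sketch: loader = JSONLoader(Path('predictions.json'))
```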
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/pickle_loader.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/pickle_loader.py
deleted file mode 100644 (file)
index ba3578b..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ...utils import read_pickle
-from .loader import Loader, DictLoaderMixin
-
-
-class PickleLoader(DictLoaderMixin, Loader):
-    """
-    Class for loading output from another tool in .pickle format.
-    """
-
-    __provider__ = 'pickle'
-
-    def load(self):
-        data = read_pickle(self._data_path)
-
-        if isinstance(data, list) and all(hasattr(entry, 'identifier') for entry in data):
-            return dict(zip([representation.identifier for representation in data], data))
-
-        return data
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/xml_loader.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/xml_loader.py
deleted file mode 100644 (file)
index 13c0de9..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ...utils import read_xml
-from .loader import Loader, DictLoaderMixin
-
-
-class XMLLoader(DictLoaderMixin, Loader):
-    """
-    Class for loading output from another tool in .xml format.
-    """
-
-    __provider__ = 'xml'
-
-    def load(self):
-        return read_xml(self._data_path)
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/model_conversion.py b/tools/accuracy_checker/accuracy_checker/launcher/model_conversion.py
deleted file mode 100644 (file)
index fa22360..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import sys
-import subprocess
-from pathlib import Path
-from typing import Union
-from collections import namedtuple
-from ..utils import get_path, format_key
-
-FrameworkParameters = namedtuple('FrameworkParameters', ['name', 'meta'])
-
-def convert_model(topology_name, model=None, weights=None, meta=None,
-                  framework=FrameworkParameters('caffe', False), mo_search_paths=None, mo_params=None, mo_flags=None,
-                  tf_custom_op_config_dir=None, tf_object_detection_api_config_dir=None):
-    """
-    Args:
-        topology_name: name for converted model files.
-        model: path to the topology file.
-        weights: path to the weights file.
-        meta: path to the meta file
-        framework: framework name for original model.
-        mo_search_paths: paths where ModelOptimizer may be found. If None, only the default paths are used.
-        mo_params: value parameters for ModelOptimizer execution.
-        mo_flags: flags parameters for ModelOptimizer execution.
-        tf_custom_op_config_dir: path to the TensorFlow custom operations directory.
-        tf_object_detection_api_config_dir: path to the TensorFlow directory with the config for the object detection API.
-    Returns:
-        paths to converted to IE IR model and weights.
-    """
-
-    mo_params = mo_params or {}
-    mo_flags = mo_flags or []
-
-    set_topology_name(mo_params, topology_name)
-
-    model_optimizer_executable = find_mo(mo_search_paths)
-    if not model_optimizer_executable:
-        raise EnvironmentError(
-            'Model Optimizer not found. Please set the MO_DIR environment variable to the Model Optimizer '
-            'installation folder or refer to the command line help for options for providing Model Optimizer'
-        )
-
-    framework_specific_options = {
-        FrameworkParameters('caffe', False): {'input_model': weights, 'input_proto': model},
-        FrameworkParameters('mxnet', False): {'input_model': weights},
-        FrameworkParameters('tf', False): {'input_model': model},
-        FrameworkParameters('tf', True): {'input_meta_graph': meta},
-        FrameworkParameters('onnx', False): {'input_model': model},
-        FrameworkParameters('kaldi', False): {'input_model': model}
-    }
-
-    mo_params['framework'] = framework.name
-    mo_params.update(framework_specific_options.get(framework, {}))
-
-    set_path_to_custom_operation_configs(mo_params, framework, tf_custom_op_config_dir, model_optimizer_executable)
-    set_path_to_object_detection_api_pipeline_config(mo_params, framework, tf_object_detection_api_config_dir)
-    args = prepare_args(str(model_optimizer_executable), flag_options=mo_flags, value_options=mo_params)
-
-    code = exec_mo_binary(args)
-
-    if code.returncode != 0:
-        raise RuntimeError("Model optimizer conversion failed: ModelOptimizer returned non-zero code")
-
-    model_file, bin_file = find_dlsdk_ir(
-        get_path(mo_params.get('output_dir', Path.cwd()), is_directory=True), mo_params['model_name']
-    )
-    if not bin_file or not model_file:
-        raise RuntimeError("Model optimizer finished correctly, but the converted model was not found.")
-
-    return model_file, bin_file
-
-
-def find_dlsdk_ir(search_path: Path, model_name):
-    """
-    Args:
-        search_path: path with IE IR of model.
-        model_name: name of the model.
-    Returns:
-        paths to IE IR of model.
-    """
-
-    xml_file = search_path / '{}.xml'.format(model_name)
-    bin_file = search_path / '{}.bin'.format(model_name)
-
-    return get_path(xml_file), get_path(bin_file)
-
-
-def find_mo(search_paths=None) -> Union[Path, None]:
-    """
-    Args:
-        search_paths: paths where ModelOptimizer may be found. If None, only the default paths are used.
-    Returns:
-        path to the ModelOptimizer or None if it wasn't found.
-    """
-
-    default_mo_path = ('intel', 'openvino', 'deployment_tools', 'model_optimizer')
-    default_paths = [Path.home().joinpath(*default_mo_path), Path('/opt').joinpath(*default_mo_path)]
-
-    executable = 'mo.py'
-    for path in search_paths or default_paths:
-        path = Path(path)
-        if not path.is_dir():
-            continue
-
-        mo = path / executable
-        if not mo.is_file():
-            continue
-
-        return mo
-
-    return None
-
-
-def prepare_args(executable, flag_options=None, value_options=None):
-    """
-    Args:
-        executable: path to the executable.
-        flag_options: positional arguments for executable.
-        value_options: keyword arguments for executable.
-    Returns:
-        list with command-line entries.
-    """
-
-    result = [sys.executable, executable]
-
-    for flag_option in flag_options or []:
-        result.append(str(format_key(flag_option)))
-
-    for key, value in (value_options or {}).items():
-        result.append(str(format_key(key)))
-        result.append(str(value))
-
-    return result
-
-
-def exec_mo_binary(args, timeout=None):
-    """
-    Args:
-        args: command-line entries.
-        timeout: timeout for execution.
-    Returns:
-        result of execution.
-    """
-
-    return subprocess.run(args, check=False, timeout=timeout)
-
-
-def set_path_to_custom_operation_configs(mo_params, framework, tf_custom_op_config_dir, mo_path):
-    if framework.name != 'tf':
-        return mo_params
-
-    config_path = mo_params.get('tensorflow_use_custom_operations_config')
-    if not config_path:
-        return mo_params
-
-    if tf_custom_op_config_dir:
-        tf_custom_op_config_dir = Path(tf_custom_op_config_dir)
-    else:
-        tf_custom_op_config_dir = Path('/').joinpath(*mo_path.parts[:-1]) / 'extensions' / 'front' / 'tf'
-
-    config_path = Path(config_path)
-    if not config_path.is_absolute():
-        config_path = tf_custom_op_config_dir / config_path
-
-    mo_params['tensorflow_use_custom_operations_config'] = str(get_path(config_path))
-
-    return mo_params
-
-
-def set_path_to_object_detection_api_pipeline_config(mo_params, framework, object_detection_api_config_dir=None):
-    object_detection_api_config = mo_params.get('tensorflow_object_detection_api_pipeline_config')
-    if framework.name != 'tf' or not object_detection_api_config:
-        return mo_params
-    model_path = mo_params.get('input_model') or mo_params.get('input_meta_graph')
-
-    object_detection_api_config_dir = Path(object_detection_api_config_dir or get_path(model_path).parent)
-    config_path = object_detection_api_config_dir / object_detection_api_config
-    mo_params['tensorflow_object_detection_api_pipeline_config'] = str(get_path(config_path))
-
-    return mo_params
-
-
-def set_topology_name(mo_params, topology_name):
-    if not mo_params.get('model_name'):
-        mo_params['model_name'] = topology_name
-
-    return mo_params
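A usage sketch for `convert_model`, assuming Model Optimizer is discoverable in one of the default locations (or via `mo_search_paths`) and that the frozen graph exists; the model name, paths and conversion parameters are illustrative:

```python
from pathlib import Path

from accuracy_checker.launcher.model_conversion import FrameworkParameters, convert_model

# Hypothetical conversion of a frozen TensorFlow graph to Inference Engine IR;
# all paths and parameter values are placeholders.
xml_path, bin_path = convert_model(
    'ssd_mobilenet_v1',
    model=Path('frozen_inference_graph.pb'),
    framework=FrameworkParameters('tf', False),
    mo_params={'data_type': 'FP16', 'output_dir': '/tmp/ir'},
    mo_flags=['reverse_input_channels'],
)
print(xml_path, bin_path)
```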
diff --git a/tools/accuracy_checker/accuracy_checker/logging.py b/tools/accuracy_checker/accuracy_checker/logging.py
deleted file mode 100644 (file)
index 742097c..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import logging
-import logging.config
-import os
-import sys
-import warnings
-
-_DEFAULT_LOGGER_NAME = 'accuracy_checker'
-_DEFAULT_LOG_FILE = 'accuracy_checker.log'
-
-PRINT_INFO = logging.INFO + 5
-logging.addLevelName(PRINT_INFO, "PRINT_INFO")
-
-_LOG_LEVEL_ENVIRON = "ACCURACY_CHECKER_LOG_LEVEL"
-_LOGGING_LEVEL = logging.getLevelName(os.environ.get(_LOG_LEVEL_ENVIRON, PRINT_INFO))
-
-
-class LoggingFormatter(logging.Formatter):
-    def format(self, record: logging.LogRecord):
-        if record.levelno == PRINT_INFO:
-            return record.msg
-        return super().format(record)
-
-
-class ConsoleHandler(logging.StreamHandler):
-    def __init__(self, default_stream=sys.stdout):
-        super().__init__(default_stream)
-        self.default_stream = default_stream
-        self.err_stream = sys.stderr
-
-    def emit(self, record):
-        if record.levelno >= logging.WARNING:
-            self.stream = self.err_stream
-        else:
-            self.stream = self.default_stream
-        super().emit(record)
-
-
-_LOGGING_CONFIGURATION = {
-    'loggers': {
-        _DEFAULT_LOGGER_NAME: {
-            'handlers': ['console'],
-            'level': _LOGGING_LEVEL,
-            'propagate': False
-        }
-    },
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'default': {
-            '()': LoggingFormatter,
-            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s',
-            'datefmt': '%H:%M:%S'
-        },
-        'detailed': {
-            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s'
-        }
-    },
-    'handlers': {
-        'console': {
-            'level': 'DEBUG',
-            '()': ConsoleHandler,
-            'formatter': 'default',
-        }
-    }
-}
-
-logging.config.dictConfig(_LOGGING_CONFIGURATION)
-
-_default_logger = logging.getLogger(_DEFAULT_LOGGER_NAME)
-
-
-def _warning_handler(message, category, filename, line_number, *args, **kwargs):
-    s = warnings.formatwarning(message, category, filename, line_number)
-    _default_logger.warning(s)
-
-
-warnings.showwarning = _warning_handler
-
-
-def get_logger(logger_name: str):
-    if logger_name.startswith(_DEFAULT_LOGGER_NAME):
-        return _default_logger.getChild(logger_name)
-    return logging.getLogger(logger_name)
-
-
-def error(msg, *args, **kwargs):
-    _default_logger.error(msg, *args, **kwargs)
-
-
-def warning(msg, *args, raise_warning=True, **kwargs):
-    if raise_warning:
-        warnings.warn(msg)
-    else:
-        _default_logger.warning(msg, *args, **kwargs)
-
-
-def info(msg, *args, **kwargs):
-    _default_logger.info(msg, *args, **kwargs)
-
-
-def debug(msg, *args, **kwargs):
-    _default_logger.debug(msg, *args, **kwargs)
-
-
-def print_info(msg, *args, **kwargs):
-    _default_logger.log(PRINT_INFO, msg, *args, **kwargs)
-
-
-def add_file_handler(file_name):
-    file_info_handler_config = {
-        'level': 'PRINT_INFO',
-        'class': 'logging.handlers.WatchedFileHandler',
-        'formatter': 'default',
-        'filename': file_name
-    }
-    _LOGGING_CONFIGURATION['handlers']['file_info'] = file_info_handler_config
-    _LOGGING_CONFIGURATION['loggers'][_DEFAULT_LOGGER_NAME]['handlers'].append('file_info')
-    logging.config.dictConfig(_LOGGING_CONFIGURATION)
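A short sketch of how this logging module is typically used; the log file name is an assumption:

```python
from accuracy_checker.logging import add_file_handler, get_logger, print_info

# Mirror console output to a file (the file name is an assumption) and emit a
# user-facing message at the custom PRINT_INFO level.
add_file_handler('accuracy_checker_run.log')
print_info('starting evaluation')

# Child loggers inherit the accuracy_checker configuration; warnings and above
# are routed to stderr by ConsoleHandler.
logger = get_logger('accuracy_checker.example')
logger.warning('something looks suspicious')
```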
diff --git a/tools/accuracy_checker/accuracy_checker/main.py b/tools/accuracy_checker/accuracy_checker/main.py
deleted file mode 100644 (file)
index c573c7d..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from pathlib import Path
-from argparse import ArgumentParser
-from functools import partial
-
-from .config import ConfigReader
-from .logging import print_info, add_file_handler
-from .evaluators import ModelEvaluator, PipeLineEvaluator, get_processing_info
-from .progress_reporters import ProgressReporter
-from .utils import get_path
-
-
-def build_arguments_parser():
-    parser = ArgumentParser(description='NN Validation on Caffe and IE', allow_abbrev=False)
-    parser.add_argument(
-        '-d', '--definitions',
-        help='path to the yml file with definitions',
-        type=get_path,
-        required=False
-    )
-    parser.add_argument(
-        '-c', '--config',
-        help='path to the yml file with local configuration',
-        type=get_path,
-        required=True
-    )
-    parser.add_argument(
-        '-m', '--models',
-        help='prefix path to the models and weights',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-    parser.add_argument(
-        '-s', '--source',
-        help='prefix path to the data source',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-    parser.add_argument(
-        '-a', '--annotations',
-        help='prefix path to the converted annotations and datasets meta data',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-    parser.add_argument(
-        '-e', '--extensions',
-        help='prefix path to extensions folder',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-    parser.add_argument(
-        '--cpu_extensions_mode',
-        help='preferable set of processor instructions for automatically searching for the CPU extension lib',
-        required=False,
-        choices=['avx512', 'avx2', 'sse4']
-    )
-    parser.add_argument(
-        '-b', '--bitstreams',
-        help='prefix path to bitstreams folder',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-    parser.add_argument(
-        '--stored_predictions',
-        help='path to file with saved predictions. Used for development',
-        # since at the first time file does not exist and then created we can not always check existence
-        required=False
-    )
-    parser.add_argument(
-        '-C', '--converted_models',
-        help='directory to store Model Optimizer converted models. Used for DLSDK launcher only',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-    parser.add_argument(
-        '-M', '--model_optimizer',
-        help='path to model optimizer directory',
-        type=partial(get_path, is_directory=True),
-        # there is no default value because if user did not specify it we use specific locations
-        # defined in model_conversion.py
-        required=False
-    )
-    parser.add_argument(
-        '--tf_custom_op_config_dir',
-        help='path to directory with tensorflow custom operation configuration files for model optimizer',
-        type=partial(get_path, is_directory=True),
-        # there is no default value because if user did not specify it we use specific location
-        # defined in model_conversion.py
-        required=False
-    )
-    parser.add_argument(
-        '--tf_obj_detection_api_pipeline_config_path',
-        help='path to directory with tensorflow object detection api pipeline configuration files for model optimizer',
-        type=partial(get_path, is_directory=True),
-        # there is no default value because if user did not specify it we use specific location
-        # defined in model_conversion.py
-        required=False
-    )
-    parser.add_argument(
-        '--progress',
-        help='progress reporter',
-        required=False,
-        default='bar'
-    )
-    parser.add_argument(
-        '-tf', '--target_framework',
-        help='framework for infer',
-        required=False
-    )
-    parser.add_argument(
-        '-td', '--target_devices',
-        help='Space separated list of devices for infer',
-        required=False,
-        nargs='+'
-    )
-
-    parser.add_argument(
-        '-tt', '--target_tags',
-        help='Space separated list of launcher tags for infer',
-        required=False,
-        nargs='+'
-    )
-
-    parser.add_argument(
-        '-l', '--log_file',
-        help='file for additional logging results',
-        required=False
-    )
-
-    parser.add_argument(
-        '--ignore_result_formatting',
-        help='allow to get raw metrics results without data formatting',
-        required=False,
-        default=False
-    )
-
-    parser.add_argument(
-        '-am', '--affinity_map',
-        help='prefix path to the affinity maps',
-        type=partial(get_path, is_directory=True),
-        default=Path.cwd(),
-        required=False
-    )
-
-    parser.add_argument(
-        '--aocl',
-        help='path to aocl executable for FPGA bitstream programming',
-        type=get_path,
-        required=False
-    )
-    parser.add_argument(
-        '--vpu_log_level',
-        help='log level for VPU devices',
-        required=False,
-        choices=['LOG_NONE', 'LOG_WARNING', 'LOG_INFO', 'LOG_DEBUG'],
-        default='LOG_WARNING'
-    )
-
-    return parser
-
-
-def main():
-    args = build_arguments_parser().parse_args()
-    progress_reporter = ProgressReporter.provide((
-        args.progress if ':' not in args.progress
-        else args.progress.split(':')[0]
-    ))
-    if args.log_file:
-        add_file_handler(args.log_file)
-
-    config, mode = ConfigReader.merge(args)
-    if mode == 'models':
-        model_evaluation_mode(config, progress_reporter, args)
-    else:
-        pipeline_evaluation_mode(config, progress_reporter, args)
-
-
-def model_evaluation_mode(config, progress_reporter, args):
-    for model in config['models']:
-        for launcher_config in model['launchers']:
-            for dataset_config in model['datasets']:
-                print_processing_info(
-                    model['name'],
-                    launcher_config['framework'],
-                    launcher_config['device'],
-                    launcher_config.get('tags'),
-                    dataset_config['name']
-                )
-                model_evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
-                progress_reporter.reset(model_evaluator.dataset.size)
-                model_evaluator.process_dataset(args.stored_predictions, progress_reporter=progress_reporter)
-                model_evaluator.compute_metrics(ignore_results_formatting=args.ignore_result_formatting)
-
-                model_evaluator.release()
-
-
-def pipeline_evaluation_mode(config, progress_reporter, args):
-    for pipeline_config in config['pipelines']:
-        print_processing_info(*get_processing_info(pipeline_config))
-        evaluator = PipeLineEvaluator.from_configs(pipeline_config['stages'])
-        evaluator.process_dataset(args.stored_predictions, progress_reporter=progress_reporter)
-        evaluator.compute_metrics(ignore_results_formatting=args.ignore_result_formatting)
-
-        evaluator.release()
-
-
-def print_processing_info(model, launcher, device, tags, dataset):
-    print_info('Processing info:')
-    print_info('model: {}'.format(model))
-    print_info('launcher: {}'.format(launcher))
-    if tags:
-        print_info('launcher tags: {}'.format(' '.join(tags)))
-    print_info('device: {}'.format(device))
-    print_info('dataset: {}'.format(dataset))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/README.md b/tools/accuracy_checker/accuracy_checker/metrics/README.md
deleted file mode 100644 (file)
index c1381b2..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-# Metrics
-
-Metrics require a specific representation format to work correctly
-(e.g. `map` expects a detection annotation and a detection prediction for evaluation).
-
-If you use a complex representation located in a representation container, you need to add the `annotation_source` and `prediction_source` options in the configuration file to
-select a specific representation; otherwise, metric calculation is possible only if the container has a single suitable representation, in which case it is resolved automatically.
-`annotation_source` and `prediction_source` should contain only one annotation identifier and output layer name, respectively.
-You may optionally provide the `reference` field for a metric if you want the calculated metric tested against a specific value (e.g. one reported in the canonical paper), and an acceptable `threshold` for metric deviation from the reference value.
-
-Every metric has parameters available for configuration. 
-
-Accuracy Checker supports the following set of metrics:
-
-* `accuracy` - classification accuracy metric, defined as the number of correct predictions divided by the total number of predictions.
-Supported representation: `ClassificationAnnotation`, `ClassificationPrediction`
-  * `top_k` - the number of classes with the highest probability, which will be used to decide if prediction is correct.
-* `accuracy_per_class` - classification accuracy metric which represents results for each class. Supported representation: `ClassificationAnnotation`, `ClassificationPrediction`.
-  * `top_k` - the number of classes with the highest probability, which will be used to decide if prediction is correct.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-* `character_recognition_accuracy` - accuracy metric for character recognition task. Supported representation: `CharacterRecognitionAnnotation`, `CharacterRecognitionPrediction`.
-* `map` - mean average precision. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `overlap_threshold` - minimal intersection over union value required to decide that a prediction bounding box is a true positive.
-  * `overlap_method` - method for calculating bbox overlap. You can choose between intersection over union (`iou`), defined as the area of intersection divided by the union of annotation and prediction box areas, and intersection over area (`ioa`), defined as the area of intersection divided by the area of the prediction box.
-  * `include_boundaries` - allows including boundaries in the overlap calculation process. If it is True, the width and height of a box are calculated as max - min + 1.
-  * `ignore_difficult` - allows ignoring difficult annotation boxes in metric calculation. In this case, difficult boxes are annotations filtered out at the postprocessing stage.
-  * `distinct_conf` - select only values for distinct confidences.
-  * `allow_multiple_matches_per_ignored` - allows multiple matches per ignored.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `integral` - integral type for average precision calculation. Pascal VOC `11point` and `max` approaches are available.
-* `miss_rate` - miss rate metric of detection models.  Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `overlap_threshold` - minimal intersection over union value required to decide that a prediction bounding box is a true positive.
-  * `overlap_method` - method for calculating bbox overlap. You can choose between intersection over union (`iou`), defined as the area of intersection divided by the union of annotation and prediction box areas, and intersection over area (`ioa`), defined as the area of intersection divided by the area of the prediction box.
-  * `include_boundaries` - allows including boundaries in the overlap calculation process. If it is True, the width and height of a box are calculated as max - min + 1.
-  * `ignore_difficult` - allows ignoring difficult annotation boxes in metric calculation. In this case, difficult boxes are annotations filtered out at the postprocessing stage.
-  * `distinct_conf` - select only values for distinct confidences.
-  * `allow_multiple_matches_per_ignored` - allows multiple matches per ignored.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `fppi_level` - false positive per image level.
-* `recall` - recall metric of detection models. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `overlap_threshold` - minimal intersection over union value required to decide that a prediction bounding box is a true positive.
-  * `overlap_method` - method for calculating bbox overlap. You can choose between intersection over union (`iou`), defined as the area of intersection divided by the union of annotation and prediction box areas, and intersection over area (`ioa`), defined as the area of intersection divided by the area of the prediction box.
-  * `include_boundaries` - allows including boundaries in the overlap calculation process. If it is True, the width and height of a box are calculated as max - min + 1.
-  * `ignore_difficult` - allows ignoring difficult annotation boxes in metric calculation. In this case, difficult boxes are annotations filtered out at the postprocessing stage.
-  * `distinct_conf` - select only values for distinct confidences.
-  * `allow_multiple_matches_per_ignored` - allows multiple matches per ignored.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-* `detection_accuracy` - accuracy for detection models. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `overlap_threshold` - minimal intersection over union value required to decide that a prediction bounding box is a true positive.
-  * `overlap_method` - method for calculating bbox overlap. You can choose between intersection over union (`iou`), defined as the area of intersection divided by the union of annotation and prediction box areas, and intersection over area (`ioa`), defined as the area of intersection divided by the area of the prediction box.
-  * `include_boundaries` - allows including boundaries in the overlap calculation process. If it is True, the width and height of a box are calculated as max - min + 1.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `use_normalization` - allows normalizing the confusion_matrix for metric calculation.
-* `segmentation_accuracy` - pixel accuracy for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `use_argmax` - allows using argmax for the prediction mask.
-* `mean_iou` - mean intersection over union for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `use_argmax` - allows using argmax for the prediction mask.
-* `mean_accuracy` - mean accuracy for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `use_argmax` - allows using argmax for the prediction mask.
-* `frequency_weighted_accuracy` - frequency weighted accuracy for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `use_argmax` - allows using argmax for the prediction mask.
-More detailed information about calculating segmentation metrics can be found [here][segmentation_article].
-* `cmc` - Cumulative Matching Characteristics (CMC) score. Supported representations: `ReIdentificationAnnotation`, `ReIdentificationPrediction`.
-  * `top_k` -  number of k highest ranked samples to consider when matching.
-  * `separate_camera_set` - should identities from the same camera view be filtered out.
-  * `single_gallery_shot` -  each identity has only one instance in the gallery.
-  * `number_single_shot_repeats` - number of repeats for single_gallery_shot setting (required for CUHK).
-  * `first_match_break` - break on first matched gallery sample.
-* `reid_map` - Mean Average Precision score for object reidentification. Supported representations: `ReIdentificationAnnotation`, `ReIdentificationPrediction`.
-  * `uninterpolated_auc` - should area under precision recall curve be computed using trapezoidal rule or directly.
-* `pairwise_accuracy` - pairwise accuracy for object reidentification. Supported representations: `ReIdentificationClassificationAnnotation`, `ReIdentificationPrediction`.
-  * `min_score` - min score for determining that objects are different. You can provide a value or use `train_median`, which will be calculated if the annotation has a training subset.
-* `pairwise_accuracy_subsets` - object reidentification pairwise accuracy with division dataset on test and train subsets for calculation mean score. Supported representations: `ReIdentificationClassificationAnnotation`, `ReIdentificationPrediction`.
-  * `subset_number` - number of subsets for separating. 
-* `mae` - [Mean Absolute Error][mae]. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
-* `mae_on_intervals` - Mean Absolute Error estimated magnitude for specific value range. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
-  * `intervals` - comma-separated list of interval boundaries.
-  * `ignore_values_not_in_interval` - allows creating additional intervals for values less than the minimal interval boundary and greater than the maximal one.
-  * `start`, `step`, `end` - a way to generate a range of intervals from `start` to `end` with step `step`.
-* `mse` - [Mean Squared Error][mse]. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
-* `mse_on_intervals` - Mean Squared Error estimated magnitude for specific value range. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
-  * `intervals` - comma-separated list of interval boundaries.
-  * `ignore_values_not_in_interval` - allows creating additional intervals for values less than the minimal interval boundary and greater than the maximal one.
-  * `start`, `step`, `end` - generate a range of intervals from `start` to `end` with step `step`.
-* `rmse` - [Root Mean Squared Error][rmse]. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
-* `rmse_on_intervals` - Root Mean Squared Error estimated magnitude for specific value range. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
-  * `intervals` - comma-separated list of interval boundaries.
-  * `ignore_values_not_in_interval` - allows creating additional intervals for values less than the minimal interval boundary and greater than the maximal one.
-  * `start`, `step`, `end` - generate a range of intervals from `start` to `end` with step `step`.
-* `per_point_normed_error` - Normed Error for measuring the quality of landmarks' positions, estimated for each point independently. Supported representations: `FacialLandmarksAnnotation`, `FacialLandmarksPrediction`.
-* `normed_error` - Normed Error for measuring the quality of landmarks' positions. Supported representations: `FacialLandmarksAnnotation`, `FacialLandmarksPrediction`.
-  * `calculate_std` - allows calculation of standard deviation (default value: `False`)
-  * `percentile` - calculate error rate for given percentile.
-* `per_point_regression` - Root Mean Squared Error for 2D points estimated results for each point independently. Supported representations: `PointRegressionAnnotation`, `PointRegressionPrediction`.
-  * `scaling_distance` - comma-separated list of 2 point indexes, distance between which will be used for scaling regression distances.
-* `average point error` - Root Mean Squared Error for 2D points estimated average results for all points. Supported representations: `PointRegressionAnnotation`, `PointRegressionPrediction`.
-  * `scaling_distance` - comma-separated list of 2 point indexes, distance between which will be used for scaling regression distances.
-* `multi_accuracy` - accuracy for multilabel recognition task. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `calculate_average` - allows calculation of average accuracy (default value: `True`).
-* `multi_precision` - precision metric for multilabel recognition. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `calculate_average` - allows calculation of average precision (default value: `True`).
-* `multi_recall` - recall metric for multilabel recognition. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `calculate_average` - allows calculation of average recall (default value: `True`).
-* `f1_score` - [F score][f_score] metric for multilabel recognition. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
-  * `label_map` - the field in annotation metadata, which contains dataset label map.
-  * `calculate_average` - allows calculation of average f-score (default value: `True`).
-* `text_detection` - Harmonic mean of precision and recall for text detection task. Supported representations: `TextDetectionAnnotation`, `TextDetectionPrediction`.
-  * `iou_constrain` - minimal intersection over union value required to decide that a prediction polygon is a true positive.
-  * `ignore_difficult` - allows ignoring difficult ground truth text polygons in metric calculation.
-  * `area_precision_constrain` - minimal intersection over union value required to decide that a prediction polygon matches an ignored annotation.
-* `coco_precision` - MS COCO Average Precision metric for keypoints recognition and object detection tasks. Supported representations: `PoseEstimationAnnotation`, `PoseEstimationPrediction`, `DetectionAnnotation`, `DetectionPrediction`.
-  * `max_detections` - max number of predicted results per image. If you have more predictions, the results with minimal confidence will be ignored.
-  * `threshold` - intersection over union threshold. You can specify one value or comma separated range of values. This parameter supports precomputed values for standard COCO thresholds (`.5`, `.75`, `.5:.05:.95`).
-* `coco_recall` - MS COCO Average Recall metric for keypoints recognition and object detection tasks. Supported representations: `PoseEstimationAnnotation`, `PoseEstimationPrediction`, `DetectionAnnotation`, `DetectionPrediction`.
-  * `max_detections` - max number of predicted results per image. If you have more predictions,the results with minimal confidence will be ignored.
-  * `threshold` - intersection over union threshold. You can specify one value or comma separated range of values. This parameter supports precomputed values for standard COCO thresholds (`.5`, `.75`, `.5:.05:.95`).
-* `angle_error` - Mean angle error and Standard deviation of angle error for gaze estimation. Supported representations: `GazeVectorAnnotation`, `GazeVectorPrediction`.
-  
-[segmentation_article]: https://arxiv.org/pdf/1411.4038v2.pdf
-[mae]: https://en.wikipedia.org/wiki/Mean_absolute_error
-[mse]: https://en.wikipedia.org/wiki/Mean_squared_error
-[rmse]: https://en.wikipedia.org/wiki/Root-mean-square_deviation
-[f_score]: https://en.wikipedia.org/wiki/F1_score
-[psnr]: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
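The parameters documented in the deleted README above are consumed as plain dictionaries by the `MetricsExecutor` that is removed later in this diff. The snippet below is an illustrative sketch only, not part of the change: the metric names and parameter values come from the list above, while the surrounding list-of-dicts layout is an assumption about how such entries typically reach the executor from the dataset configuration.

```python
# Illustrative metric entries using parameters documented in the list above.
# In a real setup these dictionaries come from the YAML dataset configuration.
metrics_config = [
    {
        "type": "accuracy",          # ClassificationAccuracy provider
        "top_k": 5,                  # a hit if the label is within the top-5 predictions
    },
    {
        "type": "coco_precision",    # MS COCO Average Precision provider
        "max_detections": 20,        # keep at most the 20 highest-confidence detections per image
        "threshold": ".50:.05:.95",  # standard COCO IoU threshold range
    },
]
```

`MetricsExecutor` (removed below) validates each entry, instantiates the provider named in `type`, and feeds annotation/prediction pairs to it.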
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/__init__.py b/tools/accuracy_checker/accuracy_checker/metrics/__init__.py
deleted file mode 100644 (file)
index f5bc379..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .metric_executor import MetricsExecutor
-
-from .classification import ClassificationAccuracy, ClassificationAccuracyClasses, ClipAccuracy
-from .detection import (DetectionMAP, MissRate, Recall, DetectionAccuracyMetric)
-from .reid import CMCScore, ReidMAP, PairwiseAccuracy, PairwiseAccuracySubsets
-from .semantic_segmentation import SegmentationAccuracy, SegmentationIOU, SegmentationMeanAccuracy, SegmentationFWAcc
-from .character_recognition import CharacterRecognitionAccuracy
-from .regression import (
-    MeanAbsoluteErrorOnInterval,
-    MeanSquaredErrorOnInterval,
-
-    MeanAbsoluteError,
-    MeanSquaredError,
-
-    RootMeanSquaredErrorOnInterval,
-    RootMeanSquaredError,
-
-    FacialLandmarksPerPointNormedError,
-    FacialLandmarksNormedError,
-
-    PeakSignalToNoiseRatio,
-
-    AngleError
-)
-from .multilabel_recognition import MultiLabelRecall, MultiLabelPrecision, MultiLabelAccuracy, F1Score
-from .text_detection import TextDetectionMetric
-from .coco_metrics import MSCOCOAveragePresicion
-from .hit_ratio import HitRatioMetric, NDSGMetric
-
-
-__all__ = [
-    'MetricsExecutor',
-
-    'ClassificationAccuracy',
-    'ClassificationAccuracyClasses',
-    'ClipAccuracy',
-
-    'DetectionMAP',
-    'MissRate',
-    'Recall',
-    'DetectionAccuracyMetric',
-
-    'CMCScore',
-    'ReidMAP',
-    'PairwiseAccuracy',
-    'PairwiseAccuracySubsets',
-
-    'SegmentationAccuracy',
-    'SegmentationIOU',
-    'SegmentationMeanAccuracy',
-    'SegmentationFWAcc',
-
-    'CharacterRecognitionAccuracy',
-
-    'MeanAbsoluteError',
-    'MeanSquaredError',
-    'MeanAbsoluteErrorOnInterval',
-    'MeanSquaredErrorOnInterval',
-    'RootMeanSquaredError',
-    'RootMeanSquaredErrorOnInterval',
-    'FacialLandmarksPerPointNormedError',
-    'FacialLandmarksNormedError',
-    'PeakSignalToNoiseRatio',
-    'AngleError',
-
-    'MultiLabelAccuracy',
-    'MultiLabelRecall',
-    'MultiLabelPrecision',
-    'F1Score',
-
-    'TextDetectionMetric',
-
-    'MSCOCOAveragePresicion',
-
-    'HitRatioMetric',
-    'NDSGMetric'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/average_meter.py b/tools/accuracy_checker/accuracy_checker/metrics/average_meter.py
deleted file mode 100644 (file)
index 3c2e37a..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-
-class AverageMeter:
-    def __init__(self, loss=None, counter=None):
-        self.loss = loss or (lambda x, y: int(x == y))
-        self.counter = counter or (lambda x: 1)
-        self.accumulator = None
-        self.total_count = None
-
-    def update(self, annotation_val, prediction_val):
-        loss = self.loss(annotation_val, prediction_val)
-        increment = self.counter(annotation_val)
-
-        if self.accumulator is None and self.total_count is None:
-            # wrap in array for using numpy.divide with where attribute
-            # and support cases when loss function returns list-like object
-            self.accumulator = np.array(loss, dtype=float)
-            self.total_count = np.array(increment, dtype=float)
-        else:
-            self.accumulator += loss
-            self.total_count += increment
-
-    def evaluate(self):
-        if self.total_count is None:
-            return 0.0
-
-        return np.divide(
-            self.accumulator, self.total_count, out=np.zeros_like(self.accumulator), where=self.total_count != 0
-        )
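A minimal usage sketch of the `AverageMeter` removed above, assuming the class itself is in scope (for example, imported from the pre-removal `accuracy_checker.metrics.average_meter` module); the sample values are invented.

```python
# With the defaults, loss is an exact-match indicator and counter adds 1 per sample,
# so evaluate() yields a running accuracy.
meter = AverageMeter()
meter.update(annotation_val=3, prediction_val=3)  # hit
meter.update(annotation_val=5, prediction_val=2)  # miss
print(meter.evaluate())  # 0.5
```

Passing a vector-valued `loss`/`counter` pair, as the per-class classification metric further down does, makes the same accumulator produce per-class averages.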
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/character_recognition.py b/tools/accuracy_checker/accuracy_checker/metrics/character_recognition.py
deleted file mode 100644 (file)
index 1b7530a..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..representation import CharacterRecognitionAnnotation, CharacterRecognitionPrediction
-from .metric import PerImageEvaluationMetric
-from .average_meter import AverageMeter
-
-
-class CharacterRecognitionAccuracy(PerImageEvaluationMetric):
-    __provider__ = 'character_recognition_accuracy'
-
-    annotation_types = (CharacterRecognitionAnnotation, )
-    prediction_types = (CharacterRecognitionPrediction, )
-
-    def configure(self):
-        self.accuracy = AverageMeter(lambda annotation, prediction: int(annotation == prediction))
-
-    def update(self, annotation, prediction):
-        self.accuracy.update(annotation.label, prediction.label)
-
-    def evaluate(self, annotations, predictions):
-        return self.accuracy.evaluate()
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/classification.py b/tools/accuracy_checker/accuracy_checker/metrics/classification.py
deleted file mode 100644 (file)
index 1b8e953..0000000
+++ /dev/null
@@ -1,136 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..representation import ClassificationAnnotation, ClassificationPrediction
-from ..config import NumberField, StringField
-from .metric import BaseMetricConfig, PerImageEvaluationMetric
-from .average_meter import AverageMeter
-
-
-class AccuracyConfig(BaseMetricConfig):
-    top_k = NumberField(floats=False, min_value=1, optional=True)
-
-
-class PerClassAccuracyConfig(AccuracyConfig):
-    label_map = StringField(optional=True)
-
-
-class ClassificationAccuracy(PerImageEvaluationMetric):
-    """
-    Class for evaluating accuracy metric of classification models.
-    """
-
-    __provider__ = 'accuracy'
-
-    annotation_types = (ClassificationAnnotation, )
-    prediction_types = (ClassificationPrediction, )
-    _config_validator_type = AccuracyConfig
-
-    def configure(self):
-        self.top_k = self.config.get('top_k', 1)
-
-        def loss(annotation_label, prediction_top_k_labels):
-            return int(annotation_label in prediction_top_k_labels)
-
-        self.accuracy = AverageMeter(loss)
-
-    def update(self, annotation, prediction):
-        self.accuracy.update(annotation.label, prediction.top_k(self.top_k))
-
-    def evaluate(self, annotations, predictions):
-        return self.accuracy.evaluate()
-
-
-class ClassificationAccuracyClasses(PerImageEvaluationMetric):
-    """
-    Class for evaluating accuracy for each class of classification models.
-    """
-
-    __provider__ = 'accuracy_per_class'
-
-    annotation_types = (ClassificationAnnotation, )
-    prediction_types = (ClassificationPrediction, )
-
-    _config_validator_type = PerClassAccuracyConfig
-
-    def configure(self):
-        self.top_k = self.config.get('top_k', 1)
-        label_map = self.config.get('label_map', 'label_map')
-        self.labels = self.dataset.metadata.get(label_map)
-        self.meta['names'] = list(self.labels.values())
-
-        def loss(annotation_label, prediction_top_k_labels):
-            result = np.zeros_like(list(self.labels.keys()))
-            if annotation_label in prediction_top_k_labels:
-                result[annotation_label] = 1
-
-            return result
-
-        def counter(annotation_label):
-            result = np.zeros_like(list(self.labels.keys()))
-            result[annotation_label] = 1
-            return result
-
-        self.accuracy = AverageMeter(loss, counter)
-
-    def update(self, annotation, prediction):
-        self.accuracy.update(annotation.label, prediction.top_k(self.top_k))
-
-    def evaluate(self, annotations, predictions):
-        return self.accuracy.evaluate()
-
-
-class AverageProbMeter(AverageMeter):
-    def __init__(self):
-        def loss(annotation_label, prediction_scores):
-            return prediction_scores
-        super().__init__(loss=loss)
-
-
-class ClipAccuracy(PerImageEvaluationMetric):
-    __provider__ = 'clip_accuracy'
-
-    annotation_types = (ClassificationAnnotation, )
-    prediction_types = (ClassificationPrediction, )
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.clip_accuracy = AverageMeter()
-        self.video_accuracy = AverageMeter()
-        self.video_avg_prob = AverageProbMeter()
-        self.previous_video_id = None
-        self.previous_video_label = None
-
-    def update(self, annotation, prediction):
-        video_id = annotation.identifier.video
-
-        if self.previous_video_id is not None and video_id != self.previous_video_id:
-            video_top_label = np.argmax(self.video_avg_prob.evaluate())
-            self.video_accuracy.update(video_top_label, self.previous_video_label)
-            self.video_avg_prob = AverageProbMeter()
-
-        self.video_avg_prob.update(annotation.label, prediction.scores)
-
-        self.clip_accuracy.update(annotation.label, prediction.label)
-
-        self.previous_video_id = video_id
-        self.previous_video_label = annotation.label
-
-    def evaluate(self, annotations, predictions):
-        self.meta['names'] = ['clip_accuracy', 'video_accuracy']
-        return [self.clip_accuracy.evaluate(), self.video_accuracy.evaluate()]
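For reference, the top-k criterion used by `ClassificationAccuracy` above boils down to a membership test; the standalone snippet below is an illustration, not part of the removed code.

```python
def top_k_hit(annotation_label, prediction_top_k_labels):
    """A sample counts as correct if its label is among the k best-scored labels."""
    return int(annotation_label in prediction_top_k_labels)

print(top_k_hit(3, [7, 3, 1]))  # 1 -> correct under top-3 accuracy
print(top_k_hit(3, [7, 5, 1]))  # 0 -> miss
```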
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/coco_metrics.py b/tools/accuracy_checker/accuracy_checker/metrics/coco_metrics.py
deleted file mode 100644 (file)
index e9c1ea2..0000000
+++ /dev/null
@@ -1,317 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from functools import singledispatch
-from typing import Union
-import numpy as np
-from ..config import NumberField, BaseField
-from ..representation import (
-    DetectionPrediction,
-    DetectionAnnotation,
-    PoseEstimationPrediction,
-    PoseEstimationAnnotation
-)
-from ..utils import get_or_parse_value
-from .overlap import Overlap
-from .metric import BaseMetricConfig, PerImageEvaluationMetric
-
-COCO_THRESHOLDS = {
-    '.50': [0.5],
-    '.75': [0.75],
-    '.50:.05:.95': np.linspace(.5, 0.95, np.round((0.95 - .5) / .05).astype(int) + 1, endpoint=True)
-}
-
-
-class MSCOCOMetricConfig(BaseMetricConfig):
-    max_detections = NumberField(optional=True)
-    threshold = BaseField(optional=True)
-
-
-class MSCOCOBaseMetric(PerImageEvaluationMetric):
-    annotation_types = (PoseEstimationAnnotation, DetectionAnnotation)
-    prediction_types = (PoseEstimationPrediction, DetectionPrediction)
-    _config_validator_type = MSCOCOMetricConfig
-
-    def configure(self):
-        self.max_detections = self.config.get('max_detections', 20)
-        self.thresholds = get_or_parse_value(self.config.get('threshold', '.50:.05:.95'), COCO_THRESHOLDS)
-        label_map = self.dataset.metadata.get('label_map', [])
-        self.labels = [
-            label for label in label_map
-            if label != self.dataset.metadata.get('background_label')
-        ]
-        self.meta['names'] = [label_map[label] for label in self.labels]
-        self.matching_results = [[] for _ in self.labels]
-
-    def update(self, annotation, prediction):
-        compute_iou, create_boxes = select_specific_parameters(annotation)
-
-        for label_id, label in enumerate(self.labels):
-            detections, scores, dt_difficult = prepare_predictions(prediction, label, self.max_detections)
-            ground_truth, gt_difficult, iscrowd, boxes, areas = prepare_annotations(annotation, label, create_boxes)
-            iou = compute_iou(ground_truth, detections, boxes, areas)
-            self.matching_results[label_id].append(
-                evaluate_image(
-                    ground_truth,
-                    gt_difficult,
-                    iscrowd,
-                    detections,
-                    dt_difficult,
-                    scores,
-                    iou,
-                    self.thresholds
-                    ))
-
-    def evaluate(self, annotations, predictions):
-        pass
-
-
-class MSCOCOAveragePresicion(MSCOCOBaseMetric):
-    __provider__ = 'coco_precision'
-
-    def evaluate(self, annotations, predictions):
-        precision = [
-            compute_precision_recall(self.thresholds, self.matching_results[i])[0]
-            for i, _ in enumerate(self.labels)
-        ]
-
-        return precision
-
-
-class MSCOCORecall(MSCOCOBaseMetric):
-    __provider__ = 'coco_recall'
-
-    def evaluate(self, annotations, predictions):
-        recalls = [
-            compute_precision_recall(self.thresholds, self.matching_results[i])[1]
-            for i, _ in enumerate(self.labels)
-        ]
-
-        return recalls
-@singledispatch
-def select_specific_parameters(annotation):
-    return compute_iou_boxes, False
-
-@select_specific_parameters.register(PoseEstimationAnnotation)
-def pose_estimation_params(annotation):
-    return compute_oks, True
-
-@singledispatch
-def prepare(entry, order):
-    return np.c_[entry.x_mins[order], entry.y_mins[order], entry.x_maxs[order], entry.y_maxs[order]]
-
-
-@prepare.register(Union[PoseEstimationPrediction, PoseEstimationAnnotation])
-def prepare_keypoints(entry, order):
-    if entry.size == 0:
-        return []
-
-    if np.size(entry.x_values[order]) == 0:
-        return []
-
-    return np.concatenate((entry.x_values[order], entry.y_values[order], entry.visibility[order]), axis=-1)
-
-
-def prepare_predictions(prediction, label, max_detections):
-    if prediction.size == 0:
-        return [], [], []
-    prediction_ids = prediction.labels == label
-    scores = prediction.scores[prediction_ids]
-    if np.size(scores) == 0:
-        return [], [], []
-    scores_ids = np.argsort(- scores, kind='mergesort')
-    difficult_box_mask = np.full(prediction.size, False)
-    difficult_box_mask[prediction.metadata.get('difficult_boxes', [])] = True
-    difficult_for_label = difficult_box_mask[prediction_ids]
-    if len(scores_ids) > max_detections:
-        scores_ids = scores_ids[:max_detections]
-    detections = prepare(prediction, prediction_ids)
-    detections = detections[scores_ids]
-
-    return detections, scores[scores_ids], difficult_for_label[scores_ids]
-
-
-def prepare_annotations(annotation, label, create_boxes=False):
-    annotation_ids = annotation.labels == label
-    difficult_box_mask = np.full(annotation.size, False)
-    difficult_box_indices = annotation.metadata.get("difficult_boxes", [])
-    iscrowd = np.array(annotation.metadata.get('iscrowd', [0]*annotation.size))
-    difficult_box_mask[difficult_box_indices] = True
-    difficult_box_mask[iscrowd > 0] = True
-    difficult_label = difficult_box_mask[annotation_ids]
-    not_difficult_box_indices = np.argwhere(~difficult_label).reshape(-1)
-    difficult_box_indices = np.argwhere(difficult_label).reshape(-1)
-    iscrowd_label = iscrowd[annotation_ids]
-    order = np.hstack((not_difficult_box_indices, difficult_box_indices)).astype(int)
-    boxes = None
-    areas = None
-    if create_boxes:
-        boxes = np.array(annotation.bboxes)
-        boxes = boxes[annotation_ids]
-        areas = np.array(annotation.areas)
-        areas = areas[annotation_ids] if np.size(areas) > 0 else np.array([])
-        boxes = boxes[order]
-        areas = areas[order]
-
-    return prepare(annotation, annotation_ids)[order], difficult_label[order], iscrowd_label[order], boxes, areas
-
-
-def compute_precision_recall(thresholds, matching_results):
-    num_thresholds = len(thresholds)
-    rectangle_thresholds = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
-    num_rec_thresholds = len(rectangle_thresholds)
-    precision = -np.ones((num_thresholds, num_rec_thresholds))  # -1 for the precision of absent categories
-    recall = -np.ones(num_thresholds)
-    dt_scores = np.concatenate([e['scores'] for e in matching_results])
-    inds = np.argsort(-dt_scores, kind='mergesort')
-    dtm = np.concatenate([e['dt_matches'] for e in matching_results], axis=1)[:, inds]
-    dt_ignored = np.concatenate([e['dt_ignore'] for e in matching_results], axis=1)[:, inds]
-    gt_ignored = np.concatenate([e['gt_ignore'] for e in matching_results])
-    npig = np.count_nonzero(gt_ignored == 0)
-    tps = np.logical_and(dtm, np.logical_not(dt_ignored))
-    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dt_ignored))
-    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
-    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
-    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
-        tp = np.array(tp)
-        fp = np.array(fp)
-        num_detections = len(tp)
-        rc = tp / npig
-        pr = tp / (fp + tp + np.spacing(1))
-        q = np.zeros(num_rec_thresholds)
-
-        if num_detections:
-            recall[t] = rc[-1]
-        else:
-            recall[t] = 0
-
-        # numpy is slow without cython optimization when accessing elements,
-        # so using python lists here gives a significant speed improvement
-        pr = pr.tolist()
-        q = q.tolist()
-
-        for i in range(num_detections - 1, 0, -1):
-            if pr[i] > pr[i - 1]:
-                pr[i - 1] = pr[i]
-
-        inds = np.searchsorted(rc, rectangle_thresholds, side='left')
-        try:
-            for ri, pi in enumerate(inds):
-                q[ri] = pr[pi]
-        except IndexError:
-            pass
-        precision[t] = np.array(q)
-
-    mean_precision = 0 if np.size(precision[precision > -1]) == 0 else np.mean(precision[precision > -1])
-    mean_recall = 0 if np.size(recall[recall > -1]) == 0 else np.mean(recall[recall > -1])
-
-    return mean_precision, mean_recall
-
-
-def compute_iou_boxes(annotation, prediction, *args, **kwargs):
-    if np.size(annotation) == 0 or np.size(prediction) == 0:
-        return []
-    overlap = Overlap.provide('iou')
-    iou = np.zeros((prediction.size // 4, annotation.size // 4), dtype=np.float32)
-    for i, box_a in enumerate(annotation):
-        for j, box_b in enumerate(prediction):
-            iou[j, i] = overlap(box_a, box_b)
-
-    return iou
-
-
-def compute_oks(annotation_points, prediction_points, annotation_boxes, annotation_areas):
-    if np.size(prediction_points) == 0 or np.size(annotation_points) == 0:
-        return []
-    oks = np.zeros((len(prediction_points), len(annotation_points)))
-    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89])/10.0
-    variance = (sigmas * 2)**2
-    # compute oks between each detection and ground truth object
-    for gt_idx, gt_points in enumerate(annotation_points):
-        # create bounds for ignore regions(double the gt bbox)
-        xgt = gt_points[:17]
-        ygt = gt_points[17:34]
-        vgt = gt_points[34:]
-        k1 = np.count_nonzero(vgt > 0)
-        x0_bbox, y0_bbox, x1_bbox, y1_bbox = annotation_boxes[gt_idx]
-        area_gt = annotation_areas[gt_idx]
-        w_bbox = x1_bbox - x0_bbox
-        h_bbox = y1_bbox - y0_bbox
-        x0 = x0_bbox - w_bbox
-        x1 = x0_bbox + w_bbox * 2
-        y0 = y0_bbox - h_bbox
-        y1 = y0_bbox + h_bbox * 2
-        for dt_idx, dt_points in enumerate(prediction_points):
-            xdt = dt_points[:17]
-            ydt = dt_points[17:34]
-            if k1 > 0:
-                # measure the per-keypoint distance if keypoints visible
-                x_diff = xdt - xgt
-                y_diff = ydt - ygt
-            else:
-                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
-                zeros = np.zeros(len(sigmas))
-                x_diff = np.max((zeros, x0 - xdt), axis=0) + np.max((zeros, xdt - x1), axis=0)
-                y_diff = np.max((zeros, y0 - ydt), axis=0) + np.max((zeros, ydt - y1), axis=0)
-            evaluation = (x_diff ** 2 + y_diff ** 2) / variance / (area_gt + np.spacing(1)) / 2
-            if k1 > 0:
-                evaluation = evaluation[vgt > 0]
-            oks[dt_idx, gt_idx] = np.sum(np.exp(- evaluation)) / evaluation.shape[0]
-
-    return oks
-
-
-def evaluate_image(ground_truth, gt_difficult, iscrowd, detections, dt_difficult, scores, iou, thresholds):
-    thresholds_num = len(thresholds)
-    gt_num = len(ground_truth)
-    dt_num = len(detections)
-    gt_matched = np.zeros((thresholds_num, gt_num))
-    dt_matched = np.zeros((thresholds_num, dt_num))
-    gt_ignored = gt_difficult
-    dt_ignored = np.zeros((thresholds_num, dt_num))
-    if np.size(iou):
-        for tind, t in enumerate(thresholds):
-            for dtind, _ in enumerate(detections):
-                # information about best match so far (matched_id = -1 -> unmatched)
-                iou_current = min([t, 1-1e-10])
-                matched_id = -1
-                for gtind, _ in enumerate(ground_truth):
-                    # if this gt already matched, and not a crowd, continue
-                    if gt_matched[tind, gtind] > 0 and not iscrowd[gtind]:
-                        continue
-                    # if dt matched to reg gt, and on ignore gt, stop
-                    if matched_id > -1 and not gt_ignored[matched_id] and gt_ignored[gtind]:
-                        break
-                    # continue to next gt unless better match made
-                    if iou[dtind, gtind] < iou_current:
-                        continue
-                    # if match successful and best so far, store appropriately
-                    iou_current = iou[dtind, gtind]
-                    matched_id = gtind
-                # if match made store id of match for both dt and gt
-                if matched_id == -1:
-                    continue
-                dt_ignored[tind, dtind] = gt_ignored[matched_id]
-                dt_matched[tind, dtind] = 1
-                gt_matched[tind, matched_id] = dtind
-    # store results for given image
-    return {
-        'dt_matches': dt_matched,
-        'gt_matches': gt_matched,
-        'gt_ignore': gt_ignored,
-        'dt_ignore': np.logical_or(dt_ignored, dt_difficult),
-        'scores': scores
-    }
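As a quick sanity check of the `COCO_THRESHOLDS` table removed above, the precomputed `.50:.05:.95` entry expands to the ten standard COCO IoU thresholds; the standalone snippet below evaluates an equivalent expression.

```python
import numpy as np

# Equivalent to the '.50:.05:.95' entry of COCO_THRESHOLDS above.
thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
print(thresholds)  # [0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85 0.9  0.95]
```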
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/detection.py b/tools/accuracy_checker/accuracy_checker/metrics/detection.py
deleted file mode 100644 (file)
index 7a5c29c..0000000
+++ /dev/null
@@ -1,473 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import bisect
-import enum
-import warnings
-from typing import List
-
-import numpy as np
-
-from ..utils import finalize_metric_result
-from .overlap import Overlap, IOA
-from ..config import BoolField, NumberField, StringField
-from ..representation import DetectionAnnotation, DetectionPrediction
-from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
-
-
-class APIntegralType(enum.Enum):
-    voc_11_point = '11point'
-    voc_max = 'max'
-
-
-class BaseDetectionMetricConfig(BaseMetricConfig):
-    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
-    ignore_difficult = BoolField(optional=True)
-    include_boundaries = BoolField(optional=True)
-    distinct_conf = BoolField(optional=True)
-    allow_multiple_matches_per_ignored = BoolField(optional=True)
-    overlap_method = StringField(optional=True, choices=Overlap.providers)
-    use_filtered_tp = BoolField(optional=True)
-
-
-class MAPConfigValidator(BaseDetectionMetricConfig):
-    integral = StringField(choices=[e.value for e in APIntegralType], optional=True)
-
-
-class MRConfigValidator(BaseDetectionMetricConfig):
-    fppi_level = NumberField(min_value=0, max_value=1)
-
-
-class DAConfigValidator(BaseDetectionMetricConfig):
-    use_normalization = BoolField(optional=True)
-
-
-class BaseDetectionMetricMixin:
-    def configure(self):
-        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
-        self.ignore_difficult = self.config.get('ignore_difficult', True)
-        self.include_boundaries = self.config.get('include_boundaries', True)
-        self.distinct_conf = self.config.get('distinct_conf', False)
-        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
-        self.overlap_method = Overlap.provide(self.config.get('overlap_method', 'iou'), self.include_boundaries)
-        self.use_filtered_tp = self.config.get('use_filtered_tp', False)
-
-        label_map = self.config.get('label_map', 'label_map')
-        labels = self.dataset.metadata.get(label_map, {})
-        self.labels = labels.keys()
-        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
-        self.meta['names'] = [labels[name] for name in valid_labels]
-
-    def per_class_detection_statistics(self, annotations, predictions, labels):
-        labels_stat = {}
-        for label in labels:
-            tp, fp, conf, n = bbox_match(
-                annotations, predictions, int(label),
-                self.overlap_method, self.overlap_threshold,
-                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
-                self.use_filtered_tp
-            )
-
-            if not tp.size:
-                labels_stat[label] = {
-                    'precision': np.array([]),
-                    'recall': np.array([]),
-                    'thresholds': conf,
-                    'fppi': np.array([])
-                }
-                continue
-
-            # select only values for distinct confidences
-            if self.distinct_conf:
-                distinct_value_indices = np.where(np.diff(conf))[0]
-                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
-            else:
-                threshold_indexes = np.arange(conf.size)
-
-            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]
-
-            labels_stat[label] = {
-                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
-                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
-                'thresholds': conf[threshold_indexes],
-                'fppi': fp / len(annotations)
-            }
-
-        return labels_stat
-
-
-class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
-    """
-    Class for evaluating mAP metric of detection models.
-    """
-
-    __provider__ = 'map'
-
-    annotation_types = (DetectionAnnotation, )
-    prediction_types = (DetectionPrediction, )
-
-    _config_validator_type = MAPConfigValidator
-
-    def configure(self):
-        super().configure()
-        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))
-
-    def evaluate(self, annotations, predictions):
-        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
-        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
-
-        average_precisions = []
-        for label in labels_stat:
-            label_precision = labels_stat[label]['precision']
-            label_recall = labels_stat[label]['recall']
-            if label_recall.size:
-                ap = average_precision(label_precision, label_recall, self.integral)
-                average_precisions.append(ap)
-            else:
-                average_precisions.append(np.nan)
-
-        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
-        if not average_precisions:
-            warnings.warn("No detections to compute mAP")
-            average_precisions.append(0)
-
-        return average_precisions
-
-
-class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
-    """
-    Class for evaluating Miss Rate metric of detection models.
-    """
-
-    __provider__ = 'miss_rate'
-
-    annotation_types = (DetectionAnnotation, )
-    prediction_types = (DetectionPrediction, )
-
-    _config_validator_type = MRConfigValidator
-
-    def configure(self):
-        super().configure()
-        self.fppi_level = self.config.get('fppi_level')
-
-    def evaluate(self, annotations, predictions):
-        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
-        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
-
-        miss_rates = []
-        for label in labels_stat:
-            label_miss_rate = 1.0 - labels_stat[label]['recall']
-            label_fppi = labels_stat[label]['fppi']
-
-            position = bisect.bisect_left(label_fppi, self.fppi_level)
-            m0 = max(0, position - 1)
-            m1 = position if position < len(label_miss_rate) else m0
-            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))
-
-        return miss_rates
-
-
-class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
-    """
-    Class for evaluating recall metric of detection models.
-    """
-
-    __provider__ = 'recall'
-
-    annotation_types = (DetectionAnnotation, )
-    prediction_types = (DetectionPrediction, )
-
-    _config_validator_type = BaseDetectionMetricConfig
-
-    def evaluate(self, annotations, predictions):
-        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
-        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
-
-        recalls = []
-        for label in labels_stat:
-            label_recall = labels_stat[label]['recall']
-            if label_recall.size:
-                max_recall = label_recall[-1]
-                recalls.append(max_recall)
-            else:
-                recalls.append(np.nan)
-
-        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
-        if not recalls:
-            warnings.warn("No detections to compute mAP")
-            recalls.append(0)
-
-        return recalls
-
-
-class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
-    __provider__ = 'detection_accuracy'
-
-    annotation_types = (DetectionAnnotation, )
-    prediction_types = (DetectionPrediction, )
-    _config_validator_type = DAConfigValidator
-
-    def configure(self):
-        super().configure()
-        self.use_normalization = self.config.get('use_normalization', False)
-
-    def evaluate(self, annotations, predictions):
-        all_matches, _, _ = match_detections_class_agnostic(
-            predictions, annotations, self.overlap_threshold, self.overlap_method
-        )
-        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
-        if self.use_normalization:
-            return np.mean(normalize_confusion_matrix(cm).diagonal())
-
-        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))
-
-
-def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
-    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
-    for gt, prediction in zip(gt_data, predicted_data):
-        for match_pair in all_matched_ids[gt.identifier]:
-            gt_label = int(gt.labels[match_pair[0]])
-            pred_label = int(prediction.labels[match_pair[1]])
-            out_cm[gt_label, pred_label] += 1
-
-    return out_cm
-
-
-def normalize_confusion_matrix(cm):
-    row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
-    return cm.astype(np.float32) / row_sums
-
-
-def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
-    all_matches = {}
-    total_gt_bbox_num = 0
-    matched_gt_bbox_num = 0
-
-    for gt, prediction in zip(gt_data, predicted_data):
-        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
-        predicted_bboxes = np.stack(
-            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
-        )
-
-        total_gt_bbox_num += len(gt_bboxes)
-
-        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)
-
-        matches = []
-        for _ in gt_bboxes:
-            best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
-            best_match_value = similarity_matrix[best_match_pos]
-
-            if best_match_value <= min_iou:
-                break
-
-            gt_id = best_match_pos[0]
-            predicted_id = best_match_pos[1]
-
-            similarity_matrix[gt_id, :] = 0.0
-            similarity_matrix[:, predicted_id] = 0.0
-
-            matches.append((gt_id, predicted_id))
-            matched_gt_bbox_num += 1
-
-        all_matches[gt.identifier] = matches
-
-    return all_matches, total_gt_bbox_num, matched_gt_bbox_num
-
-
-def calculate_similarity_matrix(set_a, set_b, overlap):
-    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
-    for i, box_a in enumerate(set_a):
-        for j, box_b in enumerate(set_b):
-            similarity[i, j] = overlap(box_a, box_b)
-
-    return similarity
-
-
-def average_precision(precision, recall, integral):
-    if integral == APIntegralType.voc_11_point:
-        result = 0.
-        for point in np.arange(0., 1.1, 0.1):
-            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
-            result = result + accumulator / 11.
-
-        return result
-
-    if integral != APIntegralType.voc_max:
-        raise NotImplementedError("Integral type not implemented")
-
-    # first append sentinel values at the end
-    recall = np.concatenate(([0.], recall, [1.]))
-    precision = np.concatenate(([0.], precision, [0.]))
-
-    # compute the precision envelope
-    for i in range(precision.size - 1, 0, -1):
-        precision[i - 1] = np.maximum(precision[i - 1], precision[i])
-
-    # to calculate area under PR curve, look for points
-    # where X axis (recall) changes value
-    change_point = np.where(recall[1:] != recall[:-1])[0]
-    # and sum (\Delta recall) * recall
-    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])
-
-
-def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
-               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
-               include_boundaries=True, use_filtered_tp=False):
-    """
-    Args:
-        annotation: ground truth bounding boxes.
-        prediction: predicted bounding boxes.
-        label: class for which bounding boxes are matched.
-        overlap_evaluator: evaluator of overlap.
-        overlap_thresh: bounding box IoU threshold.
-        ignore_difficult: ignores difficult bounding boxes (see Pascal VOC).
-        allow_multiple_matches_per_ignored: allows multiple matches per ignored.
-        include_boundaries: if True, the box width and height are calculated as max - min + 1.
-        use_filtered_tp: if True, ignored objects are counted during evaluation.
-    Returns:
-        tp: tp[i] == 1 if detection with i-th highest score is true positive.
-        fp: fp[i] == 1 if detection with i-th highest score is false positive.
-        thresholds: array of confidence thresholds.
-        number_ground_truth: number of ground truth boxes used in evaluation (difficult boxes excluded when ignored).
-    """
-
-    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
-        annotation, ignore_difficult, label
-    )
-    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
-        label, prediction, ignore_difficult
-    )
-
-    tp = np.zeros_like(prediction_images)
-    fp = np.zeros_like(prediction_images)
-
-    for image in range(prediction_images.shape[0]):
-        gt_img = annotation[prediction_images[image]]
-        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
-        used = used_boxes[gt_img.identifier]
-
-        idx = gt_img.labels == label
-        if not np.array(idx).any():
-            fp[image] = 1
-            continue
-
-        prediction_box = prediction_boxes[image][1:]
-        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]
-
-        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
-        if ignore_difficult and allow_multiple_matches_per_ignored:
-            ioa = IOA(include_boundaries)
-            ignored = np.where(annotation_difficult == 1)[0]
-            ignored_annotation_boxes = (
-                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
-                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
-            )
-            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)
-
-        max_overlap = -np.inf
-
-        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
-        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
-        if not_ignored_overlaps.size:
-            max_overlap = np.max(not_ignored_overlaps)
-
-        if max_overlap < overlap_thresh and ignored_overlaps.size:
-            max_overlap = np.max(ignored_overlaps)
-        max_overlapped = np.where(overlaps == max_overlap)[0]
-
-        def set_false_positive(box_index):
-            is_box_difficult = difficult_boxes_prediction[box_index].any()
-            return int(not ignore_difficult or not is_box_difficult)
-
-        if max_overlap < overlap_thresh:
-            fp[image] = set_false_positive(image)
-            continue
-
-        if not annotation_difficult[max_overlapped].any():
-            if not used[max_overlapped].any():
-                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
-                    tp[image] = 1
-                    used[max_overlapped] = True
-            else:
-                fp[image] = set_false_positive(image)
-        elif not allow_multiple_matches_per_ignored:
-            if used[max_overlapped].any():
-                fp[image] = set_false_positive(image)
-            used[max_overlapped] = True
-
-    return tp, fp, prediction_boxes[:, 0], number_ground_truth
-
-
-def _prepare_annotation_boxes(annotation, ignore_difficult, label):
-    used_boxes = {}
-    difficult_boxes = {}
-    num_ground_truth = 0
-
-    for ground_truth in annotation:
-        idx_for_label = ground_truth.labels == label
-        filtered_label = ground_truth.labels[idx_for_label]
-        used_ = np.zeros_like(filtered_label)
-        used_boxes[ground_truth.identifier] = used_
-        num_ground_truth += used_.shape[0]
-
-        difficult_box_mask = np.full_like(ground_truth.labels, False)
-        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
-        if ignore_difficult:
-            difficult_box_mask[difficult_box_indices] = True
-        difficult_box_mask = difficult_box_mask[idx_for_label]
-
-        difficult_boxes[ground_truth.identifier] = difficult_box_mask
-        if ignore_difficult:
-            num_ground_truth -= np.sum(difficult_box_mask)
-
-    return used_boxes, num_ground_truth, difficult_boxes
-
-
-def _prepare_prediction_boxes(label, predictions, ignore_difficult):
-    prediction_images = []
-    prediction_boxes = []
-    indexes = []
-    difficult_boxes = []
-    for i, prediction in enumerate(predictions):
-        idx = prediction.labels == label
-
-        prediction_images.append(np.full(prediction.labels[idx].shape, i))
-        prediction_boxes.append(np.c_[
-            prediction.scores[idx],
-            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
-        ])
-
-        difficult_box_mask = np.full_like(prediction.labels, False)
-        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
-        if ignore_difficult:
-            difficult_box_mask[difficult_box_indices] = True
-
-        difficult_boxes.append(difficult_box_mask)
-        indexes.append(np.argwhere(idx))
-
-    prediction_boxes = np.concatenate(prediction_boxes)
-    difficult_boxes = np.concatenate(difficult_boxes)
-    sorted_order = np.argsort(-prediction_boxes[:, 0])
-    prediction_boxes = prediction_boxes[sorted_order]
-    prediction_images = np.concatenate(prediction_images)[sorted_order]
-    difficult_boxes = difficult_boxes[sorted_order]
-
-    return prediction_boxes, prediction_images, difficult_boxes
-
-
-def get_valid_labels(labels, background):
-    return list(filter(lambda label: label != background, labels))
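A small worked example for the `average_precision` helper removed above, assuming `average_precision` and `APIntegralType` from this file are in scope; the precision/recall values are invented for illustration.

```python
import numpy as np

# Toy PR curve: 4 detections against 4 ground-truth boxes, with one false positive at rank 3.
precision = np.array([1.0, 1.0, 2 / 3, 0.75])
recall = np.array([0.25, 0.5, 0.5, 0.75])

# Area under the interpolated PR curve ("max" integral, the default used by the map metric above).
print(average_precision(precision, recall, APIntegralType.voc_max))  # 0.6875
```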
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/hit_ratio.py b/tools/accuracy_checker/accuracy_checker/metrics/hit_ratio.py
deleted file mode 100644 (file)
index f5ce2c7..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import heapq
-import math
-
-import numpy as np
-
-from ..representation import HitRatioAnnotation, HitRatioPrediction
-from .metric import FullDatasetEvaluationMetric, BaseMetricConfig
-from ..config import NumberField
-
-
-class RecommenderConfigValidator(BaseMetricConfig):
-    top_k = NumberField(floats=False, min_value=1, optional=True)
-
-
-class BaseRecommenderMetric(FullDatasetEvaluationMetric):
-    annotation_types = (HitRatioAnnotation, )
-    prediction_types = (HitRatioPrediction, )
-    _config_validator_type = RecommenderConfigValidator
-
-    def __init__(self, discounter, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.discounter = discounter or (lambda item, rank: int(item in rank))
-
-    def configure(self):
-        self.top_k = self.config.get('top_k', 10)
-        self.users_num = self.dataset.metadata.get('users_number')
-        self.pred_per_user = {i: [] for i in range(self.users_num)}
-        self.gt_items = {}
-
-    def update(self, annotation, prediction):
-        self.pred_per_user[prediction.user].append((prediction.item, prediction.scores))
-        if annotation.positive:
-            self.gt_items[annotation.user] = annotation.item
-
-    def evaluate(self, annotations, predictions):
-        iter_num = len(self.pred_per_user[0])
-
-        measure = []
-        for user in range(self.users_num):
-            map_item_score = {}
-            for j in range(iter_num):
-                item = self.pred_per_user[user][j][0]
-                score = self.pred_per_user[user][j][1]
-                map_item_score[item] = score
-            ranklist = heapq.nlargest(self.top_k, map_item_score, key=map_item_score.get)
-            measure.append(self.discounter(self.gt_items[user], ranklist))
-
-        return np.mean(measure)
-
-
-def hit_ratio_discounter(item, rank):
-    return int(item in rank)
-
-
-def ndcg_discounter(item, rank):
-    if item in rank:
-        return math.log(2) / math.log(rank.index(item) + 2)
-
-    return 0
-
-
-class HitRatioMetric(BaseRecommenderMetric):
-    """
-    Class for evaluating Hit Ratio metric
-    """
-
-    __provider__ = 'hit_ratio'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(hit_ratio_discounter, *args, **kwargs)
-
-
-class NDSGMetric(BaseRecommenderMetric):
-    """
-    Class for evaluating Normalized Discounted Cumulative Gain metric
-    """
-
-    __provider__ = 'ndcg'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(ndcg_discounter, *args, **kwargs)
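A short illustration of the two discounters removed above, assuming `hit_ratio_discounter` and `ndcg_discounter` are in scope; the ranked list is made up.

```python
# Both discounters score a single ground-truth item against a ranked top-k list.
rank = ['b', 'a', 'c']

print(hit_ratio_discounter('a', rank))       # 1   -> item appears anywhere in the list
print(ndcg_discounter('b', rank))            # 1.0 -> log(2)/log(2), first position
print(round(ndcg_discounter('c', rank), 3))  # 0.5 -> log(2)/log(4), third position
print(ndcg_discounter('x', rank))            # 0   -> item missing from the list
```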
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/metric.py b/tools/accuracy_checker/accuracy_checker/metrics/metric.py
deleted file mode 100644 (file)
index cb229dc..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..representation import ContainerRepresentation
-from ..config import ConfigError
-from ..utils import is_single_metric_source, get_supported_representations
-from ..presenters import BasePresenter
-from ..config import ConfigValidator, NumberField, StringField
-from ..dependency import ClassProvider
-from ..utils import zipped_transform
-
-
-class BaseMetricConfig(ConfigValidator):
-    type = StringField()
-    name = StringField(optional=True)
-    reference = NumberField(optional=True)
-    threshold = NumberField(min_value=0, optional=True)
-    presenter = StringField(choices=BasePresenter.providers, optional=True)
-    label_map = StringField(optional=True)
-    prediction_source = StringField(optional=True)
-    annotation_source = StringField(optional=True)
-
-
-class Metric(ClassProvider):
-    """
-    Interface for evaluating metrics.
-    """
-
-    __provider_type__ = 'metric'
-
-    annotation_types = ()
-    prediction_types = ()
-
-    _config_validator_type = BaseMetricConfig
-
-    def __init__(self, config, dataset, name=None, state=None):
-        self.config = config
-        self.name = name
-        self.dataset = dataset
-        self.state = state
-        self._update_iter = 0
-        self.meta = {}
-
-        self.validate_config()
-        self.configure()
-        message_unsupported_multi_source = 'metric {} does not support several {} sources'
-        self.annotation_source = self.config.get('annotation_source')
-
-        if self.annotation_source and not is_single_metric_source(self.annotation_source):
-            raise ConfigError(message_unsupported_multi_source.format(self.name, 'annotation'))
-
-        self.prediction_source = self.config.get('prediction_source')
-        if self.prediction_source and not is_single_metric_source(self.prediction_source):
-            raise ConfigError(message_unsupported_multi_source.format(self.name, 'prediction'))
-
-    def __call__(self, *args, **kwargs):
-        return self.submit_all(*args, **kwargs)
-
-    def submit(self, annotation, prediction):
-        self.update(annotation, prediction)
-
-    def submit_all(self, annotations, predictions):
-        return self.evaluate(annotations, predictions)
-
-    def update(self, annotation, prediction):
-        pass
-
-    def evaluate(self, annotations, predictions):
-        raise NotImplementedError
-
-    def configure(self):
-        """
-        Specifies configuration structure for metric entry.
-        """
-
-        pass
-
-    def validate_config(self):
-        """
-        Validate that metric entry meets all configuration structure requirements.
-        """
-
-        self._config_validator_type(
-            self.name, on_extra_argument=BaseMetricConfig.ERROR_ON_EXTRA_ARGUMENT
-        ).validate(self.config)
-
-    def _update_state(self, fn, state_key, default_factory=None):
-        iter_key = "{}_global_it".format(state_key)
-        if state_key not in self.state:
-            default = default_factory() if default_factory else None
-            self.state[state_key] = default
-            self.state[iter_key] = 0
-
-        self._update_iter += 1
-        if self.state[iter_key] < self._update_iter:
-            self.state[iter_key] += 1
-            self.state[state_key] = fn(self.state[state_key])
-
-    def _resolve_representation_containers(self, annotation, prediction):
-        def get_resolve_subject(representation, source=None):
-            def is_container(representation):
-                if isinstance(representation, ContainerRepresentation):
-                    return True
-                representation_parents = type(representation).__bases__
-                representation_parents_names = [parent.__name__ for parent in representation_parents]
-
-                return ContainerRepresentation.__name__ in representation_parents_names
-
-            if not is_container(representation):
-                return representation
-
-            if not source:
-                return representation.values()
-
-            representation = representation.get(source)
-            if not representation:
-                raise ConfigError('{} not found'.format(source))
-
-            return representation
-
-        annotation = get_resolve_subject(annotation, self.annotation_source)
-        prediction = get_resolve_subject(prediction, self.prediction_source)
-
-        def resolve(representation, supported_types, representation_name):
-            message_not_found = 'suitable {} for metric {} not found'
-            message_need_source = 'you need specify {} source for metric {}'
-
-            representation = get_supported_representations(representation, supported_types)
-            if not representation:
-                raise ConfigError(message_not_found.format(representation_name, self.name))
-
-            if len(representation) > 1:
-                raise ConfigError(message_need_source.format(representation_name, self.name))
-
-            return representation[0]
-
-        resolved_annotation = resolve(annotation, self.annotation_types, 'annotation')
-        resolved_prediction = resolve(prediction, self.prediction_types, 'prediction')
-
-        return resolved_annotation, resolved_prediction
-
-
-class PerImageEvaluationMetric(Metric):
-    def submit(self, annotation, prediction):
-        annotation_, prediction_ = self._resolve_representation_containers(annotation, prediction)
-        self.update(annotation_, prediction_)
-
-    def evaluate(self, annotations, predictions):
-        raise NotImplementedError
-
-
-class FullDatasetEvaluationMetric(Metric):
-    def submit_all(self, annotations, predictions):
-        annotations_, predictions_ = zipped_transform(self._resolve_representation_containers, annotations, predictions)
-        return self.evaluate(annotations_, predictions_)
-
-    def evaluate(self, annotations, predictions):
-        raise NotImplementedError
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/metric_executor.py b/tools/accuracy_checker/accuracy_checker/metrics/metric_executor.py
deleted file mode 100644 (file)
index ff16cd7..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import namedtuple
-
-from ..presenters import BasePresenter, EvaluationResult
-from ..config import StringField
-from ..utils import zipped_transform
-from .metric import BaseMetricConfig, Metric
-from ..config import ConfigError
-
-MetricInstance = namedtuple(
-    'MetricInstance', ['name', 'metric_type', 'metric_fn', 'reference', 'threshold', 'presenter']
-)
-
-
-class MetricConfig(BaseMetricConfig):
-    type = StringField(choices=Metric.providers)
-
-
-class MetricsExecutor:
-    """
-    Class for evaluating metrics according to dataset configuration entry.
-    """
-
-    def __init__(self, metrics_config, dataset=None, state=None):
-        self.state = state or {}
-        dataset_name = dataset.name if dataset else ''
-        message_prefix = '{}'.format(dataset_name)
-        if not metrics_config:
-            raise ConfigError('{} dataset config must specify "{}"'.format(message_prefix, 'metrics'))
-
-        self._dataset = dataset
-
-        self.metrics = []
-        type_ = 'type'
-        identifier = 'name'
-        reference = 'reference'
-        threshold = 'threshold'
-        presenter = 'presenter'
-        for metric_config_entry in metrics_config:
-            metric_config = MetricConfig(
-                "metrics", on_extra_argument=MetricConfig.IGNORE_ON_EXTRA_ARGUMENT
-            )
-            metric_type = metric_config_entry.get(type_)
-            metric_config.validate(metric_config_entry, type_)
-
-            metric_identifier = metric_config_entry.get(identifier, metric_type)
-
-            metric_fn = Metric.provide(
-                metric_type, metric_config_entry, self.dataset, metric_identifier, state=self.state
-            )
-            metric_presenter = BasePresenter.provide(metric_config_entry.get(presenter, 'print_scalar'))
-
-            self.metrics.append(MetricInstance(
-                metric_identifier,
-                metric_type,
-                metric_fn,
-                metric_config_entry.get(reference),
-                metric_config_entry.get(threshold),
-                metric_presenter
-            ))
-
-    @property
-    def dataset(self):
-        return self._dataset
-
-    @dataset.setter
-    def dataset(self, dataset):
-        self._dataset = dataset
-        for metric in self.metrics:
-            metric.metric_fn.dataset = dataset
-
-    def __call__(self, context, *args, **kwargs):
-        self.update_metrics_on_batch(context.annotation_batch, context.prediction_batch)
-        context.annotations.extend(context.annotation_batch)
-        context.predictions.extend(context.prediction_batch)
-
-    def update_metrics_on_object(self, annotation, prediction):
-        """
-        Updates the metric value for the given annotation and prediction objects.
-        """
-
-        for metric in self.metrics:
-            metric.metric_fn.submit(annotation, prediction)
-
-    def update_metrics_on_batch(self, annotation, prediction):
-        """
-        Updates metric values for the given batch.
-
-        Args:
-            annotation: list of annotation objects in the batch.
-            prediction: list of prediction objects in the batch.
-        """
-
-        zipped_transform(self.update_metrics_on_object, annotation, prediction)
-
-    def iterate_metrics(self, annotations, predictions):
-        for name, metric_type, functor, reference, threshold, presenter in self.metrics:
-            yield presenter, EvaluationResult(
-                name=name,
-                metric_type=metric_type,
-                evaluated_value=functor(annotations, predictions),
-                reference_value=reference,
-                threshold=threshold,
-                meta=functor.meta,
-            )
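For reference, a minimal sketch of the kind of `metrics` dataset config entry `MetricsExecutor` above consumes; the metric names and values here are illustrative assumptions, not a fixed list:

```python
# Illustrative only: each entry needs a 'type'; 'name', 'reference', 'threshold'
# and 'presenter' are optional (presenter defaults to 'print_scalar' above).
metrics_config = [
    {'type': 'accuracy', 'top_k': 1},
    {'type': 'accuracy', 'name': 'accuracy@5', 'top_k': 5, 'presenter': 'print_scalar'},
]

# Hypothetical driving code (assuming a prepared dataset and batches):
# executor = MetricsExecutor(metrics_config, dataset)
# executor.update_metrics_on_batch(annotation_batch, prediction_batch)
# for presenter, result in executor.iterate_metrics(annotations, predictions):
#     ...  # hand each EvaluationResult to its presenter
```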
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/multilabel_recognition.py b/tools/accuracy_checker/accuracy_checker/metrics/multilabel_recognition.py
deleted file mode 100644 (file)
index 9b24ce1..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from .metric import PerImageEvaluationMetric, BaseMetricConfig
-from ..representation import MultiLabelRecognitionAnnotation, MultiLabelRecognitionPrediction
-from ..config import StringField, BoolField
-
-
-class MultiLabelConfigValidator(BaseMetricConfig):
-    label_map = StringField(optional=True)
-    calculate_average = BoolField(optional=True)
-
-
-class MultiLabelMetric(PerImageEvaluationMetric):
-    annotation_types = (MultiLabelRecognitionAnnotation,)
-    prediction_types = (MultiLabelRecognitionPrediction,)
-    _config_validator_type = MultiLabelConfigValidator
-
-    def configure(self):
-        label_map = self.config.get('label_map', 'label_map')
-        self.labels = self.dataset.metadata.get(label_map)
-        self.calculate_average = self.config.get('calculate_average', True)
-
-        self.meta['scale'] = 1
-        self.meta['postfix'] = ''
-        self.meta['calculate_mean'] = False
-        self.meta['names'] = list(self.labels.values())
-        if self.calculate_average:
-            self.meta['names'].append('average')
-        self.tp = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-        self.fp = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-        self.tn = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-        self.fn = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-
-        self.counter = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-
-    def update(self, annotation, prediction):
-        def loss(annotation_labels, prediction_labels):
-            tp_result = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-            fp_results = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-            tn_results = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-            fn_results = np.zeros_like(list(self.labels.keys()), dtype=np.float)
-
-            for index, label in enumerate(annotation_labels):
-                if label == 1 and label == prediction_labels[index]:
-                    tp_result[index] = 1.
-                    continue
-
-                if label == 1 and label != prediction_labels[index]:
-                    fn_results[index] = 1.
-                    continue
-
-                if label == 0 and label == prediction_labels[index]:
-                    tn_results[index] = 1.
-                    continue
-
-                if label == 0 and label != prediction_labels[index]:
-                    fp_results[index] = 1.
-                    continue
-
-            return tp_result, fp_results, tn_results, fn_results
-
-        def counter(annotation_label):
-            count = np.zeros_like(annotation_label, dtype=float)
-            cond = np.where(np.array(annotation_label) != -1)
-            count[cond] = 1.
-            return count
-
-        tp_upd, fp_upd, tn_upd, fn_upd = loss(annotation.multi_label, prediction.multi_label)
-        self.tp = np.add(self.tp, tp_upd)
-        self.fp = np.add(self.fp, fp_upd)
-        self.tn = np.add(self.tn, tn_upd)
-        self.fn = np.add(self.fn, fn_upd)
-
-        self.counter = np.add(self.counter, counter(annotation.multi_label))
-
-    def evaluate(self, annotations, predictions):
-        pass
-
-
-class MultiLabelAccuracy(MultiLabelMetric):
-    __provider__ = 'multi_accuracy'
-
-    def evaluate(self, annotations, predictions):
-        tp_tn = np.add(self.tp, self.tn, dtype=float)
-        per_class = np.divide(tp_tn, self.counter, out=np.zeros_like(tp_tn, dtype=float), where=self.counter != 0)
-        if not self.calculate_average:
-            return per_class
-        average = np.sum(tp_tn) / np.sum(self.counter)
-
-        return [*per_class, average]
-
-
-class MultiLabelPrecision(MultiLabelMetric):
-    __provider__ = 'multi_precision'
-
-    def evaluate(self, annotations, predictions):
-        tp_fp = np.add(self.tp, self.fp, dtype=float)
-        per_class = np.divide(self.tp, tp_fp, out=np.zeros_like(self.tp, dtype=float), where=tp_fp != 0)
-        if not self.calculate_average:
-            return per_class
-        average = np.sum(self.tp) / np.sum(tp_fp)
-
-        return [*per_class, average]
-
-
-class MultiLabelRecall(MultiLabelMetric):
-    __provider__ = 'multi_recall'
-
-    def evaluate(self, annotations, predictions):
-        tp_fn = np.add(self.tp, self.fn, dtype=float)
-        per_class = np.divide(self.tp, tp_fn, out=np.zeros_like(self.tp, dtype=float), where=tp_fn != 0)
-        if not self.calculate_average:
-            return per_class
-        average = np.sum(self.tp) / np.sum(tp_fn)
-
-        return [*per_class, average]
-
-
-class F1Score(PerImageEvaluationMetric):
-    __provider__ = 'f1-score'
-    annotation_types = (MultiLabelRecognitionAnnotation,)
-    prediction_types = (MultiLabelRecognitionPrediction,)
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.precision = MultiLabelPrecision(self.config, self.dataset)
-        self.recall = MultiLabelRecall(self.config, self.dataset)
-
-    def validate_config(self):
-        class _F1ScoreValidator(BaseMetricConfig):
-            label_map = StringField(optional=True)
-            calculate_average = BoolField(optional=True)
-
-        f1_score_config_validator = _F1ScoreValidator(
-            'f1_score', on_extra_argument=_F1ScoreValidator.ERROR_ON_EXTRA_ARGUMENT
-        )
-        f1_score_config_validator.validate(self.config)
-
-    def configure(self):
-        label_map = self.config.get('label_map', 'label_map')
-        self.labels = self.dataset.metadata.get(label_map)
-        self.calculate_average = self.config.get('calculate_average', True)
-        self.meta['scale'] = 1
-        self.meta['postfix'] = ''
-        self.meta['calculate_mean'] = False
-        self.meta['names'] = list(self.labels.values())
-        if self.calculate_average:
-            self.meta['names'].append('average')
-
-    def update(self, annotation, prediction):
-        self.precision.update(annotation, prediction)
-        self.recall.update(annotation, prediction)
-
-    def evaluate(self, annotations, predictions):
-        precisions = self.precision.evaluate(annotations, predictions)
-        recalls = self.recall.evaluate(annotations, predictions)
-
-        precision_add = np.add(precisions[:-1], recalls[:-1], dtype=float)
-        precision_multiply = np.multiply(precisions[:-1], recalls[:-1], dtype=float)
-
-        per_class = 2 * np.divide(
-            precision_multiply, precision_add, out=np.zeros_like(precision_multiply, dtype=float),
-            where=precision_add != 0
-        )
-        if not self.calculate_average:
-            return per_class
-
-        average = 2 * (precisions[-1] * recalls[-1]) / (precisions[-1] + recalls[-1])
-
-        return [*per_class, average]
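As a worked, self-contained example of the per-class bookkeeping done by `MultiLabelMetric` and of the way `F1Score` combines precision and recall, here is a vectorized NumPy sketch (labels are 1 = present, 0 = absent, -1 = ignored, matching the `counter()` helper above):

```python
import numpy as np

# Two samples, three classes; -1 marks labels excluded from counting.
annotation = np.array([[1, 0, -1], [1, 1, 0]])
prediction = np.array([[1, 1, 0], [0, 1, 0]])

valid = annotation != -1
tp = np.sum((annotation == 1) & (prediction == 1) & valid, axis=0).astype(float)
fp = np.sum((annotation == 0) & (prediction == 1) & valid, axis=0).astype(float)
tn = np.sum((annotation == 0) & (prediction == 0) & valid, axis=0).astype(float)
fn = np.sum((annotation == 1) & (prediction == 0) & valid, axis=0).astype(float)
counter = valid.sum(axis=0).astype(float)

accuracy = np.divide(tp + tn, counter, out=np.zeros_like(counter), where=counter != 0)
precision = np.divide(tp, tp + fp, out=np.zeros_like(tp), where=(tp + fp) != 0)
recall = np.divide(tp, tp + fn, out=np.zeros_like(tp), where=(tp + fn) != 0)
f1 = np.divide(2 * precision * recall, precision + recall,
               out=np.zeros_like(precision), where=(precision + recall) != 0)
print(accuracy, precision, recall, f1)
```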
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/overlap.py b/tools/accuracy_checker/accuracy_checker/metrics/overlap.py
deleted file mode 100644 (file)
index d9fffc7..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..dependency import ClassProvider
-
-
-class Overlap(ClassProvider):
-    __provider_type__ = 'overlap'
-
-    @staticmethod
-    def intersections(prediction_box, annotation_boxes):
-        px_min, py_min, px_max, py_max = prediction_box
-        ax_mins, ay_mins, ax_maxs, ay_maxs = annotation_boxes
-
-        x_mins = np.maximum(ax_mins, px_min)
-        y_mins = np.maximum(ay_mins, py_min)
-        x_maxs = np.minimum(ax_maxs, px_max)
-        y_maxs = np.minimum(ay_maxs, py_max)
-
-        return x_mins, y_mins, np.maximum(x_mins, x_maxs), np.maximum(y_mins, y_maxs)
-
-    def __init__(self, include_boundaries=None):
-        self.boundary = 1 if include_boundaries else 0
-
-    def __call__(self, *args, **kwargs):
-        return self.evaluate(*args, **kwargs)
-
-    def evaluate(self, prediction_box, annotation_boxes):
-        raise NotImplementedError
-
-    def area(self, box):
-        x0, y0, x1, y1 = box
-        return (x1 - x0 + self.boundary) * (y1 - y0 + self.boundary)
-
-
-class IOU(Overlap):
-    __provider__ = 'iou'
-
-    def evaluate(self, prediction_box, annotation_boxes):
-        intersections_area = self.area(self.intersections(prediction_box, annotation_boxes))
-        unions = self.area(prediction_box) + self.area(annotation_boxes) - intersections_area
-        return np.divide(
-            intersections_area, unions, out=np.zeros_like(intersections_area, dtype=float), where=unions != 0
-        )
-
-
-class IOA(Overlap):
-    __provider__ = 'ioa'
-
-    def evaluate(self, prediction_box, annotation_boxes):
-        intersections_area = self.area(self.intersections(prediction_box, annotation_boxes))
-        prediction_area = self.area(prediction_box)
-        return np.divide(
-            intersections_area, prediction_area, out=np.zeros_like(intersections_area, dtype=float),
-            where=prediction_area != 0
-        )
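A standalone sketch of the IoU computed by the `IOU` provider above, for one prediction box against several annotation boxes; boundaries are not included here (i.e. the `include_boundaries=False` case):

```python
import numpy as np

# Boxes are (x_min, y_min, x_max, y_max); annotation_boxes is shaped (4, N).
prediction_box = np.array([10.0, 10.0, 50.0, 50.0])
annotation_boxes = np.array([[20.0, 20.0, 60.0, 60.0],
                             [55.0, 55.0, 70.0, 70.0]]).T

px_min, py_min, px_max, py_max = prediction_box
ax_min, ay_min, ax_max, ay_max = annotation_boxes

ix_min = np.maximum(ax_min, px_min)
iy_min = np.maximum(ay_min, py_min)
ix_max = np.minimum(ax_max, px_max)
iy_max = np.minimum(ay_max, py_max)

intersection = np.maximum(ix_max - ix_min, 0) * np.maximum(iy_max - iy_min, 0)
pred_area = (px_max - px_min) * (py_max - py_min)
ann_area = (ax_max - ax_min) * (ay_max - ay_min)
union = pred_area + ann_area - intersection

iou = np.divide(intersection, union, out=np.zeros_like(intersection), where=union != 0)
print(iou)  # first box overlaps the prediction, second does not
```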
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/regression.py b/tools/accuracy_checker/accuracy_checker/metrics/regression.py
deleted file mode 100644 (file)
index c70866f..0000000
+++ /dev/null
@@ -1,357 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import warnings
-import math
-import numpy as np
-
-from ..representation import (
-    RegressionAnnotation,
-    RegressionPrediction,
-    FacialLandmarksAnnotation,
-    FacialLandmarksPrediction,
-    SuperResolutionAnnotation,
-    SuperResolutionPrediction,
-    GazeVectorAnnotation,
-    GazeVectorPrediction
-)
-
-from .metric import PerImageEvaluationMetric, BaseMetricConfig
-from ..config import BaseField, NumberField, BoolField, ConfigError, StringField
-from ..utils import string_to_tuple, finalize_metric_result
-
-
-class BaseIntervalRegressionMetricConfig(BaseMetricConfig):
-    intervals = BaseField(optional=True)
-    start = NumberField(optional=True)
-    end = NumberField(optional=True)
-    step = NumberField(optional=True)
-    ignore_values_not_in_interval = BoolField(optional=True)
-
-
-class BaseRegressionMetric(PerImageEvaluationMetric):
-    annotation_types = (RegressionAnnotation, )
-    prediction_types = (RegressionPrediction, )
-
-    def __init__(self, value_differ, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.value_differ = value_differ
-
-    def configure(self):
-        self.meta.update({'names': ['mean', 'std'], 'scale': 1, 'postfix': ' ', 'calculate_mean': False})
-        self.magnitude = []
-
-    def update(self, annotation, prediction):
-        self.magnitude.append(self.value_differ(annotation.value, prediction.value))
-
-    def evaluate(self, annotations, predictions):
-        return np.mean(self.magnitude), np.std(self.magnitude)
-
-
-class BaseRegressionOnIntervals(PerImageEvaluationMetric):
-    annotation_types = (RegressionAnnotation, )
-    prediction_types = (RegressionPrediction, )
-    _config_validator_type = BaseIntervalRegressionMetricConfig
-
-    def __init__(self, value_differ, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.value_differ = value_differ
-
-    def configure(self):
-        self.meta.update({'scale': 1, 'postfix': ' ', 'calculate_mean': False})
-        self.ignore_out_of_range = self.config.get('ignore_values_not_in_interval', True)
-
-        self.intervals = self.config.get('intervals')
-        if not self.intervals:
-            stop = self.config.get('end')
-            if not stop:
-                raise ConfigError('intervals or start-step-end of interval should be specified for metric')
-
-            start = self.config.get('start', 0.0)
-            step = self.config.get('step', 1.0)
-            self.intervals = np.arange(start, stop + step, step)
-
-        if not isinstance(self.intervals, (list, np.ndarray)):
-            self.intervals = string_to_tuple(self.intervals)
-
-        self.intervals = np.unique(self.intervals)
-        self.magnitude = [[] for _ in range(len(self.intervals) + 1)]
-
-        self.meta['names'] = ([])
-        if not self.ignore_out_of_range:
-            self.meta['names'] = (['mean: < ' + str(self.intervals[0]), 'std: < ' + str(self.intervals[0])])
-
-        for index in range(len(self.intervals) - 1):
-            self.meta['names'].append('mean: <= ' + str(self.intervals[index]) + ' < ' + str(self.intervals[index + 1]))
-            self.meta['names'].append('std: <= ' + str(self.intervals[index]) + ' < ' + str(self.intervals[index + 1]))
-
-        if not self.ignore_out_of_range:
-            self.meta['names'].append('mean: > ' + str(self.intervals[-1]))
-            self.meta['names'].append('std: > ' + str(self.intervals[-1]))
-
-    def update(self, annotation, prediction):
-        index = find_interval(annotation.value, self.intervals)
-        self.magnitude[index].append(self.value_differ(annotation.value, prediction.value))
-
-    def evaluate(self, annotations, predictions):
-        if self.ignore_out_of_range:
-            self.magnitude = self.magnitude[1:-1]
-
-        result = [[np.mean(values), np.std(values)] if values else [np.nan, np.nan] for values in self.magnitude]
-        result, self.meta['names'] = finalize_metric_result(np.reshape(result, -1), self.meta['names'])
-
-        if not result:
-            warnings.warn("No values in given interval")
-            result.append(0)
-
-        return result
-
-
-class MeanAbsoluteError(BaseRegressionMetric):
-    __provider__ = 'mae'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(mae_differ, *args, **kwargs)
-
-
-class MeanSquaredError(BaseRegressionMetric):
-    __provider__ = 'mse'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(mse_differ, *args, **kwargs)
-
-
-class RootMeanSquaredError(BaseRegressionMetric):
-    __provider__ = 'rmse'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(mse_differ, *args, **kwargs)
-
-    def evaluate(self, annotations, predictions):
-        return np.sqrt(np.mean(self.magnitude)), np.sqrt(np.std(self.magnitude))
-
-
-class MeanAbsoluteErrorOnInterval(BaseRegressionOnIntervals):
-    __provider__ = 'mae_on_interval'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(mae_differ, *args, **kwargs)
-
-
-class MeanSquaredErrorOnInterval(BaseRegressionOnIntervals):
-    __provider__ = 'mse_on_interval'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(mse_differ, *args, **kwargs)
-
-
-class RootMeanSquaredErrorOnInterval(BaseRegressionOnIntervals):
-    __provider__ = 'rmse_on_interval'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(mse_differ, *args, **kwargs)
-
-    def evaluate(self, annotations, predictions):
-        if self.ignore_out_of_range:
-            self.magnitude = self.magnitude[1:-1]
-
-        result = []
-        for values in self.magnitude:
-            error = [np.sqrt(np.mean(values)), np.sqrt(np.std(values))] if values else [np.nan, np.nan]
-            result.append(error)
-
-        result, self.meta['names'] = finalize_metric_result(np.reshape(result, -1), self.meta['names'])
-
-        if not result:
-            warnings.warn("No values in given interval")
-            result.append(0)
-
-        return result
-
-
-class FacialLandmarksPerPointNormedError(PerImageEvaluationMetric):
-    __provider__ = 'per_point_normed_error'
-
-    annotation_types = (FacialLandmarksAnnotation, )
-    prediction_types = (FacialLandmarksPrediction, )
-
-    def configure(self):
-        self.meta.update({'scale': 1, 'postfix': ' ', 'calculate_mean': True, 'data_format': '{:.4f}'})
-        self.magnitude = []
-
-    def update(self, annotation, prediction):
-        result = point_regression_differ(
-            annotation.x_values, annotation.y_values, prediction.x_values, prediction.y_values
-        )
-        result /= np.maximum(annotation.interocular_distance, np.finfo(np.float64).eps)
-        self.magnitude.append(result)
-
-    def evaluate(self, annotations, predictions):
-        num_points = np.shape(self.magnitude)[1]
-        point_result_name_pattern = 'point_{}_normed_error'
-        self.meta['names'] = [point_result_name_pattern.format(point_id) for point_id in range(num_points)]
-        per_point_rmse = np.mean(self.magnitude, axis=0)
-        per_point_rmse, self.meta['names'] = finalize_metric_result(per_point_rmse, self.meta['names'])
-
-        return per_point_rmse
-
-
-class NormedErrorMetricConfig(BaseMetricConfig):
-    calculate_std = BoolField(optional=True)
-    percentile = NumberField(optional=True, floats=False, min_value=0, max_value=100)
-
-
-class FacialLandmarksNormedError(PerImageEvaluationMetric):
-    __provider__ = 'normed_error'
-
-    annotation_types = (FacialLandmarksAnnotation, )
-    prediction_types = (FacialLandmarksPrediction, )
-    _config_validator_type = NormedErrorMetricConfig
-
-    def configure(self):
-        self.calculate_std = self.config.get('calculate_std', False)
-        self.percentile = self.config.get('percentile')
-        self.meta.update({
-            'scale': 1,
-            'postfix': ' ',
-            'calculate_mean': not self.calculate_std or not self.percentile,
-            'data_format': '{:.4f}',
-            'names': ['mean']
-        })
-        self.magnitude = []
-
-    def update(self, annotation, prediction):
-        per_point_result = point_regression_differ(
-            annotation.x_values, annotation.y_values, prediction.x_values, prediction.y_values
-        )
-        avg_result = np.sum(per_point_result) / len(per_point_result)
-        avg_result /= np.maximum(annotation.interocular_distance, np.finfo(np.float64).eps)
-        self.magnitude.append(avg_result)
-
-    def evaluate(self, annotations, predictions):
-        result = [np.mean(self.magnitude)]
-
-        if self.calculate_std:
-            result.append(np.std(self.magnitude))
-            self.meta['names'].append('std')
-
-        if self.percentile:
-            sorted_magnitude = np.sort(self.magnitude)
-            index = len(self.magnitude) / 100 * self.percentile
-            result.append(sorted_magnitude[int(index)])
-            self.meta['names'].append('{}th percentile'.format(self.percentile))
-
-        return result
-
-
-def calculate_distance(x_coords, y_coords, selected_points):
-    first_point = [x_coords[selected_points[0]], y_coords[selected_points[0]]]
-    second_point = [x_coords[selected_points[1]], y_coords[selected_points[1]]]
-    return np.linalg.norm(np.subtract(first_point, second_point))
-
-
-def mae_differ(annotation_val, prediction_val):
-    return np.abs(annotation_val - prediction_val)
-
-
-def mse_differ(annotation_val, prediction_val):
-    return (annotation_val - prediction_val)**2
-
-
-def find_interval(value, intervals):
-    for index, point in enumerate(intervals):
-        if value < point:
-            return index
-
-    return len(intervals)
-
-
-def point_regression_differ(annotation_val_x, annotation_val_y, prediction_val_x, prediction_val_y):
-    loss = np.subtract(list(zip(annotation_val_x, annotation_val_y)), list(zip(prediction_val_x, prediction_val_y)))
-    return np.linalg.norm(loss, 2, axis=1)
-
-
-class PeakSignalToNoiseRatio(BaseRegressionMetric):
-    __provider__ = 'psnr'
-
-    annotation_types = (SuperResolutionAnnotation, )
-    prediction_types = (SuperResolutionPrediction, )
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(self._psnr_differ, *args, **kwargs)
-
-    def validate_config(self):
-        class _PSNRConfig(BaseMetricConfig):
-            scale_border = NumberField(optional=True, min_value=0)
-            color_order = StringField(optional=True, choices=['BGR', 'RGB'])
-
-        config_validator = _PSNRConfig('psnr', on_extra_argument=_PSNRConfig.ERROR_ON_EXTRA_ARGUMENT)
-        config_validator.validate(self.config)
-
-    def configure(self):
-        super().configure()
-        self.scale_border = self.config.get('scale_border', 4)
-        color_order = self.config.get('color_order', 'RGB')
-        channel_order = {
-            'BGR': [2, 1, 0],
-            'RGB': [0, 1, 2]
-        }
-        self.meta['postfix'] = 'dB'
-        self.channel_order = channel_order[color_order]
-
-    def _psnr_differ(self, annotation_image, prediction_image):
-        prediction = np.asarray(prediction_image).astype(np.float)
-        ground_truth = np.asarray(annotation_image).astype(np.float)
-
-        height, width = prediction.shape[:2]
-        prediction = prediction[
-            self.scale_border:height - self.scale_border,
-            self.scale_border:width - self.scale_border
-        ]
-        ground_truth = ground_truth[
-            self.scale_border:height - self.scale_border,
-            self.scale_border:width - self.scale_border
-        ]
-        image_difference = (prediction - ground_truth) / 255.  # rgb color space
-
-        r_channel_diff = image_difference[:, :, self.channel_order[0]]
-        g_channel_diff = image_difference[:, :, self.channel_order[1]]
-        b_channel_diff = image_difference[:, :, self.channel_order[2]]
-
-        channels_diff = (r_channel_diff * 65.738 + g_channel_diff * 129.057 + b_channel_diff * 25.064) / 256
-
-        mse = np.mean(channels_diff ** 2)
-        if mse == 0:
-            return np.Infinity
-
-        return -10 * math.log10(mse)
-
-
-def angle_differ(gt_gaze_vector, predicted_gaze_vector):
-    return np.arccos(
-        gt_gaze_vector.dot(predicted_gaze_vector) / np.linalg.norm(gt_gaze_vector)
-        / np.linalg.norm(predicted_gaze_vector)
-    ) * 180 / np.pi
-
-
-class AngleError(BaseRegressionMetric):
-    __provider__ = 'angle_error'
-
-    annotation_types = (GazeVectorAnnotation, )
-    prediction_types = (GazeVectorPrediction, )
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(angle_differ, *args, **kwargs)
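The constants 65.738, 129.057 and 25.064 in `_psnr_differ` above are the BT.601 luma weights, so the PSNR is effectively measured on the Y channel of the difference image. A standalone sketch of that computation (border cropping via `scale_border` omitted for brevity):

```python
import numpy as np

# Synthetic ground truth and a noisy "prediction" in RGB order.
rng = np.random.RandomState(0)
ground_truth = rng.randint(0, 256, size=(32, 32, 3)).astype(np.float64)
prediction = np.clip(ground_truth + rng.normal(0, 5, size=ground_truth.shape), 0, 255)

diff = (prediction - ground_truth) / 255.0
# BT.601 luma weights applied to the R, G, B difference channels.
y_diff = (diff[:, :, 0] * 65.738 + diff[:, :, 1] * 129.057 + diff[:, :, 2] * 25.064) / 256

mse = np.mean(y_diff ** 2)
psnr = np.inf if mse == 0 else -10 * np.log10(mse)
print('PSNR: {:.2f} dB'.format(psnr))
```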
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/reid.py b/tools/accuracy_checker/accuracy_checker/metrics/reid.py
deleted file mode 100644 (file)
index 37920f2..0000000
+++ /dev/null
@@ -1,369 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import defaultdict, namedtuple
-from sklearn.metrics import auc, precision_recall_curve
-# noinspection PyProtectedMember
-from sklearn.metrics.base import _average_binary_score
-import numpy as np
-
-from ..representation import (
-    ReIdentificationClassificationAnnotation,
-    ReIdentificationAnnotation,
-    ReIdentificationPrediction
-)
-from ..config import BaseField, BoolField, NumberField
-from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
-
-PairDesc = namedtuple('PairDesc', 'image1 image2 same')
-
-
-class CMCConfigValidator(BaseMetricConfig):
-    top_k = NumberField(floats=False, min_value=1, optional=True)
-    separate_camera_set = BoolField(optional=True)
-    single_gallery_shot = BoolField(optional=True)
-    first_match_break = BoolField(optional=True)
-    number_single_shot_repeats = NumberField(floats=False, optional=True)
-
-
-class ReidMapConfig(BaseMetricConfig):
-    interpolated_auc = BoolField(optional=True)
-
-
-class PWAccConfig(BaseMetricConfig):
-    min_score = BaseField(optional=True)
-
-
-class PWAccSubsetConfig(BaseMetricConfig):
-    subset_number = NumberField(optional=True, min_value=1, floats=False)
-
-
-class CMCScore(FullDatasetEvaluationMetric):
-    """
-    Cumulative Matching Characteristics (CMC) score.
-
-    Config:
-        annotation: reid annotation.
-        prediction: predicted embeddings.
-        top_k: number of k highest ranked samples to consider when matching.
-        separate_camera_set: should identities from the same camera view be filtered out.
-        single_gallery_shot: each identity has only one instance in the gallery.
-        number_single_shot_repeats: number of repeats for single_gallery_shot setting.
-        first_match_break: break on first matched gallery sample.
-    """
-
-    __provider__ = 'cmc'
-
-    annotation_types = (ReIdentificationAnnotation, )
-    prediction_types = (ReIdentificationPrediction, )
-    _config_validator_type = CMCConfigValidator
-
-    def configure(self):
-        self.top_k = self.config.get('top_k', 1)
-        self.separate_camera_set = self.config.get('separate_camera_set', False)
-        self.single_gallery_shot = self.config.get('single_gallery_shot', False)
-        self.first_match_break = self.config.get('first_match_break', True)
-        self.number_single_shot_repeats = self.config.get('number_single_shot_repeats', 10)
-
-    def evaluate(self, annotations, predictions):
-        dist_matrix = distance_matrix(annotations, predictions)
-        gallery_cameras, gallery_pids, query_cameras, query_pids = get_gallery_query_pids(annotations)
-
-        _cmc_score = eval_cmc(
-            dist_matrix, query_pids, gallery_pids, query_cameras, gallery_cameras, self.separate_camera_set,
-            self.single_gallery_shot, self.first_match_break, self.number_single_shot_repeats
-        )
-
-        return _cmc_score[self.top_k - 1]
-
-
-class ReidMAP(FullDatasetEvaluationMetric):
-    """
-    Mean Average Precision score.
-
-    Config:
-        annotation: reid annotation.
-        prediction: predicted embeddings.
-        interpolated_auc: whether the area under the precision-recall curve is computed with the trapezoidal rule (True) or as the step-function integral (False).
-    """
-
-    __provider__ = 'reid_map'
-
-    annotation_types = (ReIdentificationAnnotation, )
-    prediction_types = (ReIdentificationPrediction, )
-    _config_validator_type = ReidMapConfig
-
-    def configure(self):
-        self.interpolated_auc = self.config.get('interpolated_auc', True)
-
-    def evaluate(self, annotations, predictions):
-        dist_matrix = distance_matrix(annotations, predictions)
-        gallery_cameras, gallery_pids, query_cameras, query_pids = get_gallery_query_pids(annotations)
-
-        return eval_map(
-            dist_matrix, query_pids, gallery_pids, query_cameras, gallery_cameras, self.interpolated_auc
-        )
-
-
-class PairwiseAccuracy(FullDatasetEvaluationMetric):
-    __provider__ = 'pairwise_accuracy'
-
-    annotation_types = (ReIdentificationClassificationAnnotation, )
-    prediction_types = (ReIdentificationPrediction, )
-    _config_validator_type = PWAccConfig
-
-    def configure(self):
-        self.min_score = self.config.get('min_score', 'train_median')
-
-    def evaluate(self, annotations, predictions):
-        embed_distances, pairs = get_embedding_distances(annotations, predictions)
-
-        min_score = self.min_score
-        if min_score == 'train_median':
-            train_distances, _train_pairs = get_embedding_distances(annotations, predictions, train=True)
-            min_score = np.median(train_distances)
-
-        embed_same_class = embed_distances < min_score
-
-        accuracy = 0
-        for i, pair in enumerate(pairs):
-            same_label = pair.same
-            out_same = embed_same_class[i]
-
-            correct_prediction = same_label and out_same or (not same_label and not out_same)
-
-            if correct_prediction:
-                accuracy += 1
-
-        return float(accuracy) / len(pairs)
-
-
-class PairwiseAccuracySubsets(FullDatasetEvaluationMetric):
-    __provider__ = 'pairwise_accuracy_subsets'
-
-    annotation_types = (ReIdentificationClassificationAnnotation, )
-    prediction_types = (ReIdentificationPrediction, )
-    _config_validator_type = PWAccSubsetConfig
-
-    def configure(self):
-        self.subset_num = self.config.get('subset_number', 10)
-        self.accuracy_metric = PairwiseAccuracy(self.config, self.dataset)
-
-    def evaluate(self, annotations, predictions):
-        subset_results = []
-        first_images_annotations = list(filter(
-            lambda annotation: (len(annotation.negative_pairs) > 0 or len(annotation.positive_pairs) > 0), annotations
-        ))
-
-        idx_subsets = self.make_subsets(self.subset_num, len(first_images_annotations))
-        for subset in range(self.subset_num):
-            test_subset = self.get_subset(first_images_annotations, idx_subsets[subset]['test'])
-            test_subset = self.mark_subset(test_subset, False)
-
-            train_subset = self.get_subset(first_images_annotations, idx_subsets[subset]['train'])
-            train_subset = self.mark_subset(train_subset)
-
-            subset_result = self.accuracy_metric.evaluate(test_subset+train_subset, predictions)
-            subset_results.append(subset_result)
-
-        return np.mean(subset_results)
-
-    @staticmethod
-    def make_subsets(subset_num, dataset_size):
-        subsets = []
-        if subset_num > dataset_size:
-            raise ValueError('It is impossible to divide the dataset into more subsets than the number of annotations.')
-
-        for subset in range(subset_num):
-            lower_bnd = subset * dataset_size // subset_num
-            upper_bnd = (subset + 1) * dataset_size // subset_num
-            subset_test = [(lower_bnd, upper_bnd)]
-
-            subset_train = [(0, lower_bnd), (upper_bnd, dataset_size)]
-            subsets.append({'test': subset_test, 'train': subset_train})
-
-        return subsets
-
-    @staticmethod
-    def mark_subset(subset_annotations, train=True):
-        for annotation in subset_annotations:
-            annotation.metadata['train'] = train
-
-        return subset_annotations
-
-    @staticmethod
-    def get_subset(container, subset_bounds):
-        subset = []
-        for bound in subset_bounds:
-            subset += container[bound[0]: bound[1]]
-
-        return subset
-
-
-def extract_embeddings(annotation, prediction, query):
-    return np.stack([pred.embedding for pred, ann in zip(prediction, annotation) if ann.query == query])
-
-
-def get_gallery_query_pids(annotation):
-    gallery_pids = np.asarray([ann.person_id for ann in annotation if not ann.query])
-    query_pids = np.asarray([ann.person_id for ann in annotation if ann.query])
-    gallery_cameras = np.asarray([ann.camera_id for ann in annotation if not ann.query])
-    query_cameras = np.asarray([ann.camera_id for ann in annotation if ann.query])
-
-    return gallery_cameras, gallery_pids, query_cameras, query_pids
-
-
-def distance_matrix(annotation, prediction):
-    gallery_embeddings = extract_embeddings(annotation, prediction, query=False)
-    query_embeddings = extract_embeddings(annotation, prediction, query=True)
-
-    return 1. - np.matmul(gallery_embeddings, np.transpose(query_embeddings)).T
-
-
-def unique_sample(ids_dict, num):
-    mask = np.zeros(num, dtype=np.bool)
-    for indices in ids_dict.values():
-        mask[np.random.choice(indices)] = True
-
-    return mask
-
-
-def eval_map(distance_mat, query_ids, gallery_ids, query_cams, gallery_cams, interpolated_auc=False):
-    number_queries, _number_gallery = distance_mat.shape
-    # Sort and find correct matches
-    indices = np.argsort(distance_mat, axis=1)
-    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])  # type: np.ndarray
-
-    # Compute AP for each query
-    average_precisions = []
-    for query in range(number_queries):
-        # Filter out the same id and same camera
-        valid = (gallery_ids[indices[query]] != query_ids[query]) | (gallery_cams[indices[query]] != query_cams[query])
-
-        y_true = matches[query, valid]
-        y_score = -distance_mat[query][indices[query]][valid]
-        if not np.any(y_true):
-            continue
-
-        average_precisions.append(binary_average_precision(y_true, y_score, interpolated_auc=interpolated_auc))
-
-    if not average_precisions:
-        raise RuntimeError("No valid query")
-
-    return np.mean(average_precisions)
-
-
-def eval_cmc(distance_mat, query_ids, gallery_ids, query_cams, gallery_cams, separate_camera_set=False,
-             single_gallery_shot=False, first_match_break=False, number_single_shot_repeats=10, top_k=100):
-    number_queries, _number_gallery = distance_mat.shape
-
-    if not single_gallery_shot:
-        number_single_shot_repeats = 1
-
-    # Sort and find correct matches
-    indices = np.argsort(distance_mat, axis=1)
-    matches = gallery_ids[indices] == query_ids[:, np.newaxis]  # type: np.ndarray
-
-    # Compute CMC for each query
-    ret = np.zeros(top_k)
-    num_valid_queries = 0
-    for query in range(number_queries):
-        valid = get_valid_subset(
-            gallery_cams, gallery_ids, query, indices, query_cams, query_ids, separate_camera_set
-        )  # type: np.ndarray
-
-        if not np.any(matches[query, valid]):
-            continue
-
-        ids_dict = defaultdict(list)
-        if single_gallery_shot:
-            gallery_indexes = gallery_ids[indices[query][valid]]
-            for j, x in zip(np.where(valid)[0], gallery_indexes):
-                ids_dict[x].append(j)
-
-        for _ in range(number_single_shot_repeats):
-            if single_gallery_shot:
-                # Randomly choose one instance for each id
-                # required for correct validation on CUHK datasets
-                # http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html
-                sampled = (valid & unique_sample(ids_dict, len(valid)))
-                index = np.nonzero(matches[query, sampled])[0]
-            else:
-                index = np.nonzero(matches[query, valid])[0]
-
-            delta = 1. / (len(index) * number_single_shot_repeats)
-            for j, k in enumerate(index):
-                if k - j >= top_k:
-                    break
-                if first_match_break:
-                    ret[k - j] += 1
-                    break
-                ret[k - j] += delta
-
-        num_valid_queries += 1
-
-    if num_valid_queries == 0:
-        raise RuntimeError("No valid query")
-
-    return ret.cumsum() / num_valid_queries
-
-
-def get_valid_subset(gallery_cams, gallery_ids, query_index, indices, query_cams, query_ids, separate_camera_set):
-    # Filter out the same id and same camera
-    valid = (
-        (gallery_ids[indices[query_index]] != query_ids[query_index]) |
-        (gallery_cams[indices[query_index]] != query_cams[query_index])
-    )
-    if separate_camera_set:
-        # Filter out samples from same camera
-        valid &= (gallery_cams[indices[query_index]] != query_cams[query_index])
-
-    return valid
-
-
-def get_embedding_distances(annotation, prediction, train=False):
-    image_indexes = {}
-    for i, pred in enumerate(prediction):
-        image_indexes[pred.identifier] = i
-
-    pairs = []
-    for image1 in annotation:
-        if train != image1.metadata.get("train", False):
-            continue
-
-        for image2 in image1.positive_pairs:
-            pairs.append(PairDesc(image_indexes[image1.identifier], image_indexes[image2], True))
-        for image2 in image1.negative_pairs:
-            pairs.append(PairDesc(image_indexes[image1.identifier], image_indexes[image2], False))
-
-    embed1 = np.asarray([prediction[idx].embedding for idx, _, _ in pairs])
-    embed2 = np.asarray([prediction[idx].embedding for _, idx, _ in pairs])
-
-    return 0.5 * (1 - np.sum(embed1 * embed2, axis=1)), pairs
-
-
-def binary_average_precision(y_true, y_score, interpolated_auc=True):
-    def _average_precision(y_true_, y_score_, sample_weight=None):
-        precision, recall, _ = precision_recall_curve(y_true_, y_score_, sample_weight)
-        if not interpolated_auc:
-            # Return the step function integral
-            # The following works because the last entry of precision is
-            # guaranteed to be 1, as returned by precision_recall_curve
-            return -1 * np.sum(np.diff(recall) * np.array(precision)[:-1])
-
-        return auc(recall, precision)
-
-    return _average_binary_score(_average_precision, y_true, y_score, average="macro")
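The `distance_matrix` helper above computes 1 minus cosine similarity between gallery and query embeddings, which assumes the embeddings are already L2-normalized. A minimal sketch of that step and of the ranking the CMC/mAP routines start from (random data, for illustration only):

```python
import numpy as np

rng = np.random.RandomState(0)
gallery = rng.randn(5, 128)
query = rng.randn(3, 128)
# Normalize so that a plain dot product equals cosine similarity.
gallery /= np.linalg.norm(gallery, axis=1, keepdims=True)
query /= np.linalg.norm(query, axis=1, keepdims=True)

dist_matrix = 1.0 - np.matmul(gallery, query.T).T  # shape: (num_queries, num_gallery)
ranking = np.argsort(dist_matrix, axis=1)          # gallery indices, closest first
print(dist_matrix.shape, ranking[:, 0])            # best gallery match per query
```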
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/semantic_segmentation.py b/tools/accuracy_checker/accuracy_checker/metrics/semantic_segmentation.py
deleted file mode 100644 (file)
index a6138ff..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..config import BoolField
-from ..representation import (
-    SegmentationAnnotation,
-    SegmentationPrediction,
-    BrainTumorSegmentationAnnotation,
-    BrainTumorSegmentationPrediction
-)
-from .metric import PerImageEvaluationMetric, BaseMetricConfig
-from ..utils import finalize_metric_result
-
-
-class SegmentationMetricConfig(BaseMetricConfig):
-    use_argmax = BoolField(optional=True)
-
-
-class SegmentationMetric(PerImageEvaluationMetric):
-    annotation_types = (SegmentationAnnotation, )
-    prediction_types = (SegmentationPrediction, )
-    _config_validator_type = SegmentationMetricConfig
-
-    CONFUSION_MATRIX_KEY = 'segmentation_confusion_matrix'
-
-    def evaluate(self, annotations, predictions):
-        raise NotImplementedError
-
-    def configure(self):
-        self.use_argmax = self.config.get('use_argmax', True)
-
-    def update(self, annotation, prediction):
-        n_classes = len(self.dataset.labels)
-        prediction_mask = np.argmax(prediction.mask, axis=0) if self.use_argmax else prediction.mask.astype('int64')
-
-        def update_confusion_matrix(confusion_matrix):
-            label_true = annotation.mask.flatten()
-            label_pred = prediction_mask.flatten()
-
-            mask = (label_true >= 0) & (label_true < n_classes)
-            hist = np.bincount(n_classes * label_true[mask].astype(int) + label_pred[mask], minlength=n_classes ** 2)
-            hist = hist.reshape(n_classes, n_classes)
-            confusion_matrix += hist
-
-            return confusion_matrix
-
-        self._update_state(update_confusion_matrix, self.CONFUSION_MATRIX_KEY, lambda: np.zeros((n_classes, n_classes)))
-
-
-class SegmentationAccuracy(SegmentationMetric):
-    __provider__ = 'segmentation_accuracy'
-
-    def evaluate(self, annotations, predictions):
-        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
-        return np.diag(confusion_matrix).sum() / confusion_matrix.sum()
-
-
-class SegmentationIOU(SegmentationMetric):
-    __provider__ = 'mean_iou'
-
-    def evaluate(self, annotations, predictions):
-        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
-        union = confusion_matrix.sum(axis=1) + confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
-        diagonal = np.diag(confusion_matrix)
-        iou = np.divide(diagonal, union, out=np.zeros_like(diagonal), where=union != 0)
-
-        values, names = finalize_metric_result(iou, list(self.dataset.labels.values()))
-        self.meta['names'] = names
-
-        return values
-
-
-class SegmentationMeanAccuracy(SegmentationMetric):
-    __provider__ = 'mean_accuracy'
-
-    def evaluate(self, annotations, predictions):
-        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
-        diagonal = np.diag(confusion_matrix)
-        per_class_count = confusion_matrix.sum(axis=1)
-        acc_cls = np.divide(diagonal, per_class_count, out=np.zeros_like(diagonal), where=per_class_count != 0)
-
-        values, names = finalize_metric_result(acc_cls, list(self.dataset.labels.values()))
-        self.meta['names'] = names
-
-        return values
-
-
-class SegmentationFWAcc(SegmentationMetric):
-    __provider__ = 'frequency_weighted_accuracy'
-
-    def evaluate(self, annotations, predictions):
-        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
-
-        union = (confusion_matrix.sum(axis=1) + confusion_matrix.sum(axis=0) - np.diag(confusion_matrix))
-        diagonal = np.diag(confusion_matrix)
-        iou = np.divide(diagonal, union, out=np.zeros_like(diagonal), where=union != 0)
-        freq = confusion_matrix.sum(axis=1) / confusion_matrix.sum()
-
-        return (freq[freq > 0] * iou[freq > 0]).sum()
-
-
-class SegmentationDSCAcc(PerImageEvaluationMetric):
-    __provider__ = 'dice'
-    annotation_types = (BrainTumorSegmentationAnnotation,)
-    prediction_types = (BrainTumorSegmentationPrediction,)
-    overall_metric = []
-
-    def update(self, annotation, prediction):
-        cnt = 0
-        for prediction_mask, annotation_mask in zip(prediction.mask, annotation.mask):
-            annotation_mask = np.transpose(annotation_mask, (2, 0, 1))
-            annotation_mask = np.expand_dims(annotation_mask, 0)
-            numerator = np.sum(prediction_mask * annotation_mask) * 2.0 + 1.0
-            denominator = np.sum(annotation_mask) + np.sum(prediction_mask) + 1.0
-            self.overall_metric.append(numerator / denominator)
-            cnt += 1
-
-    def evaluate(self, annotations, predictions):
-        return sum(self.overall_metric) / len(self.overall_metric)
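A self-contained sketch of the confusion-matrix accumulation used by `SegmentationMetric.update` above and of the per-class IoU that `SegmentationIOU` derives from it:

```python
import numpy as np

n_classes = 3
annotation_mask = np.array([[0, 0, 1], [1, 2, 2]])
prediction_mask = np.array([[0, 1, 1], [1, 2, 0]])

label_true = annotation_mask.flatten()
label_pred = prediction_mask.flatten()
valid = (label_true >= 0) & (label_true < n_classes)
# Each (true, pred) pair maps to one cell of an n_classes x n_classes matrix.
confusion = np.bincount(
    n_classes * label_true[valid].astype(int) + label_pred[valid],
    minlength=n_classes ** 2
).reshape(n_classes, n_classes)

diagonal = np.diag(confusion).astype(float)
union = confusion.sum(axis=1) + confusion.sum(axis=0) - np.diag(confusion)
iou = np.divide(diagonal, union, out=np.zeros_like(diagonal), where=union != 0)
print(confusion)
print('per-class IoU:', iou, 'mean IoU:', iou.mean())
```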
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/text_detection.py b/tools/accuracy_checker/accuracy_checker/metrics/text_detection.py
deleted file mode 100644 (file)
index fec5b3c..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from .metric import PerImageEvaluationMetric, BaseMetricConfig
-from ..config import BoolField, NumberField
-from ..representation import TextDetectionPrediction, TextDetectionAnnotation
-from ..utils import polygon_from_points
-
-
-def get_union(detection_polygon, annotation_polygon):
-    area_prediction = detection_polygon.area
-    area_annotation = annotation_polygon.area
-    return area_prediction + area_annotation - get_intersection_area(detection_polygon, annotation_polygon)
-
-
-def get_intersection_over_union(detection_polygon, annotation_polygon):
-    union = get_union(detection_polygon, annotation_polygon)
-    intersection = get_intersection_area(detection_polygon, annotation_polygon)
-    return intersection / union if union != 0 else 0.0
-
-
-def get_intersection_area(detection_polygon, annotation_polygon):
-    return detection_polygon.intersection(annotation_polygon).area
-
-
-class TextDetectionMetricConfig(BaseMetricConfig):
-    iou_constrain = NumberField(min_value=0, max_value=1, optional=True)
-    ignore_difficult = BoolField(optional=True)
-    area_precision_constrain = NumberField(min_value=0, max_value=1, optional=True)
-
-
-class TextDetectionMetric(PerImageEvaluationMetric):
-    __provider__ = 'text_detection'
-
-    annotation_types = (TextDetectionAnnotation, )
-    prediction_types = (TextDetectionPrediction, )
-    _config_validator_type = TextDetectionMetricConfig
-
-    def configure(self):
-        self.iou_constrain = self.config.get('iou_constrain', 0.5)
-        self.area_precision_constrain = self.config.get('area_precision_constrain', 0.5)
-        self.ignore_difficult = self.config.get('ignore_difficult', False)
-        self.number_matched_detections = 0
-        self.number_valid_annotations = 0
-        self.number_valid_detections = 0
-
-    def update(self, annotation, prediction):
-        gt_polygons = list(map(polygon_from_points, annotation.points))
-        prediction_polygons = list(map(polygon_from_points, prediction.points))
-        num_gt = len(gt_polygons)
-        num_det = len(prediction_polygons)
-        gt_difficult_mask = np.full(num_gt, False)
-        prediction_difficult_mask = np.full(num_det, False)
-        num_det_matched = 0
-        if self.ignore_difficult:
-            gt_difficult_inds = annotation.metadata.get('difficult_boxes', [])
-            prediction_difficult_inds = prediction.metadata.get('difficult_boxes', [])
-            gt_difficult_mask[gt_difficult_inds] = True
-            prediction_difficult_mask[prediction_difficult_inds] = True
-            for det_id, detection_polygon in enumerate(prediction_polygons):
-                for gt_difficult_id in gt_difficult_inds:
-                    gt_difficult_polygon = gt_polygons[gt_difficult_id]
-                    intersected_area = get_intersection_area(gt_difficult_polygon, detection_polygon)
-                    pd_dimensions = detection_polygon.area
-                    precision = 0 if pd_dimensions == 0 else intersected_area / pd_dimensions
-
-                    if precision >= self.area_precision_constrain:
-                        prediction_difficult_mask[det_id] = True
-
-        if num_gt > 0 and num_det > 0:
-            iou_matrix = np.empty((num_gt, num_det))
-            gt_matched = np.zeros(num_gt, np.int8)
-            det_matched = np.zeros(num_det, np.int8)
-
-            for gt_id, gt_polygon in enumerate(gt_polygons):
-                for pred_id, pred_polygon in enumerate(prediction_polygons):
-                    iou_matrix[gt_id, pred_id] = get_intersection_over_union(pred_polygon, gt_polygon)
-                    not_matched_before = gt_matched[gt_id] == 0 and det_matched[pred_id] == 0
-                    not_difficult = not gt_difficult_mask[gt_id] and not prediction_difficult_mask[pred_id]
-                    if not_matched_before and not_difficult:
-                        if iou_matrix[gt_id, pred_id] >= self.iou_constrain:
-                            gt_matched[gt_id] = 1
-                            det_matched[pred_id] = 1
-                            num_det_matched += 1
-
-        num_ignored_gt = np.sum(gt_difficult_mask)
-        num_ignored_pred = np.sum(prediction_difficult_mask)
-        num_valid_gt = num_gt - num_ignored_gt
-        num_valid_pred = num_det - num_ignored_pred
-
-        self.number_matched_detections += num_det_matched
-        self.number_valid_annotations += num_valid_gt
-        self.number_valid_detections += num_valid_pred
-
-    def evaluate(self, annotations, predictions):
-        recall = (
-            0 if self.number_valid_annotations == 0
-            else float(self.number_matched_detections) / self.number_valid_annotations
-        )
-        precision = (
-            0 if self.number_valid_detections == 0
-            else float(self.number_matched_detections) / self.number_valid_detections
-        )
-
-        return 0 if recall + precision == 0 else 2 * recall * precision / (recall + precision)
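The matching above works on polygon areas produced by `polygon_from_points`. A hedged sketch of the underlying intersection-over-union for two quadrilaterals, assuming the `shapely` package (which is what such polygon helpers are typically built on):

```python
from shapely.geometry import Polygon

gt = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
det = Polygon([(5, 5), (15, 5), (15, 15), (5, 15)])

intersection = gt.intersection(det).area
union = gt.area + det.area - intersection
iou = intersection / union if union != 0 else 0.0
print('IoU: {:.3f}'.format(iou))  # 25 / 175 ≈ 0.143
```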
diff --git a/tools/accuracy_checker/accuracy_checker/pipeline_connectors/__init__.py b/tools/accuracy_checker/accuracy_checker/pipeline_connectors/__init__.py
deleted file mode 100644 (file)
index 1e22b65..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-from .connectors import Connection, StageConnectionDescription, create_connection_description
-
-__all__ = [
-    'Connection',
-    'StageConnectionDescription',
-    'create_connection_description'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/pipeline_connectors/connectors.py b/tools/accuracy_checker/accuracy_checker/pipeline_connectors/connectors.py
deleted file mode 100644 (file)
index 318a3b8..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-from collections import namedtuple
-from ..dependency import ClassProvider
-from ..data_readers import DataRepresentation
-
-
-StageConnectionDescription = namedtuple('StageConnection', ['from_stage', 'to_stage', 'replace', 'connector'])
-
-
-class Connection:
-    def __init__(self, stages, description: StageConnectionDescription):
-        from_stage = description.from_stage
-        if from_stage is None:
-            for stage_index, stage in enumerate(stages):
-                if stage == description.to_stage:
-                    from_stage = list(stages.keys())[stage_index - 1]
-        self.from_stage_context = stages[from_stage].evaluation_context
-        self.to_stage_context = stages[description.to_stage].evaluation_context
-        self.replace_container = description.replace
-        if description.connector:
-            self.connector = BaseConnector.provide(description.connector)
-            self.replace_container = self.connector.replace_container
-
-    def __call__(self, *args, **kwargs):
-        shared_data = (
-            self.connector(self.from_stage_context)
-            if self.connector else getattr(self.from_stage_context, self.replace_container)
-        )
-        setattr(self.to_stage_context, self.replace_container, shared_data)
-
-
-class BaseConnector(ClassProvider):
-    __provider_type__ = 'connector'
-
-    def connect(self, context):
-        raise NotImplementedError
-
-    def __call__(self, context, *args, **kwargs):
-        return self.connect(context)
-
-
-class PredictionToDataConnector(BaseConnector):
-    __provider__ = 'prediction_to_data'
-
-    replace_container = 'data_batch'
-
-    def connect(self, context):
-        batch_predictions = context.prediction_batch
-        batch_identifiers = context.identifiers_batch
-        data_batch = []
-        for prediction_item, identifier in zip(batch_predictions, batch_identifiers):
-            prediction_key = list(prediction_item.keys())[0]
-            data_batch.append(DataRepresentation(prediction_item[prediction_key], identifier=identifier))
-
-        return data_batch
-
-
-def create_connection_description(configuration, stage_name):
-    config = configuration
-    if not isinstance(configuration, list):
-        config = [configuration]
-    for config_item in config:
-        connector = config_item.get('connector')
-        if connector:
-            connected_stage = config_item.get('stage')
-            return StageConnectionDescription(
-                from_stage=connected_stage, to_stage=stage_name, replace=None, connector=connector
-            )
-
-    return None
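For reference, a hedged sketch of the configuration fragment `create_connection_description` above expects; the stage names are hypothetical:

```python
# Illustrative only: a pipeline stage entry naming a connector and, optionally,
# the stage it pulls data from.
stage_config = [{'connector': 'prediction_to_data', 'stage': 'detector'}]

# Hypothetical result of create_connection_description(stage_config, 'recognizer'):
# StageConnectionDescription(from_stage='detector', to_stage='recognizer',
#                            replace=None, connector='prediction_to_data')
```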
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/README.md b/tools/accuracy_checker/accuracy_checker/postprocessor/README.md
deleted file mode 100644 (file)
index 752276a..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-# Postprocessors
-
-A postprocessor is a function which processes prediction and/or annotation data after model inference and before metric calculation. Postprocessors require a specific representation format to work correctly
-(e.g. the clip boxes postprocessor expects a detection annotation and a detection prediction).
-
-If you use a compound representation stored in a representation container and want to process only specific representations, you can add the `annotation_source` and `prediction_source` options to the configuration file;
-otherwise the postprocessor is applied to all suitable representations. `annotation_source` and `prediction_source` should contain
-comma-separated lists of annotation identifiers and output layer names respectively.
-
-Every postprocessor has parameters available for configuration. 
-
-Accuracy Checker supports the following set of postprocessors:
-
-* `cast_to_int` - casts detection bounding box coordinates given in floating point format to integer. Supported representations: `DetectionAnnotation`, `DetectionPrediction`, `TextDetectionAnnotation`, `TextDetectionPrediction`.
-  * `round_policy` - method for rounding: `nearest`, `greater`, `lower`, `nearest_to_zero`.
-* `clip_boxes` - clips detection bounding box coordinates to the destination size. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `dst_width` and `dst_height` - destination width and height for box clipping respectively. You can also use `size` instead when the destination sizes are equal.
-  * `apply_to` - determines target boxes for processing (`annotation` for ground truth boxes, `prediction` for detection results, `all` for both).
-  * `boxes_normalized` - flag which indicates that target bounding boxes are in normalized format.
-* `correct_yolo_v2_boxes` - resizes detection prediction bounding box coordinates using the approach specific to YOLO v2. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `dst_width` and `dst_height` - destination width and height respectively. You can also use `size` instead when the destination sizes are equal.
-* `encode_segmentation_mask` - encodes a segmentation label image as a segmentation mask. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-* `resize_prediction_boxes` - resizes normalized detection prediction boxes according to the image size. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-* `resize_segmentation_mask` - resizes a segmentation mask. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `dst_width` and `dst_height` - destination width and height for mask resizing respectively. You can also use `size` instead when the destination sizes are equal.
-    If any of these parameters is not specified, the image size is used as the default.
-  * `apply_to` - determines target masks for processing (`annotation` for ground truth masks, `prediction` for predicted masks, `all` for both).
-* `nms` - non-maximum suppression. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `overlap` - overlap threshold for merging detections.
-* `filter` - filters data using different parameters. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
-  * `apply_to` - determines target boxes for processing (`annotation` for ground truth boxes, `prediction` for detection results, `all` for both).
-  * `remove_filtered` - removes filtered data. By default annotations ignore filtered data without removing it; in other cases filtered data is removed automatically.
-  * Supported parameters for filtering: `labels`, `min_confidence`, `height_range`, `width_range`, `is_empty`, `min_visibility`, `aspect_ratio`, `area_ratio`, `area_range`.
-    Filtering by `height_range` and `width_range` is also available for `TextDetectionAnnotation`, `TextDetectionPrediction`; filtering by `area_range` - for `PoseEstimationAnnotation`, `PoseEstimationPrediction` and `TextDetectionAnnotation`, `TextDetectionPrediction`.
-* `normalize_landmarks_points` - normalizes ground truth landmark points. Supported representations: `FacialLandmarksAnnotation`, `FacialLandmarksPrediction`.
-  * `use_annotation_rect` - use the size of the rectangle saved in annotation metadata for point scaling instead of the source image size.
-* `extend_segmentation_mask` - extends the annotation segmentation mask to the predicted mask size, filling the border with a specific value. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `filling_label` - value used to fill the border (default 255).
-* `zoom_segmentation_mask` - zooms a segmentation mask. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
-  * `zoom` - size for the zoom operation.
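-
-A rough, combined example (the parameter values are arbitrary); after the configuration is parsed, a list like this is what `PostprocessingExecutor` receives as its `processors` argument:
-
-```python
-postprocessors = [
-    {'type': 'resize_prediction_boxes'},                # scale normalized boxes to the image size
-    {'type': 'clip_boxes', 'apply_to': 'prediction'},   # clip boxes to the image borders
-    {'type': 'nms', 'overlap': 0.5},                    # drop strongly overlapping detections
-    {'type': 'cast_to_int', 'round_policy': 'nearest'}  # round the resulting coordinates
-]
-```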
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/__init__.py b/tools/accuracy_checker/accuracy_checker/postprocessor/__init__.py
deleted file mode 100644 (file)
index c3a93bd..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .postprocessing_executor import PostprocessingExecutor
-
-from .filter import (
-    FilterPostprocessor,
-
-    FilterByHeightRange,
-    FilterByLabels,
-    FilterByMinConfidence,
-    FilterEmpty,
-    FilterByVisibility,
-    FilterByAspectRatio
-)
-
-from .cast_to_int import CastToInt
-from .clip_boxes import ClipBoxes
-from .nms import NMS
-from .resize_prediction_boxes import ResizePredictionBoxes
-from .correct_yolo_v2_boxes import CorrectYoloV2Boxes
-from .resize_segmentation_mask import ResizeSegmentationMask
-from .encode_segmentation_mask import EncodeSegMask
-from .normalize_landmarks_points import NormalizeLandmarksPoints
-from .clip_points import ClipPoints
-from .extend_segmentation_mask import ExtendSegmentationMask
-from .zoom_segmentation_mask import ZoomSegMask
-from .crop_segmentation_mask import CropSegmentationMask
-from .clip_segmentation_mask import ClipSegmentationMask
-
-__all__ = [
-    'PostprocessingExecutor',
-
-    'FilterPostprocessor',
-    'FilterByHeightRange',
-    'FilterByLabels',
-    'FilterByMinConfidence',
-    'FilterEmpty',
-    'FilterByVisibility',
-    'FilterByAspectRatio',
-
-    'CastToInt',
-    'ClipBoxes',
-    'NMS',
-    'ResizePredictionBoxes',
-    'CorrectYoloV2Boxes',
-
-    'ResizeSegmentationMask',
-    'EncodeSegMask',
-    'ExtendSegmentationMask',
-    'ZoomSegMask',
-    'CropSegmentationMask',
-    'ClipSegmentationMask',
-
-    'NormalizeLandmarksPoints'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/cast_to_int.py b/tools/accuracy_checker/accuracy_checker/postprocessor/cast_to_int.py
deleted file mode 100644 (file)
index 26468d4..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from functools import singledispatch
-from typing import Union
-import numpy as np
-from ..config import StringField
-from ..representation import DetectionAnnotation, DetectionPrediction, TextDetectionPrediction, TextDetectionAnnotation
-from .postprocessor import Postprocessor, BasePostprocessorConfig
-
-round_policies_func = {
-    'nearest': np.rint,
-    'nearest_to_zero': np.trunc,
-    'lower': np.floor,
-    'greater': np.ceil
-}
-
-
-class CastToIntConfigValidator(BasePostprocessorConfig):
-    round_policy = StringField(optional=True, choices=round_policies_func.keys())
-
-
-class CastToInt(Postprocessor):
-    __provider__ = 'cast_to_int'
-    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
-    prediction_types = (DetectionPrediction, TextDetectionPrediction)
-    _config_validator_type = CastToIntConfigValidator
-
-    def configure(self):
-        self.round_func = round_policies_func[self.config.get('round_policy', 'nearest')]
-
-    def process_image(self, annotation, prediction):
-        @singledispatch
-        def cast(entry):
-            pass
-
-        @cast.register(DetectionAnnotation)
-        @cast.register(DetectionPrediction)
-        def _(entry):
-            entry.x_mins = self.round_func(entry.x_mins)
-            entry.x_maxs = self.round_func(entry.x_maxs)
-            entry.y_mins = self.round_func(entry.y_mins)
-            entry.y_maxs = self.round_func(entry.y_maxs)
-
-        @cast.register(TextDetectionAnnotation)
-        @cast.register(TextDetectionPrediction)
-        def _(entry):
-            entry.points = self.round_func(entry.points)
-
-
-        for annotation_ in annotation:
-            cast(annotation_)
-
-        for prediction_ in prediction:
-            cast(prediction_)
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/clip_boxes.py b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_boxes.py
deleted file mode 100644 (file)
index 0d31750..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..config import BoolField, NumberField
-from ..representation import DetectionPrediction, DetectionAnnotation
-from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
-
-
-class ClipConfigValidator(PostprocessorWithTargetsConfigValidator):
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    size = NumberField(floats=False, optional=True, min_value=1)
-    boxes_normalized = BoolField(optional=True)
-
-
-class ClipBoxes(PostprocessorWithSpecificTargets):
-    __provider__ = 'clip_boxes'
-
-    annotation_types = (DetectionAnnotation, )
-    prediction_types = (DetectionPrediction, )
-    _config_validator_type = ClipConfigValidator
-
-    def configure(self):
-        size = self.config.get('size')
-        self.dst_height = size or self.config.get('dst_height')
-        self.dst_width = size or self.config.get('dst_width')
-        self.boxes_normalized = self.config.get('boxes_normalized', False)
-
-    def process_image(self, annotation, prediction):
-        target_height = self.dst_height or self.image_size[0]
-        target_width = self.dst_width or self.image_size[1]
-
-        max_width = target_width if not self.boxes_normalized else 1
-        max_height = target_height if not self.boxes_normalized else 1
-
-        for target in annotation:
-            self._clip_boxes(target, (0, max_width), (0, max_height))
-        for target in prediction:
-            self._clip_boxes(target, (0, max_width), (0, max_height))
-
-        return annotation, prediction
-
-    @staticmethod
-    def _clip_boxes(entry, width_range, height_range):
-        entry.x_mins = entry.x_mins.clip(width_range[0], width_range[1])
-        entry.x_maxs = entry.x_maxs.clip(width_range[0], width_range[1])
-        entry.y_mins = entry.y_mins.clip(height_range[0], height_range[1])
-        entry.y_maxs = entry.y_maxs.clip(height_range[0], height_range[1])
-
-        return entry
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/clip_points.py b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_points.py
deleted file mode 100644 (file)
index fdef034..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-""""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from ..config import BoolField, NumberField
-from ..representation import TextDetectionAnnotation, TextDetectionPrediction
-from ..utils import get_size_from_config
-from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
-
-
-class ClipPointsConfigValidator(PostprocessorWithTargetsConfigValidator):
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    size = NumberField(floats=False, optional=True, min_value=1)
-    points_normalized = BoolField(optional=True)
-
-
-class ClipPoints(PostprocessorWithSpecificTargets):
-    __provider__ = 'clip_points'
-
-    annotation_types = (TextDetectionAnnotation, )
-    prediction_types = (TextDetectionPrediction, )
-    _config_validator_type = ClipPointsConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width = get_size_from_config(self.config, allow_none=True)
-        self.points_normalized = self.config.get('points_normalized', False)
-
-    def process_image(self, annotation, prediction):
-        target_width = self.dst_width or self.image_size[1] - 1
-        target_height = self.dst_height or self.image_size[0] - 1
-
-        max_width = target_width if not self.points_normalized else 1
-        max_height = target_height if not self.points_normalized else 1
-        for target in annotation:
-            points = []
-            for polygon in target.points:
-                polygon[:, 0] = np.clip(polygon[:, 0], 0, max_width)
-                polygon[:, 1] = np.clip(polygon[:, 1], 0, max_height)
-                points.append(polygon)
-            target.points = points
-        for target in prediction:
-            points = []
-            for polygon in target.points:
-                polygon[:, 0] = np.clip(polygon[:, 0], 0, max_width)
-                polygon[:, 1] = np.clip(polygon[:, 1], 0, max_height)
-                points.append(polygon)
-            target.points = points
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/clip_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_segmentation_mask.py
deleted file mode 100644 (file)
index f5e097c..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import numpy as np
-from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
-from ..representation import BrainTumorSegmentationAnnotation, BrainTumorSegmentationPrediction
-from ..config import NumberField, ConfigError
-
-
-class ClipMaskConfigValidator(PostprocessorWithTargetsConfigValidator):
-    min_value = NumberField(floats=False, min_value=0, optional=True)
-    max_value = NumberField(floats=False)
-
-
-class ClipSegmentationMask(PostprocessorWithSpecificTargets):
-    __provider__ = 'clip_segmentation_mask'
-
-    annotation_types = (BrainTumorSegmentationAnnotation, )
-    prediction_types = (BrainTumorSegmentationPrediction, )
-    _config_validator_type = ClipMaskConfigValidator
-
-    def configure(self):
-        self.min_value = self.config.get('min_value', 0)
-        self.max_value = self.config['max_value']
-        if self.max_value < self.min_value:
-            raise ConfigError('max_value should be greater than min_value')
-
-    def process_image(self, annotation, prediction):
-        for target in annotation:
-            target.mask = np.clip(target.mask, a_min=self.min_value, a_max=self.max_value)
-
-        for target in prediction:
-            target.mask = np.clip(target.mask, a_min=self.min_value, a_max=self.max_value)
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/correct_yolo_v2_boxes.py b/tools/accuracy_checker/accuracy_checker/postprocessor/correct_yolo_v2_boxes.py
deleted file mode 100644 (file)
index 7ed247a..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..config import NumberField
-from .postprocessor import BasePostprocessorConfig, Postprocessor
-from ..representation import DetectionPrediction, DetectionAnnotation
-from ..utils import get_size_from_config
-
-
-class CorrectYoloV2BoxesConfigValidator(BasePostprocessorConfig):
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    size = NumberField(floats=False, optional=True, min_value=1)
-
-
-class CorrectYoloV2Boxes(Postprocessor):
-    __provider__ = 'correct_yolo_v2_boxes'
-
-    prediction_types = (DetectionPrediction, )
-    annotation_types = (DetectionAnnotation, )
-    _config_validator_type = CorrectYoloV2BoxesConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width = get_size_from_config(self.config)
-
-    def process_image(self, annotation, prediction):
-        dst_h, dst_w = self.dst_height, self.dst_width
-        # postprocessor always expects lists of annotations and predictions for the same image
-        # we do not need to get image sizes in cycle, because they are equal
-        img_h, img_w, _ = self.image_size
-
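-        # the network input is letterboxed: the source image is scaled to fit the dst_w x dst_h
-        # canvas while keeping its aspect ratio, so recover that effective resized size first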
-        if (dst_w / img_w) < (dst_h / img_h):
-            new_w = dst_w
-            new_h = (img_h * dst_w) // img_w
-        else:
-            new_h = dst_h
-            new_w = (img_w * dst_h) // img_h
-
-        for prediction_ in prediction:
-            coordinates = zip(prediction_.x_mins, prediction_.y_mins, prediction_.x_maxs, prediction_.y_maxs)
-            for i, (x0, y0, x1, y1) in enumerate(coordinates):
-                box = [(x0 + x1) / 2.0, (y0 + y1) / 2.0, x1 - x0, y1 - y0]
-                box[0] = (box[0] - (dst_w - new_w) / (2.0 * dst_w)) * (dst_w / new_w)
-                box[1] = (box[1] - (dst_h - new_h) / (2.0 * dst_h)) * (dst_h / new_h)
-                box[2] *= dst_w / new_w
-                box[3] *= dst_h / new_h
-
-                box[0] *= img_w
-                box[1] *= img_h
-                box[2] *= img_w
-                box[3] *= img_h
-
-                prediction_.x_mins[i] = box[0] - box[2] / 2.0 + 1
-                prediction_.y_mins[i] = box[1] - box[3] / 2.0 + 1
-                prediction_.x_maxs[i] = box[0] + box[2] / 2.0 + 1
-                prediction_.y_maxs[i] = box[1] + box[3] / 2.0 + 1
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/crop_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/crop_segmentation_mask.py
deleted file mode 100644 (file)
index 9eb4341..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
-from ..representation import BrainTumorSegmentationAnnotation, BrainTumorSegmentationPrediction
-from ..config import NumberField
-from ..preprocessor import Crop3D
-from ..utils import get_size_3d_from_config
-
-
-class CropMaskConfigValidator(PostprocessorWithTargetsConfigValidator):
-    size = NumberField(floats=False, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    dst_volume = NumberField(floats=False, optional=True, min_value=1)
-
-
-class CropSegmentationMask(PostprocessorWithSpecificTargets):
-    __provider__ = 'crop_segmentation_mask'
-
-    annotation_types = (BrainTumorSegmentationAnnotation,)
-    prediction_types = (BrainTumorSegmentationPrediction,)
-    _config_validator_type = CropMaskConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width, self.dst_volume = get_size_3d_from_config(self.config)
-
-    def process_image(self, annotation, prediction):
-        for target in annotation:
-            target.mask = Crop3D.crop_center(target.mask, self.dst_height, self.dst_width, self.dst_volume)
-
-        for target in prediction:
-            target.mask = Crop3D.crop_center(target.mask, self.dst_height, self.dst_width, self.dst_volume)
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/encode_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/encode_segmentation_mask.py
deleted file mode 100644 (file)
index 736eb0e..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from .postprocessor import Postprocessor
-from ..representation import SegmentationAnnotation, SegmentationPrediction
-
-
-class EncodeSegMask(Postprocessor):
-    """
-    Encode segmentation label image as segmentation mask.
-    """
-
-    __provider__ = 'encode_segmentation_mask'
-
-    annotation_types = (SegmentationAnnotation, )
-    prediction_types = (SegmentationPrediction, )
-
-    def process_image(self, annotation, prediction):
-        segmentation_colors = self.meta.get("segmentation_colors")
-
-        if not segmentation_colors:
-            raise ValueError("No 'segmentation_colors' in dataset metadata.")
-
-        for annotation_ in annotation:
-            mask = annotation_.mask.astype(int)
-            encoded_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.int16)
-            for label, color in enumerate(segmentation_colors):
-                encoded_mask[np.where(np.all(mask == color, axis=-1))[:2]] = label
-            annotation_.mask = encoded_mask
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/extend_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/extend_segmentation_mask.py
deleted file mode 100644 (file)
index d8dad9d..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import math
-import cv2
-
-from .postprocessor import Postprocessor, BasePostprocessorConfig
-from ..representation import SegmentationAnnotation, SegmentationPrediction
-from ..config import NumberField, ConfigError
-
-
-class ExtendSegmentationMaskConfigValidator(BasePostprocessorConfig):
-    filling_label = NumberField(optional=True, floats=False)
-
-
-class ExtendSegmentationMask(Postprocessor):
-    """
-    Extend annotation segmentation mask to prediction size filling border with specific label.
-    """
-
-    __provider__ = 'extend_segmentation_mask'
-
-    annotation_types = (SegmentationAnnotation, )
-    prediction_types = (SegmentationPrediction, )
-    _config_validator_type = ExtendSegmentationMaskConfigValidator
-
-    def configure(self):
-        self.filling_label = self.config.get('filling_label', 255)
-
-    def process_image(self, annotation, prediction):
-        for annotation_, prediction_ in zip(annotation, prediction):
-            annotation_mask = annotation_.mask
-            dst_height, dst_width = prediction_.mask.shape[-2:]
-            height, width = annotation_mask.shape[-2:]
-            if dst_width < width or dst_height < height:
-                raise ConfigError('size for extending should not be less than the current mask size')
-            pad = []
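-            # pad order: [top, left, bottom, right]; the floor of the split goes to the top/left side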
-            pad.append(int(math.floor((dst_height - height) / 2.0)))
-            pad.append(int(math.floor((dst_width - width) / 2.0)))
-            pad.append(int(dst_height - height - pad[0]))
-            pad.append(int(dst_width - width - pad[1]))
-
-            extended_mask = cv2.copyMakeBorder(
-                annotation_mask, pad[0], pad[2], pad[1], pad[3], cv2.BORDER_CONSTANT, value=self.filling_label
-            )
-            annotation_.mask = extended_mask
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/filter.py b/tools/accuracy_checker/accuracy_checker/postprocessor/filter.py
deleted file mode 100644 (file)
index f0122b2..0000000
+++ /dev/null
@@ -1,316 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from functools import singledispatch
-from typing import Union
-import numpy as np
-
-from ..config import BaseField, BoolField
-from ..dependency import ClassProvider
-from ..postprocessor.postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
-from ..representation import (DetectionAnnotation, DetectionPrediction, TextDetectionAnnotation,
-                              TextDetectionPrediction, PoseEstimationPrediction, PoseEstimationAnnotation)
-from ..utils import in_interval, polygon_from_points, convert_to_range
-
-
-class FilterConfig(PostprocessorWithTargetsConfigValidator):
-    remove_filtered = BoolField(optional=True)
-
-    def __init__(self, config_uri, **kwargs):
-        super().__init__(config_uri, **kwargs)
-        for functor in BaseFilter.providers:
-            self.fields[functor] = BaseField(optional=True)
-
-
-class FilterPostprocessor(PostprocessorWithSpecificTargets):
-    __provider__ = 'filter'
-
-    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
-    prediction_types = (DetectionPrediction, TextDetectionPrediction)
-    _config_validator_type = FilterConfig
-
-    def __init__(self, *args, **kwargs):
-        self._filters = []
-        self.remove_filtered = False
-        super().__init__(*args, **kwargs)
-
-    def configure(self):
-        config = self.config.copy()
-        config.pop('type')
-        self.remove_filtered = config.pop('remove_filtered', False)
-        config.pop('annotation_source', None)
-        config.pop('prediction_source', None)
-        config.pop('apply_to', None)
-
-        for key, value in config.items():
-            self._filters.append(BaseFilter.provide(key, value))
-
-    def process_image(self, annotation, prediction):
-        for functor in self._filters:
-            for target in annotation:
-                self._filter_entry_by(target, functor)
-
-            for target in prediction:
-                self._filter_entry_by(target, functor)
-
-        return annotation, prediction
-
-    def _filter_entry_by(self, entry, functor):
-        ignored_key = 'difficult_boxes'
-
-        if not self.remove_filtered and isinstance(entry, (DetectionAnnotation, DetectionPrediction,
-                                                           TextDetectionAnnotation, TextDetectionPrediction,
-                                                           PoseEstimationAnnotation, PoseEstimationPrediction)):
-            ignored = entry.metadata.setdefault(ignored_key, [])
-            ignored.extend(functor(entry))
-        else:
-            entry.remove(functor(entry))
-
-        return entry
-
-
-class BaseFilter(ClassProvider):
-    __provider_type__ = 'filter'
-
-    def __init__(self, filter_arg):
-        self.filter_arg = filter_arg
-
-    def __call__(self, entry):
-        return self.apply_filter(entry, self.filter_arg)
-
-    def apply_filter(self, entry, filter_arg):
-        raise NotImplementedError
-
-
-class FilterByLabels(BaseFilter):
-    __provider__ = 'labels'
-
-    def apply_filter(self, entry, labels):
-        filtered = []
-        for index, label in enumerate(entry.labels):
-            if label in labels:
-                filtered.append(index)
-
-        return filtered
-
-
-class FilterByMinConfidence(BaseFilter):
-    __provider__ = 'min_confidence'
-
-    def apply_filter(self, entry, min_confidence):
-        filtered = []
-
-        if isinstance(entry, DetectionAnnotation):
-            return filtered
-
-        for index, score in enumerate(entry.scores):
-            if score < min_confidence:
-                filtered.append(index)
-
-        return filtered
-
-
-class FilterByHeightRange(BaseFilter):
-    __provider__ = 'height_range'
-
-    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
-    prediction_types = (DetectionPrediction, TextDetectionPrediction)
-
-    def apply_filter(self, entry, height_range):
-        @singledispatch
-        def filtering(entry_value, height_range_):
-            return []
-
-        @filtering.register(DetectionAnnotation)
-        @filtering.register(DetectionPrediction)
-        def _(entry_value, height_range_):
-            filtered = []
-            for index, (y_min, y_max) in enumerate(zip(entry_value.y_mins, entry_value.y_maxs)):
-                height = y_max - y_min
-                if not in_interval(height, height_range_):
-                    filtered.append(index)
-
-            return filtered
-
-        @filtering.register(TextDetectionAnnotation)
-        @filtering.register(TextDetectionPrediction)
-        def _(entry_values, height_range_):
-            filtered = []
-            for index, polygon_points in enumerate(entry_values.points):
-                left_bottom_point, left_top_point, right_top_point, right_bottom_point = polygon_points
-                left_side_height = np.linalg.norm(left_bottom_point - left_top_point)
-                right_side_height = np.linalg.norm(right_bottom_point - right_top_point)
-                if not in_interval(np.mean([left_side_height, right_side_height]), height_range_):
-                    filtered.append(index)
-
-            return filtered
-
-        return filtering(entry, convert_to_range(height_range))
-
-
-class FilterByWidthRange(BaseFilter):
-    __provider__ = 'width_range'
-
-    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
-    prediction_types = (DetectionPrediction, TextDetectionPrediction)
-
-    def apply_filter(self, entry, width_range):
-        @singledispatch
-        def filtering(entry_value, width_range_):
-            return []
-
-        @filtering.register(DetectionAnnotation)
-        @filtering.register(DetectionPrediction)
-        def _(entry_value, width_range_):
-            filtered = []
-            for index, (x_min, x_max) in enumerate(zip(entry_value.x_mins, entry_value.x_maxs)):
-                width = x_max - x_min
-                if not in_interval(width, width_range_):
-                    filtered.append(index)
-
-            return filtered
-
-        @filtering.register(TextDetectionAnnotation)
-        @filtering.register(TextDetectionPrediction)
-        def _(entry_values, width_range_):
-            filtered = []
-            for index, polygon_points in enumerate(entry_values.points):
-                left_bottom_point, left_top_point, right_top_point, right_bottom_point = polygon_points
-                top_width = np.linalg.norm(right_top_point - left_top_point)
-                bottom_width = np.linalg.norm(right_bottom_point - left_bottom_point)
-                if not in_interval(top_width, width_range_) or not in_interval(bottom_width, width_range_):
-                    filtered.append(index)
-
-            return filtered
-
-        return filtering(entry, convert_to_range(width_range))
-
-
-class FilterByAreaRange(BaseFilter):
-    __provider__ = 'area_range'
-
-    annotation_types = (TextDetectionAnnotation, PoseEstimationAnnotation)
-    prediction_types = (TextDetectionPrediction, )
-
-    def apply_filter(self, entry, area_range):
-        area_range = convert_to_range(area_range)
-
-        @singledispatch
-        def filtering(entry, area_range):
-            return []
-
-        @filtering.register(PoseEstimationAnnotation)
-        @filtering.register(PoseEstimationPrediction)
-        def _(entry, area_range):
-            filtered = []
-            areas = entry.areas
-            for area_id, area in enumerate(areas):
-                if not in_interval(area, area_range):
-                    filtered.append(area_id)
-            return filtered
-
-        @filtering.register(TextDetectionAnnotation)
-        @filtering.register(TextDetectionPrediction)
-        def _(entry, area_range):
-            filtered = []
-            for index, polygon_points in enumerate(entry.points):
-                if not in_interval(polygon_from_points(polygon_points).area, area_range):
-                    filtered.append(index)
-            return filtered
-
-        return filtering(entry, area_range)
-
-
-class FilterEmpty(BaseFilter):
-    __provider__ = 'is_empty'
-
-    def apply_filter(self, entry: DetectionAnnotation, is_empty):
-        return np.where(np.bitwise_or(entry.x_maxs - entry.x_mins <= 0, entry.y_maxs - entry.y_mins <= 0))[0]
-
-
-class FilterByVisibility(BaseFilter):
-    __provider__ = 'min_visibility'
-
-    _VISIBILITY_LEVELS = {
-        'heavy occluded': 0,
-        'partially occluded': 1,
-        'visible': 2
-    }
-
-    def apply_filter(self, entry, min_visibility):
-        filtered = []
-        min_visibility_level = self.visibility_level(min_visibility)
-        for index, visibility in enumerate(entry.metadata.get('visibilities', [])):
-            if self.visibility_level(visibility) < min_visibility_level:
-                filtered.append(index)
-
-        return filtered
-
-    def visibility_level(self, visibility):
-        level = self._VISIBILITY_LEVELS.get(visibility)
-        if level is None:
-            message = 'Unknown visibility level "{}". Supported only "{}"'
-            raise ValueError(message.format(visibility, ','.join(self._VISIBILITY_LEVELS.keys())))
-
-        return level
-
-
-class FilterByAspectRatio(BaseFilter):
-    __provider__ = 'aspect_ratio'
-
-    def apply_filter(self, entry, aspect_ratio):
-        aspect_ratio = convert_to_range(aspect_ratio)
-
-        filtered = []
-        coordinates = zip(entry.x_mins, entry.y_mins, entry.x_maxs, entry.y_maxs)
-        for index, (x_min, y_min, x_max, y_max) in enumerate(coordinates):
-            ratio = (y_max - y_min) / np.maximum(x_max - x_min, np.finfo(np.float64).eps)
-            if not in_interval(ratio, aspect_ratio):
-                filtered.append(index)
-
-        return filtered
-
-
-class FilterByAreaRatio(BaseFilter):
-    __provider__ = 'area_ratio'
-
-    def apply_filter(self, entry, area_ratio):
-        area_ratio = convert_to_range(area_ratio)
-
-        filtered = []
-        if not isinstance(entry, DetectionAnnotation):
-            return filtered
-
-        image_size = entry.metadata.get('image_size')
-        if not image_size:
-            return filtered
-        image_size = image_size[0]
-
-        image_area = image_size[0] * image_size[1]
-
-        occluded_indices = entry.metadata.get('is_occluded', [])
-        coordinates = zip(entry.x_mins, entry.y_mins, entry.x_maxs, entry.y_maxs)
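-        # the configured range is checked against sqrt(box_area / image_area), i.e. the relative
-        # linear scale of the box; boxes marked as occluded are filtered out as well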
-        for index, (x_min, y_min, x_max, y_max) in enumerate(coordinates):
-            width, height = x_max - x_min, y_max - y_min
-            area = np.sqrt(float(width * height) / np.maximum(image_area, np.finfo(np.float64).eps))
-            if not in_interval(area, area_ratio) or index in occluded_indices:
-                filtered.append(index)
-
-        return filtered
-
-
-class FilterInvalidBoxes(BaseFilter):
-    __provider__ = 'invalid_boxes'
-
-    def apply_filter(self, entry, invalid_boxes):
-        infinite_mask_x = np.logical_or(~np.isfinite(entry.x_mins), ~np.isfinite(entry.x_maxs))
-        infinite_mask_y = np.logical_or(~np.isfinite(entry.y_mins), ~np.isfinite(entry.y_maxs))
-        infinite_mask = np.logical_or(infinite_mask_x, infinite_mask_y)
-
-        return np.argwhere(infinite_mask).reshape(-1).tolist()
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/nms.py b/tools/accuracy_checker/accuracy_checker/postprocessor/nms.py
deleted file mode 100644 (file)
index 3edc2e0..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..config import BoolField, NumberField
-from .postprocessor import BasePostprocessorConfig, Postprocessor
-from ..representation import DetectionPrediction, DetectionAnnotation
-
-
-class NMSConfigValidator(BasePostprocessorConfig):
-    overlap = NumberField(min_value=0, max_value=1, optional=True)
-    include_boundaries = BoolField(optional=True)
-    keep_top_k = NumberField(min_value=0, optional=True)
-
-
-class NMS(Postprocessor):
-    __provider__ = 'nms'
-
-    prediction_types = (DetectionPrediction, )
-    annotation_types = (DetectionAnnotation, )
-    _config_validator_type = NMSConfigValidator
-
-    def configure(self):
-        self.overlap = self.config.get('overlap', 0.5)
-        self.include_boundaries = self.config.get('include_boundaries', True)
-        self.keep_top_k = self.config.get('keep_top_k')
-
-    def process_image(self, annotations, predictions):
-        for prediction in predictions:
-            keep = self.nms(
-                prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs, prediction.scores,
-                self.overlap, self.include_boundaries, self.keep_top_k
-            )
-            prediction.remove([box for box in range(len(prediction.x_mins)) if box not in keep])
-
-        return annotations, predictions
-
-    @staticmethod
-    def nms(x1, y1, x2, y2, scores, thresh, include_boundaries=True, keep_top_k=None):
-        """
-        Pure Python NMS baseline.
-        """
-
-        b = 1 if include_boundaries else 0
-
-        areas = (x2 - x1 + b) * (y2 - y1 + b)
-        order = scores.argsort()[::-1]
-
-        if keep_top_k:
-            order = order[:keep_top_k]
-
-        keep = []
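-        # greedily keep the highest-scoring box, drop the remaining boxes whose IoU with it
-        # exceeds the threshold, and repeat with what is left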
-        while order.size > 0:
-            i = order[0]
-            keep.append(i)
-
-            xx1 = np.maximum(x1[i], x1[order[1:]])
-            yy1 = np.maximum(y1[i], y1[order[1:]])
-            xx2 = np.minimum(x2[i], x2[order[1:]])
-            yy2 = np.minimum(y2[i], y2[order[1:]])
-
-            w = np.maximum(0.0, xx2 - xx1 + b)
-            h = np.maximum(0.0, yy2 - yy1 + b)
-            intersection = w * h
-
-            union = (areas[i] + areas[order[1:]] - intersection)
-            overlap = np.divide(intersection, union, out=np.zeros_like(intersection, dtype=float), where=union != 0)
-
-            order = order[np.where(overlap <= thresh)[0] + 1]
-
-        return keep
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/normalize_landmarks_points.py b/tools/accuracy_checker/accuracy_checker/postprocessor/normalize_landmarks_points.py
deleted file mode 100644 (file)
index 323fed3..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..config import BoolField
-from ..postprocessor.postprocessor import Postprocessor, BasePostprocessorConfig
-from ..representation import FacialLandmarksAnnotation, FacialLandmarksPrediction
-
-
-class NormalizeConfigValidator(BasePostprocessorConfig):
-    use_annotation_rect = BoolField(optional=True)
-
-
-class NormalizeLandmarksPoints(Postprocessor):
-    __provider__ = 'normalize_landmarks_points'
-
-    annotation_types = (FacialLandmarksAnnotation, )
-    prediction_types = (FacialLandmarksPrediction, )
-    _config_validator_type = NormalizeConfigValidator
-
-    def configure(self):
-        self.use_annotation_rect = self.config.get('use_annotation_rect', False)
-
-    def process_image(self, annotation, prediction):
-        for target in annotation:
-            height, width, _ = self.image_size
-            x_start, y_start = 0, 0
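-            # landmarks are rescaled to [0, 1] relative to either the whole image or,
-            # when use_annotation_rect is set, the rectangle stored in the annotation metadata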
-            if self.use_annotation_rect:
-                resized_box = annotation[0].metadata.get('rect')
-                x_start, y_start, x_max, y_max = resized_box
-                width = x_max - x_start
-                height = y_max - y_start
-
-            target.x_values = (
-                (np.array(target.x_values, dtype=float) - x_start) / np.maximum(width, np.finfo(np.float64).eps)
-            )
-            target.y_values = (
-                (np.array(target.y_values, dtype=float) - y_start) / np.maximum(height, np.finfo(np.float64).eps)
-            )
-
-        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessing_executor.py b/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessing_executor.py
deleted file mode 100644 (file)
index 29bf854..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..config import ConfigValidator, StringField
-from ..utils import overrides, zipped_transform
-from .postprocessor import Postprocessor
-
-
-class PostprocessingExecutor:
-    def __init__(self, processors=None, dataset_name='custom', dataset_meta=None, state=None):
-        self._processors = []
-        self._image_processors = []
-        self._dataset_processors = []
-        self.dataset_meta = dataset_meta
-
-        self.state = state or {}
-
-        if not processors:
-            return
-
-        for config in processors:
-            postprocessor_config = PostprocessorConfig(
-                "{}.postprocessing".format(dataset_name),
-                on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT
-            )
-            postprocessor_config.validate(config)
-            postprocessor = Postprocessor.provide(config['type'], config, config['type'], self.dataset_meta, state)
-            self._processors.append(postprocessor)
-
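-        # processors overriding process_all need the whole dataset; once such a processor is met,
-        # every following processor is also run at dataset level to preserve the configured order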
-        allow_image_postprocessor = True
-        for processor in self._processors:
-            if overrides(processor, 'process_all', Postprocessor):
-                allow_image_postprocessor = False
-                self._dataset_processors.append(processor)
-            else:
-                if allow_image_postprocessor:
-                    self._image_processors.append(processor)
-                else:
-                    self._dataset_processors.append(processor)
-
-    def process_dataset(self, annotations, predictions):
-        for method in self._dataset_processors:
-            annotations, predictions = method.process_all(annotations, predictions)
-
-        return annotations, predictions
-
-    def process_image(self, annotation, prediction):
-        for method in self._image_processors:
-            annotation_entries, prediction_entries = method.get_entries(annotation, prediction)
-            method.process(annotation_entries, prediction_entries)
-
-        return annotation, prediction
-
-    def process_batch(self, annotations, predictions):
-        return zipped_transform(self.process_image, annotations, predictions)
-
-    def full_process(self, annotations, predictions):
-        return self.process_dataset(*self.process_batch(annotations, predictions))
-
-    @property
-    def has_dataset_processors(self):
-        return len(self._dataset_processors) != 0
-
-    def __call__(self, context, *args, **kwargs):
-        batch_annotation = context.annotation_batch
-        batch_prediction = context.prediction_batch
-        context.batch_annotation, context.batch_prediction = self.process_batch(batch_annotation, batch_prediction)
-
-
-class PostprocessorConfig(ConfigValidator):
-    type = StringField(choices=Postprocessor.providers)
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessor.py b/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessor.py
deleted file mode 100644 (file)
index 0e9fa73..0000000
+++ /dev/null
@@ -1,184 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import warnings
-from enum import Enum
-from ..representation import ContainerRepresentation
-from ..config import ConfigValidator, StringField, ConfigError, BaseField
-from ..dependency import ClassProvider
-from ..utils import (
-    zipped_transform,
-    string_to_list,
-    check_representation_type,
-    get_supported_representations,
-    enum_values
-)
-
-
-class BasePostprocessorConfig(ConfigValidator):
-    type = StringField()
-    annotation_source = BaseField(optional=True)
-    prediction_source = BaseField(optional=True)
-
-
-class Postprocessor(ClassProvider):
-    __provider_type__ = 'postprocessor'
-
-    annotation_types = ()
-    prediction_types = ()
-    _config_validator_type = BasePostprocessorConfig
-
-    def __init__(self, config, name=None, meta=None, state=None):
-        self.config = config
-        self.name = name
-        self.meta = meta
-        self.state = state
-        self.image_size = None
-
-        self.annotation_source = self.config.get('annotation_source')
-        if self.annotation_source and not isinstance(self.annotation_source, list):
-            self.annotation_source = string_to_list(self.annotation_source)
-
-        self.prediction_source = self.config.get('prediction_source')
-        if self.prediction_source and not isinstance(self.prediction_source, list):
-            self.prediction_source = string_to_list(self.prediction_source)
-
-        self.validate_config()
-        self.setup()
-
-    def __call__(self, *args, **kwargs):
-        return self.process_all(*args, **kwargs)
-
-    def setup(self):
-        self.configure()
-
-    def process_image(self, annotation, prediction):
-        raise NotImplementedError
-
-    def process(self, annotation, prediction):
-        image_size = annotation[0].metadata.get('image_size') if None not in annotation else None
-        self.image_size = None
-        if image_size:
-            self.image_size = image_size[0]
-        self.process_image(annotation, prediction)
-
-        return annotation, prediction
-
-    def process_all(self, annotations, predictions):
-        zipped_transform(self.process, zipped_transform(self.get_entries, annotations, predictions))
-        return annotations, predictions
-
-    def configure(self):
-        pass
-
-    def validate_config(self):
-        config_validator = self._config_validator_type(
-            self.name, on_extra_argument=BasePostprocessorConfig.ERROR_ON_EXTRA_ARGUMENT
-        )
-        config_validator.validate(self.config)
-
-    def get_entries(self, annotation, prediction):
-        message_not_found = '{}: {} is not found in container'
-        message_incorrect_type = "Incorrect type of {}. Postprocessor {} can work only with {}"
-
-        def resolve_container(container, supported_types, entry_name, sources=None):
-            if not isinstance(container, ContainerRepresentation):
-                if sources:
-                    message = 'Warning: {}_source can be applied only to container. Default value will be used'
-                    warnings.warn(message.format(entry_name))
-
-                return [container]
-
-            if not sources:
-                return get_supported_representations(container.values(), supported_types)
-
-            entries = []
-            for source in sources:
-                representation = container.get(source)
-                if not representation:
-                    raise ConfigError(message_not_found.format(entry_name, source))
-
-                if supported_types and not check_representation_type(representation, supported_types):
-                    raise TypeError(message_incorrect_type.format(entry_name, self.name, ','.join(supported_types)))
-
-                entries.append(representation)
-
-            return entries
-
-        annotation_entries = resolve_container(annotation, self.annotation_types, 'annotation', self.annotation_source)
-        prediction_entries = resolve_container(prediction, self.prediction_types, 'prediction', self.prediction_source)
-
-        return annotation_entries, prediction_entries
-
-
-class ApplyToOption(Enum):
-    ANNOTATION = 'annotation'
-    PREDICTION = 'prediction'
-    ALL = 'all'
-
-
-class PostprocessorWithTargetsConfigValidator(BasePostprocessorConfig):
-    apply_to = StringField(optional=True, choices=enum_values(ApplyToOption))
-
-
-class PostprocessorWithSpecificTargets(Postprocessor):
-    def setup(self):
-        apply_to = self.config.get('apply_to')
-        self.apply_to = ApplyToOption(apply_to) if apply_to else None
-
-        if (self.annotation_source or self.prediction_source) and self.apply_to:
-            raise ConfigError("apply_to and sources both provided. You need specify only one from them")
-
-        if not self.annotation_source and not self.prediction_source and not self.apply_to:
-            raise ConfigError("apply_to or annotation_source or prediction_source required for {}".format(self.name))
-
-        self.configure()
-
-    def process(self, annotation, prediction):
-        image_size = annotation[0].metadata.get('image_size') if None not in annotation else None
-        self.image_size = None
-        if image_size:
-            self.image_size = image_size[0]
-        target_annotations, target_predictions = None, None
-        if self.annotation_source or self.prediction_source:
-            target_annotations, target_predictions = self._choose_targets_using_sources(annotation, prediction)
-
-        if self.apply_to:
-            target_annotations, target_predictions = self._choose_targets_using_apply_to(annotation, prediction)
-
-        if not target_annotations and not target_predictions:
-            raise ValueError("Suitable targets for {} not found".format(self.name))
-
-        self.process_image(target_annotations, target_predictions)
-        return annotation, prediction
-
-    def _choose_targets_using_sources(self, annotations, predictions):
-        target_annotations = annotations if self.annotation_source else []
-        target_predictions = predictions if self.prediction_source else []
-
-        return target_annotations, target_predictions
-
-    def _choose_targets_using_apply_to(self, annotations, predictions):
-        targets_specification = {
-            ApplyToOption.ANNOTATION: (annotations, []),
-            ApplyToOption.PREDICTION: ([], predictions),
-            ApplyToOption.ALL: (annotations, predictions)
-        }
-
-        return targets_specification[self.apply_to]
-
-    def process_image(self, annotation, prediction):
-        raise NotImplementedError
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/resize_prediction_boxes.py b/tools/accuracy_checker/accuracy_checker/postprocessor/resize_prediction_boxes.py
deleted file mode 100644 (file)
index 2ce7b85..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..representation import DetectionPrediction, DetectionAnnotation
-from ..postprocessor.postprocessor import Postprocessor
-
-
-class ResizePredictionBoxes(Postprocessor):
-    """
-    Resize normalized predicted bounding boxes coordinates (i.e. from [0, 1] range) to input image shape.
-    """
-
-    __provider__ = 'resize_prediction_boxes'
-
-    prediction_types = (DetectionPrediction, )
-    annotation_types = (DetectionAnnotation, )
-
-    def process_image(self, annotations, predictions):
-        h, w, _ = self.image_size
-
-        for prediction in predictions:
-            prediction.x_mins *= w
-            prediction.x_maxs *= w
-            prediction.y_mins *= h
-            prediction.y_maxs *= h
-
-        return annotations, predictions
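A quick worked example of the rescaling above, with made-up numbers: for an image of height 480 and width 640, a normalized box (x_min=0.25, y_min=0.5, x_max=0.75, y_max=1.0) becomes (160, 240, 480, 480).

```python
# Toy check of the box rescaling above (hypothetical values).
from accuracy_checker.representation import DetectionPrediction

prediction = DetectionPrediction(x_mins=[0.25], y_mins=[0.5], x_maxs=[0.75], y_maxs=[1.0])
h, w = 480, 640                  # image_size stored in the annotation metadata is (h, w, c)
prediction.x_mins *= w           # -> [160.]
prediction.x_maxs *= w           # -> [480.]
prediction.y_mins *= h           # -> [240.]
prediction.y_maxs *= h           # -> [480.]
```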
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/resize_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/resize_segmentation_mask.py
deleted file mode 100644 (file)
index 0369342..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from functools import singledispatch
-import scipy.misc
-import numpy as np
-
-from ..config import NumberField
-from ..utils import get_size_from_config
-from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
-from ..representation import SegmentationPrediction, SegmentationAnnotation
-
-
-class ResizeMaskConfigValidator(PostprocessorWithTargetsConfigValidator):
-    size = NumberField(floats=False, optional=True, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-
-class ResizeSegmentationMask(PostprocessorWithSpecificTargets):
-    __provider__ = 'resize_segmentation_mask'
-
-    annotation_types = (SegmentationAnnotation, )
-    prediction_types = (SegmentationPrediction, )
-    _config_validator_type = ResizeMaskConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width = get_size_from_config(self.config, allow_none=True)
-
-    def process_image(self, annotation, prediction):
-        target_height = self.dst_height or self.image_size[0]
-        target_width = self.dst_width or self.image_size[1]
-
-        @singledispatch
-        def resize_segmentation_mask(entry, height, width):
-            return entry
-
-        @resize_segmentation_mask.register(SegmentationPrediction)
-        def _(entry, height, width):
-            entry_mask = []
-            for class_mask in entry.mask:
-                resized_mask = scipy.misc.imresize(class_mask, (height, width), 'nearest')
-                entry_mask.append(resized_mask)
-            entry.mask = np.array(entry_mask)
-
-            return entry
-
-        @resize_segmentation_mask.register(SegmentationAnnotation)
-        def _(entry, height, width):
-            entry.mask = scipy.misc.imresize(entry.mask, (height, width), 'nearest')
-            return entry
-
-        for target in annotation:
-            resize_segmentation_mask(target, target_height, target_width)
-
-        for target in prediction:
-            resize_segmentation_mask(target, target_height, target_width)
-
-        return annotation, prediction
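The `functools.singledispatch` construction above selects the resize routine from the type of the representation object (a stack of per-class masks for predictions, a single mask for annotations). A stripped-down illustration of the same dispatch pattern, with invented stand-in classes:

```python
# Minimal illustration of the singledispatch pattern used above (stand-in classes are invented).
from functools import singledispatch


class FakeAnnotation:
    pass


class FakePrediction:
    pass


@singledispatch
def resize(entry, height, width):
    return entry                       # default: unknown types pass through untouched


@resize.register(FakeAnnotation)
def _(entry, height, width):
    print('resize a single annotation mask to', (height, width))
    return entry


@resize.register(FakePrediction)
def _(entry, height, width):
    print('resize per-class prediction masks to', (height, width))
    return entry


resize(FakeAnnotation(), 512, 512)
resize(FakePrediction(), 512, 512)
```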
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/zoom_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/zoom_segmentation_mask.py
deleted file mode 100644 (file)
index d7d76f4..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-"""
-Copyright (c) 2018 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from .postprocessor import Postprocessor, BasePostprocessorConfig
-from ..representation import SegmentationAnnotation, SegmentationPrediction
-from ..config import NumberField
-
-
-class ZoomSegMaskConfigValidator(BasePostprocessorConfig):
-    zoom = NumberField(floats=False, min_value=1)
-
-
-class ZoomSegMask(Postprocessor):
-    """
-    Zoom probabilities of segmentation prediction.
-    """
-
-    __provider__ = 'zoom_segmentation_mask'
-
-    annotation_types = (SegmentationAnnotation, )
-    prediction_types = (SegmentationPrediction, )
-    _config_validator_type = ZoomSegMaskConfigValidator
-
-    def configure(self):
-        self.zoom = self.config['zoom']
-
-    def process_image(self, annotation, prediction):
-        for annotation_, prediction_ in zip(annotation, prediction):
-            height, width = annotation_.mask.shape[:2]
-            prob = prediction_.mask
-            zoom_prob = np.zeros((prob.shape[0], height, width), dtype=np.float32)
-            for c in range(prob.shape[0]):
-                for h in range(height):
-                    for w in range(width):
-                        r0 = h // self.zoom
-                        r1 = r0 + 1
-                        c0 = w // self.zoom
-                        c1 = c0 + 1
-                        rt = float(h) / self.zoom - r0
-                        ct = float(w) / self.zoom - c0
-                        v0 = rt * prob[c, r1, c0] + (1 - rt) * prob[c, r0, c0]
-                        v1 = rt * prob[c, r1, c1] + (1 - rt) * prob[c, r0, c1]
-                        zoom_prob[c, h, w] = (1 - ct) * v0 + ct * v1
-            prediction_.mask = zoom_prob
-
-        return annotation, prediction
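The triple loop above performs plain bilinear upsampling of the class probability maps by an integer `zoom` factor. A toy check of the interpolation formula for a single output pixel, with made-up values:

```python
# Toy check of the bilinear formula above for one output pixel (hypothetical values).
import numpy as np

prob = np.arange(12, dtype=np.float32).reshape(1, 3, 4)   # (classes, rows, cols)
zoom, h, w = 2, 3, 5
r0, c0 = h // zoom, w // zoom
r1, c1 = r0 + 1, c0 + 1
rt, ct = float(h) / zoom - r0, float(w) / zoom - c0
v0 = rt * prob[0, r1, c0] + (1 - rt) * prob[0, r0, c0]
v1 = rt * prob[0, r1, c1] + (1 - rt) * prob[0, r0, c1]
print((1 - ct) * v0 + ct * v1)                             # 8.5
```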
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/README.md b/tools/accuracy_checker/accuracy_checker/preprocessor/README.md
deleted file mode 100644 (file)
index 28fe73f..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-# Preprocessors
-
-A preprocessor is a function which processes input data before model inference.
-Every preprocessor has parameters available for configuration.
-Accuracy Checker supports the following set of preprocessors:
-
-* `resize` - resizing the image to a new width and height.
-  * `dst_width` and `dst_height` are the destination width and height for image resizing respectively.
-    You can also use `size` instead when the destination sizes are equal for both dimensions.
-  * `use_pillow` enables the Pillow library for resizing.
-    Accuracy Checker uses OpenCV as the default image reader.
-  * `interpolation` specifies the interpolation method to use.
-    Possible values depend on the image processing library:
-      * **OpenCV**: Nearest, Linear, Cubic, Area, Max, Lanczos4, Bits, Bits2
-      * **Pillow**: None, Nearest, Cubic, Bicubic, Box, Bilinear, Lanczos, Antialias, Hamming
-  * `aspect_ratio_scale` allows preserving the image aspect ratio in one of the following ways:
-    - `width` - rescale the width.
-    - `height` - rescale the height.
-    - `greater` - rescale the greater of the image dimensions.
-    - `fit_to_window` - adaptively resize the image to fit into a window of fixed size [dst_height x dst_width].
-
-* `normalization` - changing the range of pixel intensity values.
-  * `mean` - values which will be subtracted from the image channels.
-     You can specify one value for all channels or a comma-separated list of channel-wise values.
-  * `std` - values by which pixel values will be divided.
-     You can specify one value for all channels or a comma-separated list of channel-wise values.
-
-     Both parameters also accept precomputed values for frequently used datasets (e.g. `cifar10` or `imagenet`).
-
-* `bgr_to_rgb` - reversing image channels to convert an image from BGR to RGB format.
-* `bgr_to_gray` - converting a BGR image to grayscale.
-* `flip` - image mirroring around a specified axis.
-  * `mode` specifies the axis for flipping (`vertical` or `horizontal`).
-* `crop` - central cropping of the image.
-  * `dst_width` and `dst_height` are the destination width and height of the crop respectively. You can also use `size` instead when the destination sizes are equal.
-  * `use_pillow` enables the Pillow library for cropping.
-* `crop_rectangle` - cropping a region of interest using coordinates given in the annotation metadata.
-* `extend_around_rect` - scaling a region of interest using annotation metadata.
-  * `augmentation_param` is the scale factor for augmentation.
-* `point_alignment` - aligning keypoints stored in annotation metadata.
-  * `draw_points` - allows visualizing the points.
-  * `normalize` - enables normalization for keypoints.
-  * `dst_width` and `dst_height` are the destination width and height for keypoint resizing respectively. You can also use `size` instead when the destination sizes are equal.
-* `padding` - padding of the image.
-  * `stride` - stride for padding.
-  * `pad_value` - value used to fill the space around the original image.
-  * `dst_width` and `dst_height` are the destination width and height of the padded image respectively.
-    You can also use `size` instead when the destination sizes are equal for both dimensions.
-  * `pad_type` - padding space location. Supported: `center`, `left_top`, `right_bottom` (default is `center`).
-  * `use_numpy` - use NumPy for padding instead of the default OpenCV implementation.
-* `tiling` - image tiling.
-  * `margin` - margin for each tiled fragment of the image.
-  * `dst_width` and `dst_height` are the destination width and height of each tiled fragment respectively.
-    You can also use `size` instead when the destination sizes are equal for both dimensions.
-  
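Putting the options above together, a preprocessing pipeline is simply an ordered list of such entries in the dataset configuration. The fragment below is a hypothetical example written as the list of dicts that `PreprocessingExecutor` (see `preprocessing_executor.py` below) consumes; all values are illustrative.

```python
# Hypothetical preprocessing pipeline: resize keeping aspect ratio, center crop,
# channel reorder and ImageNet mean subtraction.
preprocessing = [
    {'type': 'resize', 'size': 256, 'aspect_ratio_scale': 'greater', 'interpolation': 'LINEAR'},
    {'type': 'crop', 'size': 224},
    {'type': 'bgr_to_rgb'},
    {'type': 'normalization', 'mean': 'imagenet'},
]
```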
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/__init__.py b/tools/accuracy_checker/accuracy_checker/preprocessor/__init__.py
deleted file mode 100644 (file)
index 3999b41..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .preprocessing_executor import PreprocessingExecutor
-from .preprocessors import (
-    Preprocessor,
-
-    Resize,
-    Flip,
-    Normalize,
-    Crop,
-    BgrToRgb,
-    BgrToGray,
-    CropRect,
-    ExtendAroundRect,
-    PointAligner,
-    Tiling,
-    Crop3D,
-    Normalize3d
-)
-
-__all__ = [
-    'PreprocessingExecutor',
-
-    'Preprocessor',
-    'Resize',
-    'Flip',
-    'Normalize',
-    'Crop',
-    'BgrToRgb',
-    'BgrToGray',
-    'CropRect',
-    'ExtendAroundRect',
-    'PointAligner',
-    'Tiling',
-    'Crop3D',
-    'Normalize3d'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessing_executor.py b/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessing_executor.py
deleted file mode 100644 (file)
index 5f5b740..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from ..config import ConfigValidator, StringField
-from ..preprocessor.preprocessors import Preprocessor
-
-
-class PreprocessingExecutor:
-    def __init__(self, processors=None, dataset_name='custom', dataset_meta=None):
-        self.processors = []
-        self.dataset_meta = dataset_meta
-
-        if not processors:
-            return
-
-        identifier = 'type'
-        for processor in processors:
-            preprocessor_config = PreprocessorConfig(
-                "{}.preprocessors".format(dataset_name), on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT
-            )
-
-            type_ = processor.get(identifier)
-            preprocessor_config.validate(processor, type_)
-            preprocessor = Preprocessor.provide(processor[identifier], config=processor, name=type_)
-
-            self.processors.append(preprocessor)
-
-    def __call__(self, context, *args, **kwargs):
-        batch_data = context.data_batch
-        batch_annotation = context.annotation_batch
-        context.data_batch = self.process(batch_data, batch_annotation)
-
-    def process(self, images, batch_annotation=None):
-        for i, _ in enumerate(images):
-            for processor in self.processors:
-                images[i] = processor(
-                    image=images[i], annotation_meta=batch_annotation[i].metadata if batch_annotation else None
-                )
-
-        return images
-
-
-class PreprocessorConfig(ConfigValidator):
-    type = StringField(choices=Preprocessor.providers)
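A minimal usage sketch for the executor above, assuming the package is importable and that a `SimpleNamespace` is an acceptable stand-in for the image wrapper (the real one comes from the data reader and carries the same `data`/`metadata` attributes):

```python
# Hypothetical driver for PreprocessingExecutor; SimpleNamespace stands in for the image wrapper.
from types import SimpleNamespace

import numpy as np

from accuracy_checker.preprocessor import PreprocessingExecutor

executor = PreprocessingExecutor(
    processors=[{'type': 'resize', 'size': 224}, {'type': 'normalization', 'mean': 'imagenet'}],
    dataset_name='example'
)

image = SimpleNamespace(data=np.zeros((480, 640, 3), dtype=np.float32), metadata={})
processed = executor.process([image])
print(processed[0].data.shape)   # (224, 224, 3)
```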
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessors.py b/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessors.py
deleted file mode 100644 (file)
index 675741e..0000000
+++ /dev/null
@@ -1,642 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import math
-import cv2
-import numpy as np
-from PIL import Image
-
-from ..config import BaseField, BoolField, ConfigValidator, NumberField, StringField, ConfigError
-from ..dependency import ClassProvider
-from ..utils import get_size_from_config, get_or_parse_value, string_to_tuple, get_size_3d_from_config
-
-
-class BasePreprocessorConfig(ConfigValidator):
-    type = StringField()
-
-
-class Preprocessor(ClassProvider):
-    __provider_type__ = 'preprocessor'
-    _config_validator_type = BasePreprocessorConfig
-
-    def __init__(self, config, name=None):
-        self.config = config
-        self.name = name
-
-        self.validate_config()
-        self.configure()
-
-    def __call__(self, *args, **kwargs):
-        return self.process(*args, **kwargs)
-
-    def process(self, image, annotation_meta=None):
-        raise NotImplementedError
-
-    def configure(self):
-        pass
-
-    def validate_config(self):
-        self._config_validator_type(
-            self.name, on_extra_argument=self._config_validator_type.ERROR_ON_EXTRA_ARGUMENT
-        ).validate(self.config)
-
-
-def scale_width(dst_width, dst_height, image_width, image_height,):
-    return int(dst_width * image_width / image_height), dst_height
-
-
-def scale_height(dst_width, dst_height, image_width, image_height):
-    return dst_width, int(dst_height * image_height / image_width)
-
-
-def scale_greater(dst_width, dst_height, image_width, image_height):
-    if image_height > image_width:
-        return scale_height(dst_width, dst_height, image_width, image_height)
-    return scale_width(dst_width, dst_height, image_width, image_height)
-
-
-def scale_fit_to_window(dst_width, dst_height, image_width, image_height):
-    im_scale = min(dst_height / image_height, dst_width / image_width)
-    return int(im_scale * image_width), int(im_scale * image_height)
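A quick worked comparison of the four scaling policies, assuming the functions above are in scope, for a hypothetical 1280x720 source and a 300x300 target (the returned tuple is `(new_width, new_height)`):

```python
# Worked comparison of the aspect-ratio policies above (hypothetical sizes).
print(scale_width(300, 300, 1280, 720))          # (533, 300): keep dst height, rescale width
print(scale_height(300, 300, 1280, 720))         # (300, 168): keep dst width, rescale height
print(scale_greater(300, 300, 1280, 720))        # (533, 300): width is the greater side here
print(scale_fit_to_window(300, 300, 1280, 720))  # (300, 168): fit the image inside 300x300
```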
-
-
-PILLOW_INTERPOLATION = {
-    'NEAREST': Image.NEAREST,
-    'NONE': Image.NONE,
-    'BOX': Image.BOX,
-    'BILINEAR': Image.BILINEAR,
-    'LINEAR': Image.LINEAR,
-    'HAMMING': Image.HAMMING,
-    'BICUBIC': Image.BICUBIC,
-    'CUBIC': Image.CUBIC,
-    'LANCZOS': Image.LANCZOS,
-    'ANTIALIAS': Image.ANTIALIAS,
-}
-
-OPENCV_INTERPOLATION = {
-    'NEAREST': cv2.INTER_NEAREST,
-    'LINEAR': cv2.INTER_LINEAR,
-    'CUBIC': cv2.INTER_CUBIC,
-    'AREA': cv2.INTER_AREA,
-    'MAX': cv2.INTER_MAX,
-    'BITS': cv2.INTER_BITS,
-    'BITS2': cv2.INTER_BITS2,
-    'LANCZOS4': cv2.INTER_LANCZOS4,
-}
-
-ASPECT_RATIO_SCALE = {
-    'width': scale_width,
-    'height': scale_height,
-    'greater': scale_greater,
-    'fit_to_window': scale_fit_to_window
-}
-
-class ResizeConfigValidator(BasePreprocessorConfig):
-    size = NumberField(floats=False, optional=True, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    aspect_ratio_scale = StringField(choices=ASPECT_RATIO_SCALE.keys(), optional=True)
-    interpolation = StringField(
-        choices=set(PILLOW_INTERPOLATION) | set(OPENCV_INTERPOLATION), optional=True
-    )
-    use_pillow = BoolField(optional=True)
-
-
-class Resize(Preprocessor):
-    __provider__ = 'resize'
-    _config_validator_type = ResizeConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width = get_size_from_config(self.config)
-        self.use_pil = self.config.get('use_pillow', False)
-
-        interpolation = self.config.get('interpolation', 'LINEAR')
-
-        self.scaling_func = ASPECT_RATIO_SCALE.get(self.config.get('aspect_ratio_scale'))
-
-        if self.use_pil and interpolation.upper() not in PILLOW_INTERPOLATION:
-            raise ValueError("Incorrect interpolation option: {} for resize preprocessing".format(interpolation))
-        if not self.use_pil and interpolation.upper() not in OPENCV_INTERPOLATION:
-            raise ValueError("Incorrect interpolation option: {} for resize preprocessing".format(interpolation))
-
-        if self.use_pil:
-            self.interpolation = PILLOW_INTERPOLATION[interpolation]
-        else:
-            self.interpolation = OPENCV_INTERPOLATION[interpolation]
-
-    def process(self, image, annotation_meta=None):
-        data = image.data
-        new_height, new_width = self.dst_height, self.dst_width
-
-        def process_data(data, new_height, new_width, scale_func, use_pil, interpolation):
-            if scale_func:
-                image_h, image_w = data.shape[:2]
-                new_width, new_height = self.scaling_func(self.dst_width, self.dst_height, image_w, image_h)
-
-            image.metadata['preferable_width'] = max(new_width, self.dst_width)
-            image.metadata['preferable_height'] = max(new_height, self.dst_height)
-
-            if use_pil:
-                data = Image.fromarray(data)
-                data = data.resize((new_width, new_height), interpolation)
-                data = np.array(data)
-                return data
-
-            data = cv2.resize(data, (new_width, new_height), interpolation=interpolation).astype(np.float32)
-            if len(data.shape) == 2:
-                data = np.expand_dims(data, axis=-1)
-
-            return data
-
-        image.data = (
-            process_data(data, new_height, new_width, self.scaling_func, self.use_pil, self.interpolation)
-            if not isinstance(data, list) else [
-                process_data(data_fragment, new_height, new_width, self.scaling_func, self.use_pil, self.interpolation)
-                for data_fragment in data
-            ]
-        )
-
-        return image
-
-
-class NormalizeConfigValidator(BasePreprocessorConfig):
-    mean = BaseField(optional=True)
-    std = BaseField(optional=True)
-
-class Normalize(Preprocessor):
-    __provider__ = 'normalization'
-
-    PRECOMPUTED_MEANS = {
-        'imagenet': (104.00698793, 116.66876762, 122.67891434),
-        'cifar10': (125.307, 122.961, 113.8575),
-    }
-
-    PRECOMPUTED_STDS = {
-        'imagenet': (104.00698793, 116.66876762, 122.67891434),
-        'cifar10': (125.307, 122.961, 113.8575),
-    }
-
-    _config_validator_type = NormalizeConfigValidator
-
-    def configure(self):
-        self.mean = get_or_parse_value(self.config.get('mean'), Normalize.PRECOMPUTED_MEANS)
-        self.std = get_or_parse_value(self.config.get('std'), Normalize.PRECOMPUTED_STDS)
-        if not self.mean and not self.std:
-            raise ConfigError('mean or std value should be provided')
-
-        if self.std and 0 in self.std:
-            raise ConfigError('std value should not contain 0')
-
-        if self.mean and not (len(self.mean) == 3 or len(self.mean) == 1):
-            raise ConfigError('mean should be one value or a comma-separated list of channel-wise values')
-
-        if self.std and not (len(self.std) == 3 or len(self.std) == 1):
-            raise ConfigError('std should be one value or a comma-separated list of channel-wise values')
-
-    def process(self, image, annotation_meta=None):
-        def process_data(data, mean, std):
-            if self.mean:
-                data = data - mean
-            if self.std:
-                data = data / std
-
-            return data
-
-        image.data = process_data(image.data, self.mean, self.std) if not isinstance(image.data, list) else [
-            process_data(data_fragment, self.mean, self.std) for data_fragment in image.data
-        ]
-
-        return image
-
-
-class BgrToRgb(Preprocessor):
-    __provider__ = 'bgr_to_rgb'
-
-    def process(self, image, annotation_meta=None):
-        def process_data(data):
-            return cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
-        image.data = process_data(image.data) if not isinstance(image.data, list) else [
-            process_data(fragment) for fragment in image.data
-        ]
-        return image
-
-
-class BgrToGray(Preprocessor):
-    __provider__ = 'bgr_to_gray'
-
-    def process(self, image, annotation_meta=None):
-        image.data = np.expand_dims(cv2.cvtColor(image.data, cv2.COLOR_BGR2GRAY).astype(np.float32), -1)
-        return image
-
-FLIP_MODES = {
-    'horizontal': 0,
-    'vertical': 1
-}
-
-
-class FlipConfigValidator(BasePreprocessorConfig):
-    mode = StringField(choices=FLIP_MODES.keys())
-
-
-class Flip(Preprocessor):
-    __provider__ = 'flip'
-
-    _config_validator_type = FlipConfigValidator
-
-    def configure(self):
-        mode = self.config.get('mode', 'horizontal')
-        if isinstance(mode, str):
-            self.mode = FLIP_MODES[mode]
-
-    def process(self, image, annotation_meta=None):
-        image.data = cv2.flip(image.data, self.mode)
-        return image
-
-
-class CropConfigValidator(BasePreprocessorConfig):
-    size = NumberField(floats=False, optional=True, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    use_pillow = BoolField(optional=True)
-
-
-class Crop(Preprocessor):
-    __provider__ = 'crop'
-    _config_validator_type = CropConfigValidator
-
-    def configure(self):
-        self.use_pillow = self.config.get('use_pillow', False)
-        self.dst_height, self.dst_width = get_size_from_config(self.config)
-
-    def process(self, image, annotation_meta=None):
-        data = image.data
-
-        def process_data(data, dst_height, dst_width, use_pillow):
-            height, width = data.shape[:2]
-            if use_pillow:
-                i = int(round((height - self.dst_height) / 2.))
-                j = int(round((width - self.dst_width) / 2.))
-                croped_data = Image.fromarray(data).crop((j, i, j + self.dst_width, i + self.dst_height))
-                data = np.array(croped_data)
-                return data
-
-            if width < dst_width or height < dst_height:
-                resized = np.array([width, height])
-                if resized[0] < dst_width:
-                    resized = resized * dst_width / resized[0]
-                if resized[1] < dst_height:
-                    resized = resized * dst_height / resized[1]
-
-                data = cv2.resize(data, tuple(np.ceil(resized).astype(int)))
-
-            height, width, _ = data.shape
-            start_height = (height - dst_height) // 2
-            start_width = (width - dst_width) // 2
-
-            return data[start_height:start_height + dst_height, start_width:start_width + dst_width]
-
-        image.data = process_data(
-            data, self.dst_height, self.dst_width, self.use_pillow
-        ) if not isinstance(data, list) else [
-            process_data(fragment, self.dst_height, self.dst_width, self.use_pillow) for fragment in image.data
-        ]
-        return image
-
-
-class CropRect(Preprocessor):
-    __provider__ = 'crop_rect'
-
-    def process(self, image, annotation_meta=None):
-        rect = annotation_meta.get('rect')
-        if not rect:
-            return image
-
-        rows, cols = image.data.shape[:2]
-        rect_x_min, rect_y_min, rect_x_max, rect_y_max = rect
-        start_width, start_height = max(0, rect_x_min), max(0, rect_y_min)
-
-        width = min(start_width + (rect_x_max - rect_x_min), cols)
-        height = min(start_height + (rect_y_max - rect_y_min), rows)
-
-        image.data = image.data[start_height:height, start_width:width]
-        return image
-
-
-class ExtendAroundRectConfigValidator(BasePreprocessorConfig):
-    augmentation_param = NumberField(floats=True, optional=True)
-
-
-class ExtendAroundRect(Preprocessor):
-    __provider__ = 'extend_around_rect'
-    _config_validator_type = ExtendAroundRectConfigValidator
-
-    def configure(self):
-        self.augmentation_param = self.config.get('augmentation_param', 0)
-
-    def process(self, image, annotation_meta=None):
-        rect = annotation_meta.get('rect')
-        rows, cols = image.data.shape[:2]
-
-        rect_x_left, rect_y_top, rect_x_right, rect_y_bottom = rect or (0, 0, cols, rows)
-        rect_x_left = max(0, rect_x_left)
-        rect_y_top = max(0, rect_y_top)
-        rect_x_right = min(rect_x_right, cols)
-        rect_y_bottom = min(rect_y_bottom, rows)
-
-        rect_w = rect_x_right - rect_x_left
-        rect_h = rect_y_bottom - rect_y_top
-
-        width_extent = (rect_x_right - rect_x_left + 1) * self.augmentation_param
-        height_extent = (rect_y_bottom - rect_y_top + 1) * self.augmentation_param
-        rect_x_left = rect_x_left - width_extent
-        border_left = abs(min(0, rect_x_left))
-        rect_x_left = int(max(0, rect_x_left))
-
-        rect_y_top = rect_y_top - height_extent
-        border_top = abs(min(0, rect_y_top))
-        rect_y_top = int(max(0, rect_y_top))
-
-        rect_y_bottom += border_top
-        rect_y_bottom = int(rect_y_bottom + height_extent + 0.5)
-        border_bottom = abs(max(0, rect_y_bottom - rows))
-
-        rect_x_right += border_left
-        rect_x_right = int(rect_x_right + width_extent + 0.5)
-        border_right = abs(max(0, rect_x_right - cols))
-
-        image.data = cv2.copyMakeBorder(
-            image.data, int(border_top), int(border_bottom), int(border_left), int(border_right), cv2.BORDER_REPLICATE
-        )
-
-        rect = (
-            int(rect_x_left), int(rect_y_top),
-            int(rect_x_left) + int(rect_w + width_extent * 2), int(rect_y_top) + int(rect_h + height_extent * 2)
-        )
-        annotation_meta['rect'] = rect
-
-        return image
-
-
-class PointAlignerConfigValidator(BasePreprocessorConfig):
-    draw_points = BoolField(optional=True)
-    normalize = BoolField(optional=True)
-    size = NumberField(floats=False, optional=True, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-
-class PointAligner(Preprocessor):
-    __provider__ = 'point_alignment'
-    _config_validator_type = PointAlignerConfigValidator
-
-    ref_landmarks = np.array([
-        30.2946 / 96, 51.6963 / 112,
-        65.5318 / 96, 51.5014 / 112,
-        48.0252 / 96, 71.7366 / 112,
-        33.5493 / 96, 92.3655 / 112,
-        62.7299 / 96, 92.2041 / 112
-    ], dtype=np.float64).reshape(5, 2)
-
-    def configure(self):
-        self.draw_points = self.config.get('draw_points', False)
-        self.normalize = self.config.get('normalize', True)
-        self.dst_height, self.dst_width = get_size_from_config(self.config)
-
-    def process(self, image, annotation_meta=None):
-        keypoints = annotation_meta.get('keypoints')
-        image.data = self.align(image.data, keypoints)
-        return image
-
-    def align(self, img, points):
-        if not points:
-            return img
-
-        points_number = len(points) // 2
-        points = np.array(points).reshape(points_number, 2)
-
-        inp_shape = [1., 1.]
-        if self.normalize:
-            inp_shape = img.shape
-
-        keypoints = points.copy().astype(np.float64)
-        keypoints[:, 0] *= (float(self.dst_width) / inp_shape[1])
-        keypoints[:, 1] *= (float(self.dst_height) / inp_shape[0])
-
-        keypoints_ref = np.zeros((points_number, 2), dtype=np.float64)
-        keypoints_ref[:, 0] = self.ref_landmarks[:, 0] * self.dst_width
-        keypoints_ref[:, 1] = self.ref_landmarks[:, 1] * self.dst_height
-
-        transformation_matrix = self.transformation_from_points(np.array(keypoints_ref), np.array(keypoints))
-        img = cv2.resize(img, (self.dst_width, self.dst_height))
-        if self.draw_points:
-            for point in keypoints:
-                cv2.circle(img, (int(point[0]), int(point[1])), 5, (255, 0, 0), -1)
-
-        return cv2.warpAffine(img, transformation_matrix, (self.dst_width, self.dst_height), flags=cv2.WARP_INVERSE_MAP)
-
-    @staticmethod
-    def transformation_from_points(points1, points2):
-        points1 = np.matrix(points1.astype(np.float64))
-        points2 = np.matrix(points2.astype(np.float64))
-
-        c1 = np.mean(points1, axis=0)
-        c2 = np.mean(points2, axis=0)
-        points1 -= c1
-        points2 -= c2
-        s1 = np.std(points1)
-        s2 = np.std(points2)
-        points1 /= np.maximum(s1, np.finfo(np.float64).eps)
-        points2 /= np.maximum(s1, np.finfo(np.float64).eps)
-        points_std_ratio = s2 / np.maximum(s1, np.finfo(np.float64).eps)
-
-        u, _, vt = np.linalg.svd(points1.T * points2)
-        r = (u * vt).T
-
-        return np.hstack((points_std_ratio * r, c2.T - points_std_ratio * r * c1.T))
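`transformation_from_points` estimates a similarity transform (rotation, uniform scale, translation) between two keypoint sets via SVD. A hypothetical check, assuming the module above is importable: if the second set is the first one rotated by 90 degrees, scaled by 2 and shifted, the recovered 2x3 matrix should encode exactly that.

```python
# Hypothetical check of the similarity-transform estimation above.
import numpy as np

from accuracy_checker.preprocessor.preprocessors import PointAligner

points1 = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.], [0.5, 0.5]])
rotation = np.array([[0., -1.], [1., 0.]])                    # 90 degree rotation
points2 = 2.0 * points1.dot(rotation.T) + np.array([3., 4.])  # scale by 2, shift by (3, 4)

matrix = PointAligner.transformation_from_points(points1, points2)
print(np.round(matrix, 3))   # ~ [[0, -2, 3], [2, 0, 4]]: scaled rotation block plus translation
```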
-
-
-def center_padding(dst_width, dst_height, width, height):
-    pad = [int(math.floor((dst_height - height) / 2.0)), int(math.floor((dst_width - width) / 2.0))]
-    pad.extend([dst_height - height - pad[0], dst_width - width - pad[1]])
-
-    return pad
-
-
-def right_bottom_padding(dst_width, dst_height, width, height):
-    return [0, 0, dst_height - height, dst_width - width]
-
-
-def left_top_padding(dst_width, dst_height, width, height):
-    return [dst_height - height, dst_width - width, 0, 0]
-
-
-padding_func = {
-    'center': center_padding,
-    'left_top': left_top_padding,
-    'right_bottom': right_bottom_padding
-}
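A worked example of the three padding helpers above, assuming they are in scope. The returned list is in `[top, left, bottom, right]` order, which matches how `Padding` later passes it to `cv2.copyMakeBorder`. Hypothetical sizes: a 7-pixel-wide, 6-pixel-high image padded to 10x10.

```python
# Worked example of the padding helpers above (hypothetical sizes: width 7, height 6 -> 10x10).
print(center_padding(10, 10, 7, 6))        # [2, 1, 2, 2]  -> [top, left, bottom, right]
print(right_bottom_padding(10, 10, 7, 6))  # [0, 0, 4, 3]
print(left_top_padding(10, 10, 7, 6))      # [4, 3, 0, 0]
```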
-
-
-class PaddingConfigValidator(BasePreprocessorConfig):
-    stride = NumberField(floats=False, min_value=1, optional=True)
-    pad_value = StringField(optional=True)
-    size = NumberField(floats=False, optional=True, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    pad_type = StringField(choices=padding_func.keys(), optional=True)
-    use_numpy = BoolField(optional=True)
-
-
-class Padding(Preprocessor):
-    __provider__ = 'padding'
-    _config_validator_type = PaddingConfigValidator
-
-    def configure(self):
-        self.stride = self.config.get('stride', 1)
-        pad_val = self.config.get('pad_value', '0,0,0')
-        if isinstance(pad_val, int):
-            self.pad_value = (pad_val, pad_val, pad_val)
-        if isinstance(pad_val, str):
-            self.pad_value = string_to_tuple(pad_val, int)
-        self.dst_height, self.dst_width = get_size_from_config(self.config, allow_none=True)
-        self.pad_func = padding_func[self.config.get('pad_type', 'center')]
-        self.use_numpy = self.config.get('use_numpy', False)
-
-    def process(self, image, annotation_meta=None):
-        height, width, _ = image.data.shape
-        pref_height = self.dst_height or image.metadata.get('preferable_height', height)
-        pref_width = self.dst_width or image.metadata.get('preferable_width', width)
-        height = min(height, pref_height)
-        pref_height = math.ceil(pref_height / float(self.stride)) * self.stride
-        pref_width = max(pref_width, width)
-        pref_width = math.ceil(pref_width / float(self.stride)) * self.stride
-        pad = self.pad_func(pref_width, pref_height, width, height)
-        image.metadata['padding'] = pad
-        padding_realization_func = self._opencv_padding if not self.use_numpy else self._numpy_padding
-        image.data = padding_realization_func(image.data, pad)
-
-        return image
-
-    def _opencv_padding(self, image, pad):
-        return cv2.copyMakeBorder(
-            image, pad[0], pad[2], pad[1], pad[3], cv2.BORDER_CONSTANT, value=self.pad_value
-        )
-
-    def _numpy_padding(self, image, pad):
-        pad_values = (
-            (self.pad_value[0], self.pad_value[0]),
-            (self.pad_value[1], self.pad_value[1]),
-            (self.pad_value[2], self.pad_value[2])
-        )
-        return np.pad(
-            image, ((pad[0], pad[2]), (pad[1], pad[3]), (0, 0)),
-            mode='constant', constant_values=pad_values
-        )
-
-
-class TilingConfigValidator(BasePreprocessorConfig):
-    margin = NumberField(floats=False, min_value=1)
-    size = NumberField(floats=False, optional=True, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-
-
-class Tiling(Preprocessor):
-    __provider__ = 'tiling'
-    _config_validator_type = TilingConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width = get_size_from_config(self.config)
-        self.margin = self.config['margin']
-
-    def process(self, image, annotation_meta=None):
-        data = image.data
-        image_size = data.shape
-        output_height = self.dst_height - 2 * self.margin
-        output_width = self.dst_width - 2 * self.margin
-        data = cv2.copyMakeBorder(data, *np.full(4, self.margin), cv2.BORDER_REFLECT_101)
-        num_tiles_h = image_size[0] // output_height + (1 if image_size[0] % output_height else 0)
-        num_tiles_w = image_size[1] // output_width + (1 if image_size[1] % output_width else 0)
-        tiled_data = []
-        for height in range(num_tiles_h):
-            for width in range(num_tiles_w):
-                offset = [output_height * height, output_width * width]
-                tile = data[offset[0]:offset[0] + self.dst_height, offset[1]:offset[1] + self.dst_width, :]
-                margin = [0, self.dst_height - tile.shape[0], 0, self.dst_width - tile.shape[1]]
-                tile = cv2.copyMakeBorder(tile, *margin, cv2.BORDER_REFLECT_101)
-                tiled_data.append(tile)
-        image.data = tiled_data
-        image.metadata['tiles_shape'] = (num_tiles_h, num_tiles_w)
-        image.metadata['multi_infer'] = True
-
-        return image
-
-
-class Crop3DConfigValidator(BasePreprocessorConfig):
-    size = NumberField(floats=False, min_value=1)
-    dst_width = NumberField(floats=False, optional=True, min_value=1)
-    dst_height = NumberField(floats=False, optional=True, min_value=1)
-    dst_volume = NumberField(floats=False, optional=True, min_value=1)
-
-
-class Crop3D(Preprocessor):
-    __provider__ = 'crop3d'
-    _config_validator_type = Crop3DConfigValidator
-
-    def configure(self):
-        self.dst_height, self.dst_width, self.dst_volume = get_size_3d_from_config(self.config)
-
-    def process(self, image, annotation_meta=None):
-        image.data = self.crop_center(image.data, self.dst_height, self.dst_width, self.dst_volume)
-        return image
-
-    @staticmethod
-    def crop_center(img, cropx, cropy, cropz):
-
-        z, y, x, _ = img.shape
-
-        # Make sure starting index is >= 0
-        startx = max(x // 2 - (cropx // 2), 0)
-        starty = max(y // 2 - (cropy // 2), 0)
-        startz = max(z // 2 - (cropz // 2), 0)
-
-        # Make sure ending index is <= size
-        endx = min(startx + cropx, x)
-        endy = min(starty + cropy, y)
-        endz = min(startz + cropz, z)
-
-        return img[startz:endz, starty:endy, startx:endx, :]
-
-
-class Normalize3d(Preprocessor):
-    __provider__ = "normalize3d"
-
-    def process(self, image, annotation_meta=None):
-        data = self.normalize_img(image.data)
-        image_list = []
-        for img in data:
-            image_list.append(img)
-        image.data = image_list
-        image.metadata['multi_infer'] = True
-
-        return image
-
-    @staticmethod
-    def normalize_img(img):
-        for channel in range(img.shape[3]):
-            channel_val = img[:, :, :, channel] - np.mean(img[:, :, :, channel])
-            channel_val /= np.std(img[:, :, :, channel])
-            img[:, :, :, channel] = channel_val
-
-        return img
diff --git a/tools/accuracy_checker/accuracy_checker/presenters.py b/tools/accuracy_checker/accuracy_checker/presenters.py
deleted file mode 100644 (file)
index 33c1346..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import namedtuple
-from enum import Enum
-import numpy as np
-
-from .dependency import ClassProvider
-from .logging import print_info
-
-EvaluationResult = namedtuple(
-    'EvaluationResult', [
-        'evaluated_value', 'reference_value', 'name', 'metric_type', 'threshold', 'meta'
-    ]
-)
-
-
-class Color(Enum):
-    PASSED = 0
-    FAILED = 1
-
-
-def color_format(s, color=Color.PASSED):
-    if color == Color.PASSED:
-        return "\x1b[0;32m{}\x1b[0m".format(s)
-    return "\x1b[0;31m{}\x1b[0m".format(s)
-
-
-class BasePresenter(ClassProvider):
-    __provider_type__ = "presenter"
-
-    def write_result(self, evaluation_result, output_callback=None, ignore_results_formatting=False):
-        raise NotImplementedError
-
-
-class ScalarPrintPresenter(BasePresenter):
-    __provider__ = "print_scalar"
-
-    def write_result(self, evaluation_result: EvaluationResult, output_callback=None, ignore_results_formatting=False):
-        value, reference, name, _, threshold, meta = evaluation_result
-        value = np.mean(value)
-        postfix, scale, result_format = get_result_format_parameters(meta, ignore_results_formatting)
-        difference = None
-        if reference:
-            _, original_scale, _ = get_result_format_parameters(meta, False)
-            difference = compare_with_ref(reference, value, original_scale)
-        write_scalar_result(
-            value, name, threshold, difference, postfix=postfix, scale=scale, result_format=result_format
-        )
-
-
-class VectorPrintPresenter(BasePresenter):
-    __provider__ = "print_vector"
-
-    def write_result(self, evaluation_result: EvaluationResult, output_callback=None, ignore_results_formatting=False):
-        value, reference, name, _, threshold, meta = evaluation_result
-        if threshold:
-            threshold = float(threshold)
-
-        value_names = meta.get('names')
-        postfix, scale, result_format = get_result_format_parameters(meta, ignore_results_formatting)
-        if np.isscalar(value) or np.size(value) == 1:
-            if not np.isscalar(value):
-                value = value[0]
-            difference = None
-            if reference:
-                _, original_scale, _ = get_result_format_parameters(meta, False)
-                difference = compare_with_ref(reference, value, original_scale)
-            write_scalar_result(
-                value, name, threshold, difference,
-                value_name=value_names[0] if value_names else None,
-                postfix=postfix[0] if not np.isscalar(postfix) else postfix,
-                scale=scale[0] if not np.isscalar(scale) else scale,
-                result_format=result_format
-            )
-            return
-
-        for index, res in enumerate(value):
-            cur_postfix = '%'
-            if not np.isscalar(postfix):
-                if index < len(postfix):
-                    cur_postfix = postfix[index]
-            else:
-                cur_postfix = postfix
-            write_scalar_result(
-                res, name,
-                value_name=value_names[index] if value_names else None,
-                postfix=cur_postfix,
-                scale=scale[index] if not np.isscalar(scale) else scale,
-                result_format=result_format
-            )
-
-        if len(value) > 1 and meta.get('calculate_mean', True):
-            mean_value = np.mean(np.multiply(value, scale))
-            difference = None
-            if reference:
-                original_scale = get_result_format_parameters(meta, False)[1] if ignore_results_formatting else 1
-                difference = compare_with_ref(reference, mean_value, original_scale)
-            write_scalar_result(
-                mean_value, name, threshold, difference, value_name='mean',
-                postfix=postfix[-1] if not np.isscalar(postfix) else postfix, scale=1,
-                result_format=result_format
-            )
-
-
-def write_scalar_result(
-        res_value, name, threshold=None, diff_with_ref=None, value_name=None,
-        postfix='%', scale=100, result_format='{:.2f}'
-):
-    display_name = "{}@{}".format(name, value_name) if value_name else name
-    display_result = result_format.format(res_value * scale)
-    message = '{}: {}{}'.format(display_name, display_result, postfix)
-
-    if diff_with_ref:
-        threshold = threshold or 0
-        if threshold <= diff_with_ref:
-            fail_message = "[FAILED: error = {:.4}]".format(diff_with_ref)
-            message = "{} {}".format(message, color_format(fail_message, Color.FAILED))
-        else:
-            message = "{} {}".format(message, color_format("[OK]", Color.PASSED))
-
-    print_info(message)
-
-
-def compare_with_ref(reference, res_value, scale):
-    return abs(reference - (res_value * scale))
-
-
-class ReturnValuePresenter(BasePresenter):
-    __provider__ = "return_value"
-
-    def write_result(self, evaluation_result: EvaluationResult, output_callback=None, ignore_results_formatting=False):
-        if output_callback:
-            output_callback(evaluation_result)
-
-
-def get_result_format_parameters(meta, use_default_formatting):
-    postfix = ' '
-    scale = 1
-    result_format = '{}'
-    if not use_default_formatting:
-        postfix = meta.get('postfix', '%')
-        scale = meta.get('scale', 100)
-        result_format = meta.get('data_format', '{:.2f}')
-
-    return postfix, scale, result_format
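A hypothetical end-to-end use of the presenters above, assuming the module is importable as `accuracy_checker.presenters`; the values are invented. An evaluated top-1 accuracy of 0.7512 is formatted with the percent scale taken from `meta` and compared against a reference of 75.0 with a 0.5 threshold.

```python
# Hypothetical example: format one scalar metric value against a reference.
from accuracy_checker.presenters import EvaluationResult, ScalarPrintPresenter

result = EvaluationResult(
    evaluated_value=0.7512, reference_value=75.0, name='accuracy@top1',
    metric_type='accuracy', threshold=0.5, meta={'postfix': '%', 'scale': 100}
)
ScalarPrintPresenter().write_result(result)   # logs something like "accuracy@top1: 75.12% [OK]"
```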
diff --git a/tools/accuracy_checker/accuracy_checker/progress_reporters.py b/tools/accuracy_checker/accuracy_checker/progress_reporters.py
deleted file mode 100644 (file)
index df1e04f..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import time
-
-from tqdm import tqdm
-
-from .dependency import ClassProvider
-from .logging import print_info
-
-
-class ProgressReporter(ClassProvider):
-    __provider_type__ = 'progress_reporter'
-
-    def __init__(self, dataset_size=None):
-        self.finished = True
-        self.dataset_size = None
-        self.start_time = None
-        self.prev_time = None
-        if dataset_size is not None:
-            self.reset(dataset_size)
-        self.current = 0
-
-    def finish(self, objects_processed=True):
-        self.finished = True
-        if not objects_processed:
-            return
-
-        process_time = time.time() - self.start_time
-        print_info('{} objects processed in {:.3f} seconds'.format(self.dataset_size, process_time))
-
-    @property
-    def progress(self):
-        return (self.current / self.dataset_size) * 100 if self.dataset_size else 0
-
-    def reset(self, dataset_size):
-        if not self.finished:
-            self.finish(objects_processed=False)
-        self.current = 0
-
-        self.dataset_size = dataset_size
-        self.start_time = time.time()
-        self.finished = False
-
-
-class PrintProgressReporter(ProgressReporter):
-    __provider__ = 'print'
-
-    def __init__(self, dataset_size=None, print_interval=1000):
-        super().__init__(dataset_size)
-        self.print_interval = print_interval
-
-    def reset(self, dataset_size):
-        self.dataset_size = dataset_size
-        print_info('Total dataset size: {}'.format(dataset_size))
-        self.start_time = time.time()
-        self.prev_time = self.start_time
-
-    def update(self, batch_id, batch_size):
-        self.current += batch_size
-        if (batch_id + 1) % self.print_interval != 0:
-            return
-
-        now = time.time()
-        batch_time = now - self.prev_time
-        self.prev_time = now
-
-        print_info('{} / {} processed in {:.3f}s'.format((batch_id + 1) * batch_size, self.dataset_size, batch_time))
-
-
-class TQDMReporter(ProgressReporter):
-    __provider__ = 'bar'
-
-    def update(self, _batch_id, batch_size):
-        self.current += batch_size
-        self.tqdm.update(batch_size)
-
-    def finish(self, objects_processed=True):
-        self.tqdm.close()
-        super().finish(objects_processed)
-
-    def reset(self, dataset_size):
-        super().reset(dataset_size)
-        self.tqdm = tqdm(
-            total=self.dataset_size, unit='frames', leave=False,
-            bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
-        )
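A hypothetical driver for the reporters above, assuming the classes are in scope: ten batches of 32 frames with a progress line printed every two batches.

```python
# Hypothetical usage of the print-based reporter above.
reporter = PrintProgressReporter(dataset_size=320, print_interval=2)
for batch_id in range(10):
    reporter.update(batch_id, batch_size=32)
reporter.finish()   # logs "320 objects processed in ... seconds"
```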
diff --git a/tools/accuracy_checker/accuracy_checker/representation/__init__.py b/tools/accuracy_checker/accuracy_checker/representation/__init__.py
deleted file mode 100644 (file)
index 0ceabc3..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .base_representation import BaseRepresentation
-from .classification_representation import Classification, ClassificationAnnotation, ClassificationPrediction
-from .detection_representation import Detection, DetectionAnnotation, DetectionPrediction
-from .reid_representation import (
-    ReIdentificationAnnotation,
-    ReIdentificationClassificationAnnotation,
-    ReIdentificationPrediction
-)
-from .segmentation_representation import (
-    SegmentationRepresentation,
-    SegmentationAnnotation,
-    SegmentationPrediction,
-    BrainTumorSegmentationAnnotation,
-    BrainTumorSegmentationPrediction
-)
-from .character_recognition_representation import (
-    CharacterRecognition,
-    CharacterRecognitionAnnotation,
-    CharacterRecognitionPrediction
-)
-from .representaton_container import ContainerRepresentation, ContainerAnnotation, ContainerPrediction
-from .regression_representation import (
-    RegressionAnnotation,
-    RegressionPrediction,
-    FacialLandmarksAnnotation,
-    FacialLandmarksPrediction,
-    GazeVectorAnnotation,
-    GazeVectorPrediction
-)
-from .multilabel_recognition import MultiLabelRecognitionAnnotation, MultiLabelRecognitionPrediction
-from .super_resolution_representation import SuperResolutionAnnotation, SuperResolutionPrediction
-from .text_detection_representation import TextDetectionAnnotation, TextDetectionPrediction
-from .pose_estimation_representation import PoseEstimationAnnotation, PoseEstimationPrediction
-from .hit_ratio_representation import HitRatio, HitRatioAnnotation, HitRatioPrediction
-
-__all__ = [
-    'BaseRepresentation',
-
-    'Classification',
-    'ClassificationAnnotation',
-    'ClassificationPrediction',
-
-    'Detection',
-    'DetectionAnnotation',
-    'DetectionPrediction',
-
-    'ReIdentificationAnnotation',
-    'ReIdentificationClassificationAnnotation',
-    'ReIdentificationPrediction',
-
-    'SegmentationRepresentation',
-    'SegmentationAnnotation',
-    'SegmentationPrediction',
-    'BrainTumorSegmentationAnnotation',
-    'BrainTumorSegmentationPrediction',
-
-    'CharacterRecognition',
-    'CharacterRecognitionAnnotation',
-    'CharacterRecognitionPrediction',
-
-    'ContainerRepresentation',
-    'ContainerAnnotation',
-    'ContainerPrediction',
-
-    'RegressionAnnotation',
-    'RegressionPrediction',
-    'FacialLandmarksAnnotation',
-    'FacialLandmarksPrediction',
-    'GazeVectorAnnotation',
-    'GazeVectorPrediction',
-
-    'MultiLabelRecognitionAnnotation',
-    'MultiLabelRecognitionPrediction',
-
-    'SuperResolutionAnnotation',
-    'SuperResolutionPrediction',
-
-    'TextDetectionAnnotation',
-    'TextDetectionPrediction',
-
-    'PoseEstimationAnnotation',
-    'PoseEstimationPrediction',
-
-    'HitRatio',
-    'HitRatioAnnotation',
-    'HitRatioPrediction'
-]
diff --git a/tools/accuracy_checker/accuracy_checker/representation/base_representation.py b/tools/accuracy_checker/accuracy_checker/representation/base_representation.py
deleted file mode 100644 (file)
index 05d53b5..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import abc
-import pickle
-
-
-class BaseRepresentation(abc.ABC):
-    def __init__(self, identifier, metadata=None):
-        self.identifier = identifier
-        self.metadata = metadata or {}
-
-    @classmethod
-    def load(cls, file):
-        obj = pickle.load(file)
-
-        if cls != BaseRepresentation:
-            assert isinstance(obj, cls)
-
-        return obj
-
-    def dump(self, file):
-        pickle.dump(self, file)
-
-    def set_image_size(self, image_sizes):
-        self.metadata['image_size'] = image_sizes
-
-    def set_data_source(self, data_source):
-        self.metadata['data_source'] = data_source
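A hypothetical round trip through the pickle helpers above, assuming the package is importable; `ClassificationAnnotation` (defined below) serves as a concrete subclass and the file name is invented.

```python
# Hypothetical dump/load round trip for a representation object.
from accuracy_checker.representation import ClassificationAnnotation

annotation = ClassificationAnnotation(identifier='img_0001.png', label=3)
with open('annotation.pickle', 'wb') as annotation_file:
    annotation.dump(annotation_file)
with open('annotation.pickle', 'rb') as annotation_file:
    restored = ClassificationAnnotation.load(annotation_file)
print(restored.identifier, restored.label)   # img_0001.png 3
```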
diff --git a/tools/accuracy_checker/accuracy_checker/representation/character_recognition_representation.py b/tools/accuracy_checker/accuracy_checker/representation/character_recognition_representation.py
deleted file mode 100644 (file)
index df6a241..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .base_representation import BaseRepresentation
-
-
-class CharacterRecognition(BaseRepresentation):
-    def __init__(self, identifier='', label=None):
-        super().__init__(identifier)
-        self.label = label
-
-
-class CharacterRecognitionAnnotation(CharacterRecognition):
-    pass
-
-
-class CharacterRecognitionPrediction(CharacterRecognition):
-    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/classification_representation.py b/tools/accuracy_checker/accuracy_checker/representation/classification_representation.py
deleted file mode 100644 (file)
index 67f72f6..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from .base_representation import BaseRepresentation
-
-
-class Classification(BaseRepresentation):
-    pass
-
-
-class ClassificationAnnotation(Classification):
-    def __init__(self, identifier='', label=None):
-        super().__init__(identifier)
-
-        self.label = label
-
-
-class ClassificationPrediction(Classification):
-    def __init__(self, identifier='', scores=None):
-        super().__init__(identifier)
-
-        self.scores = np.array(scores) if scores is not None else np.array([])
-
-    @property
-    def label(self):
-        return np.argmax(self.scores)
-
-    def top_k(self, k):
-        return np.argpartition(self.scores, -k)[-k:]
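A quick check of the prediction helpers above with invented scores: `label` is the argmax of the score vector, and `top_k` returns the indices of the k best scores (unordered, since `np.argpartition` does not sort them).

```python
# Hypothetical scores for a four-class model.
from accuracy_checker.representation import ClassificationPrediction

prediction = ClassificationPrediction('img_0001.png', scores=[0.05, 0.10, 0.60, 0.25])
print(prediction.label)             # 2
print(sorted(prediction.top_k(2)))  # [2, 3]
```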
diff --git a/tools/accuracy_checker/accuracy_checker/representation/detection_representation.py b/tools/accuracy_checker/accuracy_checker/representation/detection_representation.py
deleted file mode 100644 (file)
index 1fc2c8b..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from ..utils import remove_difficult
-from .base_representation import BaseRepresentation
-
-
-class Detection(BaseRepresentation):
-    def __init__(self, identifier='', labels=None, x_mins=None, y_mins=None, x_maxs=None, y_maxs=None, metadata=None):
-        super().__init__(identifier, metadata)
-
-        self.labels = np.array(labels) if labels is not None else np.array([])
-        self.x_mins = np.array(x_mins) if x_mins is not None else np.array([])
-        self.y_mins = np.array(y_mins) if y_mins is not None else np.array([])
-        self.x_maxs = np.array(x_maxs) if x_maxs is not None else np.array([])
-        self.y_maxs = np.array(y_maxs) if y_maxs is not None else np.array([])
-
-    def remove(self, indexes):
-        self.labels = np.delete(self.labels, indexes)
-        self.x_mins = np.delete(self.x_mins, indexes)
-        self.y_mins = np.delete(self.y_mins, indexes)
-        self.x_maxs = np.delete(self.x_maxs, indexes)
-        self.y_maxs = np.delete(self.y_maxs, indexes)
-
-        difficult_boxes = self.metadata.get('difficult_boxes')
-        if not difficult_boxes:
-            return
-
-        new_difficult_boxes = remove_difficult(difficult_boxes, indexes)
-
-        self.metadata['difficult_boxes'] = new_difficult_boxes
-
-    @property
-    def size(self):
-        return len(self.x_mins)
-
-    def __eq__(self, other):
-        if not isinstance(other, type(self)):
-            return False
-
-        def are_bounding_boxes_equal():
-            if not np.array_equal(self.labels, other.labels):
-                return False
-            if not np.array_equal(self.x_mins, other.x_mins):
-                return False
-            if not np.array_equal(self.y_mins, other.y_mins):
-                return False
-            if not np.array_equal(self.x_maxs, other.x_maxs):
-                return False
-            if not np.array_equal(self.y_maxs, other.y_maxs):
-                return False
-            return True
-
-        return self.identifier == other.identifier and are_bounding_boxes_equal() and self.metadata == other.metadata
-
-
-class DetectionAnnotation(Detection):
-    pass
-
-
-class DetectionPrediction(Detection):
-    def __init__(self, identifier='', labels=None, scores=None, x_mins=None, y_mins=None, x_maxs=None, y_maxs=None,
-                 metadata=None):
-        super().__init__(identifier, labels, x_mins, y_mins, x_maxs, y_maxs, metadata)
-        self.scores = np.array(scores) if scores is not None else np.array([])
-
-    def remove(self, indexes):
-        super().remove(indexes)
-        self.scores = np.delete(self.scores, indexes)
-
-    def __eq__(self, other):
-        return np.array_equal(self.scores, other.scores) if super().__eq__(other) else False
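A stand-alone sketch of the np.delete pattern used by Detection.remove() above: deleting the same indexes from every per-box array keeps the arrays aligned (the box values here are made up).

```python
import numpy as np

labels = np.array([1, 2, 3, 4])
x_mins = np.array([0.0, 10.0, 20.0, 30.0])
scores = np.array([0.9, 0.4, 0.8, 0.3])

indexes = [1, 3]                          # boxes to drop
labels = np.delete(labels, indexes)       # array([1, 3])
x_mins = np.delete(x_mins, indexes)       # array([ 0., 20.])
scores = np.delete(scores, indexes)       # array([0.9, 0.8])
print(labels, x_mins, scores)
```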
diff --git a/tools/accuracy_checker/accuracy_checker/representation/hit_ratio_representation.py b/tools/accuracy_checker/accuracy_checker/representation/hit_ratio_representation.py
deleted file mode 100644 (file)
index f6cb6c7..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-
-from .base_representation import BaseRepresentation
-
-
-class HitRatio(BaseRepresentation):
-    def __init__(self, identifier=''):
-        super().__init__(identifier)
-        self.user = int(identifier[0].split('u:')[-1])
-        self.item = int(identifier[1].split('i:')[-1])
-
-
-class HitRatioAnnotation(HitRatio):
-    def __init__(self, identifier='', positive=True):
-        super().__init__(identifier)
-        self.positive = positive
-
-
-class HitRatioPrediction(HitRatio):
-    def __init__(self, identifier='', scores=None):
-        super().__init__(identifier)
-
-        self.scores = np.array(scores) if scores is not None else np.array([])
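HitRatio above expects an identifier of the form ('u:<user_id>', 'i:<item_id>'); a minimal sketch of that parsing with a made-up pair:

```python
identifier = ('u:42', 'i:1007')            # hypothetical user/item pair

user = int(identifier[0].split('u:')[-1])  # 42
item = int(identifier[1].split('i:')[-1])  # 1007
print(user, item)
```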
diff --git a/tools/accuracy_checker/accuracy_checker/representation/multilabel_recognition.py b/tools/accuracy_checker/accuracy_checker/representation/multilabel_recognition.py
deleted file mode 100644 (file)
index d5af464..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from .base_representation import BaseRepresentation
-
-
-class MultiLabelRecognitionRepresentation(BaseRepresentation):
-    def __init__(self, identifier='', multi_label=None):
-        super().__init__(identifier)
-        self.multi_label = np.array(multi_label) if isinstance(multi_label, list) else multi_label
-
-
-class MultiLabelRecognitionAnnotation(MultiLabelRecognitionRepresentation):
-    pass
-
-
-class MultiLabelRecognitionPrediction(MultiLabelRecognitionRepresentation):
-    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/pose_estimation_representation.py b/tools/accuracy_checker/accuracy_checker/representation/pose_estimation_representation.py
deleted file mode 100644 (file)
index f765dd8..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from .base_representation import BaseRepresentation
-
-
-class PoseEstimationRepresentation(BaseRepresentation):
-    def __init__(self, identifier='', x_values=None, y_values=None, visibility=None, labels=None):
-        super().__init__(identifier)
-        self.x_values = x_values if x_values is not None and np.size(x_values) > 0 else []
-        self.y_values = y_values if y_values is not None and np.size(y_values) > 0 else []
-        self.visibility = visibility if visibility is not None and np.size(visibility) > 0 else [2] * len(self.x_values)
-        self.labels = labels if labels is not None else np.array([1] * len(self.x_values))
-
-    @property
-    def areas(self):
-        areas = self.metadata.get('areas')
-        if areas:
-            return areas
-        x_mins = np.min(self.x_values, axis=1)
-        x_maxs = np.max(self.x_values, axis=1)
-        y_mins = np.min(self.y_values, axis=1)
-        y_maxs = np.max(self.y_values, axis=1)
-        return (x_maxs - x_mins) * (y_maxs - y_mins)
-
-    @property
-    def bboxes(self):
-        rects = self.metadata.get('rects')
-        if rects:
-            return rects
-        x_mins = np.min(self.x_values, axis=1)
-        x_maxs = np.max(self.x_values, axis=1)
-        y_mins = np.min(self.y_values, axis=1)
-        y_maxs = np.max(self.y_values, axis=1)
-        return [[x_min, y_min, x_max, y_max] for x_min, y_min, x_max, y_max in zip(x_mins, y_mins, x_maxs, y_maxs)]
-
-    @property
-    def size(self):
-        return len(self.x_values)
-
-
-class PoseEstimationAnnotation(PoseEstimationRepresentation):
-    pass
-
-
-class PoseEstimationPrediction(PoseEstimationRepresentation):
-    def __init__(self, identifier='', x_values=None, y_values=None, visibility=None, scores=None, labels=None):
-        super().__init__(identifier, x_values, y_values, visibility, labels)
-        self.scores = scores if scores is not None and np.size(scores) > 0 else []
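A stand-alone sketch of the areas/bboxes computation in PoseEstimationRepresentation above, assuming x_values/y_values are 2-D arrays with one row of keypoint coordinates per pose (the coordinates below are made up):

```python
import numpy as np

x_values = np.array([[10., 20., 30.], [5., 6., 9.]])
y_values = np.array([[ 1.,  4.,  2.], [7., 8., 10.]])

x_mins, x_maxs = np.min(x_values, axis=1), np.max(x_values, axis=1)
y_mins, y_maxs = np.min(y_values, axis=1), np.max(y_values, axis=1)

areas = (x_maxs - x_mins) * (y_maxs - y_mins)    # one area per pose
bboxes = [[x0, y0, x1, y1] for x0, y0, x1, y1 in zip(x_mins, y_mins, x_maxs, y_maxs)]
print(areas)     # [60. 12.]
print(bboxes)    # [[10.0, 1.0, 30.0, 4.0], [5.0, 7.0, 9.0, 10.0]]
```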
diff --git a/tools/accuracy_checker/accuracy_checker/representation/regression_representation.py b/tools/accuracy_checker/accuracy_checker/representation/regression_representation.py
deleted file mode 100644 (file)
index 99800d3..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from .base_representation import BaseRepresentation
-
-
-class RegressionRepresentation(BaseRepresentation):
-    def __init__(self, identifier='', value=None):
-        super().__init__(identifier)
-        self.value = value
-
-
-class RegressionAnnotation(RegressionRepresentation):
-    pass
-
-
-class RegressionPrediction(RegressionRepresentation):
-    pass
-
-
-class GazeVectorRepresentation(RegressionRepresentation):
-    def __init__(self, identifier='', value=None):
-        if value is None:
-            value = np.array([])
-        super().__init__(identifier, value)
-
-
-class GazeVectorAnnotation(GazeVectorRepresentation):
-    pass
-
-
-class GazeVectorPrediction(GazeVectorRepresentation):
-    pass
-
-
-class FacialLandmarksRepresentation(BaseRepresentation):
-    def __init__(self, identifier='', x_values=None, y_values=None):
-        super().__init__(identifier)
-        self.x_values = x_values if x_values is not None and np.size(x_values) > 0 else []
-        self.y_values = y_values if y_values is not None and np.size(y_values) > 0 else []
-
-
-class FacialLandmarksAnnotation(FacialLandmarksRepresentation):
-    @property
-    def interocular_distance(self):
-        left_eye = [
-            np.mean(self.x_values[self.metadata['left_eye']]),
-            np.mean(self.y_values[self.metadata['left_eye']])
-        ]
-        right_eye = [
-            np.mean(self.x_values[self.metadata['right_eye']]),
-            np.mean(self.y_values[self.metadata['right_eye']])
-        ]
-
-        return np.linalg.norm((np.subtract(left_eye, right_eye)))
-
-
-class FacialLandmarksPrediction(FacialLandmarksRepresentation):
-    pass
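A stand-alone sketch of FacialLandmarksAnnotation.interocular_distance above: metadata holds the landmark indices of each eye, and the distance is the L2 norm between the two eye centres (the landmark layout and values below are made up):

```python
import numpy as np

x_values = np.array([30.0, 32.0, 60.0, 62.0, 45.0])
y_values = np.array([40.0, 41.0, 40.0, 42.0, 70.0])
metadata = {'left_eye': [0, 1], 'right_eye': [2, 3]}   # hypothetical indices

left_eye = [np.mean(x_values[metadata['left_eye']]), np.mean(y_values[metadata['left_eye']])]
right_eye = [np.mean(x_values[metadata['right_eye']]), np.mean(y_values[metadata['right_eye']])]
print(np.linalg.norm(np.subtract(left_eye, right_eye)))   # ~30.0
```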
diff --git a/tools/accuracy_checker/accuracy_checker/representation/reid_representation.py b/tools/accuracy_checker/accuracy_checker/representation/reid_representation.py
deleted file mode 100644 (file)
index d212eb7..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .base_representation import BaseRepresentation
-
-
-class ReIdentification(BaseRepresentation):
-    pass
-
-
-class ReIdentificationAnnotation(ReIdentification):
-    def __init__(self, identifier, camera_id, person_id, query):
-        super().__init__(identifier)
-        self.camera_id = camera_id
-        self.person_id = person_id
-        self.query = query
-
-
-class ReIdentificationClassificationAnnotation(ReIdentification):
-    def __init__(self, identifier, positive_pairs=None, negative_pairs=None):
-        super().__init__(identifier)
-        self.positive_pairs = set(positive_pairs) if positive_pairs is not None else set()
-        self.negative_pairs = set(negative_pairs) if negative_pairs is not None else set()
-
-
-class ReIdentificationPrediction(ReIdentification):
-    def __init__(self, identifiers, embedding):
-        super().__init__(identifiers)
-        self.embedding = embedding.copy()
diff --git a/tools/accuracy_checker/accuracy_checker/representation/representaton_container.py b/tools/accuracy_checker/accuracy_checker/representation/representaton_container.py
deleted file mode 100644 (file)
index add7c69..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from ..representation import BaseRepresentation
-
-
-class ContainerRepresentation(BaseRepresentation):
-    def __init__(self, representation_map=None):
-        super().__init__('')
-        self.representations = representation_map or {}
-
-    def __eq__(self, other):
-        if not isinstance(other, type(self)):
-            return False
-
-        if self.identifier != other.identifier:
-            return False
-
-        if self.metadata != other.metadata:
-            return False
-
-        if self.representations != other.representations:
-            return False
-
-        return True
-
-    def __getitem__(self, item):
-        return self.representations[item]
-
-    def get(self, key):
-        return self.representations.get(key)
-
-    def values(self):
-        return list(self.representations.values())
-
-    @property
-    def identifier(self):
-        if self._identifier:
-            return self._identifier
-
-        values = self.values()
-        if np.size(values) == 0:
-            raise ValueError('representation container is empty')
-
-        self._identifier = values[0].identifier
-        return self._identifier
-
-    @identifier.setter
-    def identifier(self, identifier):
-        self._identifier = identifier
-
-
-class ContainerAnnotation(ContainerRepresentation):
-    def set_image_size(self, image_sizes):
-        for key in self.representations.keys():
-            self.representations[key].metadata['image_size'] = image_sizes
-
-    def set_data_source(self, data_source):
-        for key in self.representations.keys():
-            self.representations[key].metadata['data_source'] = data_source
-
-
-class ContainerPrediction(ContainerRepresentation):
-    pass
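A sketch of how ContainerRepresentation above resolves its identifier when one was not set explicitly: it is taken from the first contained representation (FakeRep is a hypothetical stand-in for a real representation object):

```python
class FakeRep:
    def __init__(self, identifier):
        self.identifier = identifier

representations = {
    'classification_annotation': FakeRep('img_0001.jpg'),
    'detection_annotation': FakeRep('img_0001.jpg'),
}

values = list(representations.values())
if not values:
    raise ValueError('representation container is empty')
identifier = values[0].identifier
print(identifier)   # img_0001.jpg
```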
diff --git a/tools/accuracy_checker/accuracy_checker/representation/segmentation_representation.py b/tools/accuracy_checker/accuracy_checker/representation/segmentation_representation.py
deleted file mode 100644 (file)
index 45f6b01..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from enum import Enum
-
-import numpy as np
-
-from .base_representation import BaseRepresentation
-from ..data_readers import BaseReader
-
-
-class GTMaskLoader(Enum):
-    PILLOW = 0
-    OPENCV = 1
-    SCIPY = 2
-    NIFTI = 3
-
-
-class SegmentationRepresentation(BaseRepresentation):
-    pass
-
-
-class SegmentationAnnotation(SegmentationRepresentation):
-    LOADERS = {
-        GTMaskLoader.PILLOW: 'pillow_imread',
-        GTMaskLoader.OPENCV: 'opencv_imread',
-        GTMaskLoader.SCIPY: 'scipy_imread',
-        GTMaskLoader.NIFTI: 'nifti_reader'
-    }
-
-    def __init__(self, identifier, path_to_mask, mask_loader=GTMaskLoader.PILLOW):
-        """
-        Args:
-            identifier: object identifier (e.g. image name).
-            path_to_mask: path the segmentation mask should be loaded from, relative to the data source.
-            mask_loader: back-end used to load segmentation masks.
-        """
-
-        super().__init__(identifier)
-        self._mask_path = path_to_mask
-        self._mask_loader = mask_loader
-        self._mask = None
-
-    @property
-    def mask(self):
-        return self._mask if self._mask is not None else self._load_mask()
-
-    @mask.setter
-    def mask(self, value):
-        self._mask = value
-
-    def _load_mask(self):
-        if self._mask is None:
-            loader = BaseReader.provide(self.LOADERS.get(self._mask_loader), self.metadata['data_source'])
-            if self._mask_loader == GTMaskLoader.PILLOW:
-                loader.convert_to_rgb = False
-            mask = loader.read(self._mask_path)
-            return mask.astype(np.uint8)
-
-        return self._mask
-
-
-class SegmentationPrediction(SegmentationRepresentation):
-    def __init__(self, identifiers, mask):
-        """
-        Args:
-            identifiers: object identifier (e.g. image name).
-            mask: array with shape (n_classes, height, width) of probabilities at each location.
-        """
-
-        super().__init__(identifiers)
-        self.mask = mask
-
-
-class BrainTumorSegmentationAnnotation(SegmentationAnnotation):
-    def __init__(self, identifier, path_to_mask):
-        super().__init__(identifier, path_to_mask, GTMaskLoader.NIFTI)
-
-
-class BrainTumorSegmentationPrediction(SegmentationPrediction):
-    pass
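A generic sketch of the lazy-loading pattern behind SegmentationAnnotation.mask above: the mask is read only on first access and can be overridden through the setter (the placeholder loader below replaces the real reader back-end, and the path is hypothetical):

```python
import numpy as np

class LazyMask:
    def __init__(self, path):
        self._mask_path = path
        self._mask = None

    @property
    def mask(self):
        return self._mask if self._mask is not None else self._load_mask()

    @mask.setter
    def mask(self, value):
        self._mask = value

    def _load_mask(self):
        # placeholder: the real class delegates to a reader selected via LOADERS
        return np.zeros((4, 4), dtype=np.uint8)

annotation = LazyMask('gt/mask_0001.png')           # hypothetical path
print(annotation.mask.shape)                        # loaded lazily on first access
annotation.mask = np.ones((4, 4), dtype=np.uint8)   # setter bypasses the loader
```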
diff --git a/tools/accuracy_checker/accuracy_checker/representation/super_resolution_representation.py b/tools/accuracy_checker/accuracy_checker/representation/super_resolution_representation.py
deleted file mode 100644 (file)
index 7d2b660..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from enum import Enum
-import numpy as np
-
-from .base_representation import BaseRepresentation
-from ..data_readers import BaseReader
-
-
-class GTLoader(Enum):
-    PILLOW = 0
-    OPENCV = 1
-
-
-class SuperResolutionRepresentation(BaseRepresentation):
-    pass
-
-
-class SuperResolutionAnnotation(SuperResolutionRepresentation):
-    LOADERS = {
-        GTLoader.PILLOW: 'pillow_imread',
-        GTLoader.OPENCV: 'opencv_imread'
-    }
-
-    def __init__(self, identifier, path_to_hr, gt_loader=GTLoader.PILLOW):
-        """
-        Args:
-            identifier: object identifier (e.g. image name).
-            path_to_hr: path the high resolution image should be loaded from, relative to the data source.
-            gt_loader: back-end used to load the ground truth high resolution image.
-        """
-
-        super().__init__(identifier)
-        self._image_path = path_to_hr
-        self._gt_loader = self.LOADERS.get(gt_loader)
-
-    @property
-    def value(self):
-        loader = BaseReader.provide(self._gt_loader, self.metadata['data_source'])
-        gt = loader.read(self._image_path)
-        return gt.astype(np.uint8)
-
-
-class SuperResolutionPrediction(SuperResolutionRepresentation):
-    def __init__(self, identifiers, prediction):
-        """
-        Args:
-            identifiers: object identifier (e.g. image name).
-            prediction: array with shape (height, width) containing the resulting image.
-        """
-
-        super().__init__(identifiers)
-        self.value = prediction
diff --git a/tools/accuracy_checker/accuracy_checker/representation/text_detection_representation.py b/tools/accuracy_checker/accuracy_checker/representation/text_detection_representation.py
deleted file mode 100644 (file)
index 38e7a9c..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from ..utils import remove_difficult
-from .base_representation import BaseRepresentation
-
-
-class TextDetectionRepresentation(BaseRepresentation):
-    def __init__(self, identifier='', points=None):
-        super().__init__(identifier)
-        self.points = points or []
-
-    def remove(self, indexes):
-        self.points = np.delete(self.points, indexes, axis=0)
-        difficult = self.metadata.get('difficult_boxes')
-        if not difficult:
-            return
-        self.metadata['difficult_boxes'] = remove_difficult(difficult, indexes)
-
-
-class TextDetectionAnnotation(TextDetectionRepresentation):
-    def __init__(self, identifier='', points=None, description=''):
-        super().__init__(identifier, points)
-        self.description = description
-
-    def remove(self, indexes):
-        super().remove(indexes)
-        self.description = np.delete(self.description, indexes)
-
-
-class TextDetectionPrediction(TextDetectionRepresentation):
-    pass
diff --git a/tools/accuracy_checker/accuracy_checker/utils.py b/tools/accuracy_checker/accuracy_checker/utils.py
deleted file mode 100644 (file)
index ca3b268..0000000
+++ /dev/null
@@ -1,385 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import csv
-import errno
-import itertools
-import json
-import os
-import pickle
-
-from pathlib import Path
-from typing import Union
-from warnings import warn
-
-from shapely.geometry.polygon import Polygon
-import numpy as np
-import yaml
-import yamlloader
-
-try:
-    import lxml.etree as et
-except ImportError:
-    import xml.etree.cElementTree as et
-
-
-def concat_lists(*lists):
-    return list(itertools.chain(*lists))
-
-
-def get_path(entry: Union[str, Path], is_directory=False, check_exists=True):
-    try:
-        path = Path(entry)
-    except TypeError:
-        raise TypeError('"{}" is expected to be a path-like'.format(entry))
-
-    if not check_exists:
-        return path
-
-    # pathlib.Path.exists throws an exception in case of broken symlink
-    if not os.path.exists(str(path)):
-        raise FileNotFoundError('{}: {}'.format(os.strerror(errno.ENOENT), path))
-
-    if is_directory and not path.is_dir():
-        raise NotADirectoryError('{}: {}'.format(os.strerror(errno.ENOTDIR), path))
-
-    # if it exists it is either file (or valid symlink to file) or directory (or valid symlink to directory)
-    if not is_directory and not path.is_file():
-        raise IsADirectoryError('{}: {}'.format(os.strerror(errno.EISDIR), path))
-
-    return path
-
-
-def contains_all(container, *args):
-    sequence = set(container)
-
-    for arg in args:
-        if len(sequence.intersection(arg)) != len(arg):
-            return False
-
-    return True
-
-
-def contains_any(container, *args):
-    sequence = set(container)
-
-    for arg in args:
-        if sequence.intersection(arg):
-            return True
-
-    return False
-
-
-def string_to_tuple(string, casting_type=float):
-    processed = string.replace(' ', '')
-    processed = processed.replace('(', '')
-    processed = processed.replace(')', '')
-    processed = processed.split(',')
-
-    return tuple([casting_type(entry) for entry in processed])
-
-
-def string_to_list(string):
-    processed = string.replace(' ', '')
-    processed = processed.replace('[', '')
-    processed = processed.replace(']', '')
-    processed = processed.split(',')
-
-    return list(entry for entry in processed)
-
-
-class JSONDecoderWithAutoConversion(json.JSONDecoder):
-    """
-    Custom json decoder to convert all strings into numbers (int, float) during reading json file.
-    """
-
-    def decode(self, s, _w=json.decoder.WHITESPACE.match):
-        decoded = super().decode(s, _w)
-        return self._decode(decoded)
-
-    def _decode(self, entry):
-        if isinstance(entry, str):
-            try:
-                return int(entry)
-            except ValueError:
-                pass
-            try:
-                return float(entry)
-            except ValueError:
-                pass
-        elif isinstance(entry, dict):
-            return {self._decode(key): self._decode(value) for key, value in entry.items()}
-        elif isinstance(entry, list):
-            return [self._decode(value) for value in entry]
-
-        return entry
-
-
-def dict_subset(dict_, key_subset):
-    return {key: value for key, value in dict_.items() if key in key_subset}
-
-
-def zipped_transform(fn, *iterables, inplace=False):
-    result = (iterables if inplace else tuple([] for _ in range(len(iterables))))
-    updater = (list.__setitem__ if inplace else lambda container, _, entry: container.append(entry))
-
-    for idx, values in enumerate(zip(*iterables)):
-        iter_res = fn(*values)
-        if not iter_res:
-            continue
-
-        for dst, res in zip(result, iter_res):
-            updater(dst, idx, res)
-
-    return result
-
-
-def overrides(obj, attribute_name, base=None):
-    cls = obj if isinstance(obj, type) else obj.__class__
-
-    base = base or cls.__bases__[0]
-    obj_attr = getattr(cls, attribute_name, None)
-    base_attr = getattr(base, attribute_name, None)
-
-    return obj_attr and obj_attr != base_attr
-
-
-def enum_values(enum):
-    return [member.value for member in enum]
-
-
-def get_size_from_config(config, allow_none=False):
-    if contains_all(config, ('size', 'dst_width', 'dst_height')):
-        warn('All of size, dst_width and dst_height are provided. Size will be used. '
-             'Specify either size or the pair dst_width, dst_height in the config.')
-    if 'size' in config:
-        return config['size'], config['size']
-    if contains_all(config, ('dst_width', 'dst_height')):
-        return config['dst_height'], config['dst_width']
-    if not allow_none:
-        raise ValueError('Either size or dst_width and dst_height required')
-
-    return None, None
-
-
-def get_size_3d_from_config(config, allow_none=False):
-    if contains_all(config, ('size', 'dst_width', 'dst_height', 'dst_volume')):
-        warn('All of size, dst_width, dst_height and dst_volume are provided. Size will be used. '
-             'Specify either size or the three values dst_width, dst_height, dst_volume in the config.')
-    if 'size' in config:
-        return config['size'], config['size'], config['size']
-    if contains_all(config, ('dst_width', 'dst_height', 'dst_volume')):
-        return config['dst_height'], config['dst_width'], config['dst_volume']
-    if not allow_none:
-        raise ValueError('Either size or dst_width, dst_height and dst_volume required')
-
-    return config.get('dst_height'), config.get('dst_width'), config.get('dst_volume')
-
-
-def in_interval(value, interval):
-    minimum = interval[0]
-    maximum = interval[1] if len(interval) >= 2 else None
-
-    if not maximum:
-        return minimum <= value
-
-    return minimum <= value < maximum
-
-
-def finalize_metric_result(values, names):
-    result_values, result_names = [], []
-    for value, name in zip(values, names):
-        if np.isnan(value):
-            continue
-
-        result_values.append(value)
-        result_names.append(name)
-
-    return result_values, result_names
-
-
-def get_representations(values, representation_source):
-    return np.reshape([value.get(representation_source) for value in values], -1)
-
-
-def get_supported_representations(container, supported_types):
-    if np.shape(container) == ():
-        container = [container]
-
-    return list(filter(lambda rep: check_representation_type(rep, supported_types), container))
-
-
-def check_representation_type(representation, representation_types):
-    for representation_type in representation_types:
-        if type(representation).__name__ == representation_type.__name__:
-            return True
-    return False
-
-
-def is_single_metric_source(source):
-    if not source:
-        return False
-
-    return np.size(source.split(',')) == 1
-
-
-def read_txt(file: Union[str, Path], sep='\n', **kwargs):
-    def is_empty(string):
-        return not string or string.isspace()
-
-    with get_path(file).open() as content:
-        content = content.read(**kwargs).split(sep)
-        content = list(filter(lambda string: not is_empty(string), content))
-
-        return list(map(str.strip, content))
-
-
-def read_xml(file: Union[str, Path], *args, **kwargs):
-    return et.parse(str(get_path(file)), *args, **kwargs).getroot()
-
-
-def read_json(file: Union[str, Path], *args, **kwargs):
-    with get_path(file).open() as content:
-        return json.load(content, *args, **kwargs)
-
-
-def read_pickle(file: Union[str, Path], *args, **kwargs):
-    with get_path(file).open('rb') as content:
-        return pickle.load(content, *args, **kwargs)
-
-
-def read_yaml(file: Union[str, Path], *args, **kwargs):
-    with get_path(file).open() as content:
-        return yaml.load(content, *args, Loader=yamlloader.ordereddict.Loader, **kwargs)
-
-
-def read_csv(file: Union[str, Path], *args, **kwargs):
-    with get_path(file).open() as content:
-        return list(csv.DictReader(content, *args, **kwargs))
-
-
-def extract_image_representations(image_representations):
-    images = [rep.data for rep in image_representations]
-    meta = [rep.metadata for rep in image_representations]
-
-    return images, meta
-
-
-def convert_bboxes_xywh_to_x1y1x2y2(x_coord, y_coord, width, height):
-    return x_coord, y_coord, x_coord + width, y_coord + height
-
-
-def get_or_parse_value(item, supported_values, default=None):
-    if isinstance(item, str):
-        item = item.lower()
-        if item in supported_values:
-            return supported_values[item]
-
-        try:
-            return string_to_tuple(item)
-        except ValueError:
-            message = 'Invalid value "{}", expected one of precomputed: ({}) or list of values'.format(
-                item, ', '.join(supported_values.keys())
-            )
-            raise ValueError(message)
-
-    if isinstance(item, (float, int)):
-        return (item, )
-
-    return default
-
-
-def string_to_bool(string):
-    return string.lower() in ['yes', 'true', 't', '1']
-
-
-def get_key_by_value(container, target):
-    for key, value in container.items():
-        if value == target:
-            return key
-
-    return None
-
-
-def format_key(key):
-    return '--{}'.format(key)
-
-
-def to_lower_register(str_list):
-    return list(map(lambda item: item.lower() if item else None, str_list))
-
-
-def polygon_from_points(points):
-    return Polygon(points)
-
-
-def remove_difficult(difficult, indexes):
-    new_difficult = []
-    decrementor = 0
-    id_difficult = 0
-    id_removed = 0
-    while id_difficult < len(difficult) and id_removed < len(indexes):
-        if difficult[id_difficult] < indexes[id_removed]:
-            new_difficult.append(difficult[id_difficult] - decrementor)
-            id_difficult += 1
-        else:
-            decrementor += 1
-            id_removed += 1
-
-    return new_difficult
-
-
-def convert_to_range(entry):
-    entry_range = entry
-    if isinstance(entry, str):
-        entry_range = string_to_tuple(entry_range)
-    elif not isinstance(entry_range, tuple) and not isinstance(entry_range, list):
-        entry_range = [entry_range]
-
-    return entry_range
-
-
-def add_input_shape_to_meta(meta, shape):
-    meta['input_shape'] = shape
-    return meta
-
-
-def set_image_metadata(annotation, images):
-    image_sizes = []
-    data = images.data
-    if not isinstance(data, list):
-        data = [data]
-    for image in data:
-        image_sizes.append(image.shape)
-    annotation.set_image_size(image_sizes)
-
-    return annotation, images
-
-
-def get_indexs(container, element):
-    return [index for index, container_element in enumerate(container) if container_element == element]
-
-
-def find_nearest(array, value, mode=None):
-    if not array:
-        return -1
-    array = np.asarray(array)
-    idx = (np.abs(array - value)).argmin()
-    if mode == 'less':
-        return idx - 1 if array[idx] > value else idx
-    if mode == 'more':
-        return idx + 1 if array[idx] < value else idx
-    return idx
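Two of the helpers above, re-implemented inline so their behaviour can be checked without the accuracy_checker package (the sample inputs are made up):

```python
import numpy as np

def string_to_tuple(string, casting_type=float):
    processed = string.replace(' ', '').replace('(', '').replace(')', '')
    return tuple(casting_type(entry) for entry in processed.split(','))

def find_nearest(array, value, mode=None):
    if not array:
        return -1
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    if mode == 'less':
        return idx - 1 if array[idx] > value else idx
    if mode == 'more':
        return idx + 1 if array[idx] < value else idx
    return idx

print(string_to_tuple('(104, 117, 123)'))                  # (104.0, 117.0, 123.0)
print(find_nearest([0.25, 0.5, 0.75], 0.6))                # 1
print(find_nearest([0.25, 0.5, 0.75], 0.6, mode='more'))   # 2
```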
diff --git a/tools/accuracy_checker/configs/face-detection-adas-0001.yml b/tools/accuracy_checker/configs/face-detection-adas-0001.yml
deleted file mode 100644 (file)
index 952fbb5..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-models:
-  - name: face-detection-adas-0001
-
-    launchers:
-      - framework: dlsdk
-        device: CPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-
-      - framework: dlsdk
-        tags:
-          - GPU32
-        device: GPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        tags:
-          - GPU16
-        device: GPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001-fp16.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001-fp16.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001-fp16.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001-fp16.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001-fp16.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001-fp16.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP16_MobileNet_Clamp.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP11_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.xml
-        weights: Transportation/object_detection/face/pruned_mobilenet_reduced_ssd_shared_weights/dldt/face-detection-adas-0001.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP11_MobileNetCaffe.aocx
-
-    datasets:
-      - name: wider
-        data_source: WIDER_val/images
-        annotation_conversion:
-          converter: wider
-          annotation_file: wider_face_split/wider_face_val_bbx_gt.txt
-
-        preprocessing:
-          - type: resize
-            dst_width: 672
-            dst_height: 384
-
-        postprocessing:
-          - type: resize_prediction_boxes
-          - type: filter
-            height_range: 100
-            apply_to: annotation
-
-        metrics:
-          - type: map
-            ignore_difficult: True
-            include_boundaries: False
-            allow_multiple_matches_per_ignored: True
-            use_filtered_tp: True
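For reference, a minimal sketch of reading a config like the one above the same way the read_yaml helper deleted from utils.py does (the file name is hypothetical and the yamlloader package must be installed):

```python
import yaml
import yamlloader

with open('face-detection-adas-0001.yml') as content:
    config = yaml.load(content, Loader=yamlloader.ordereddict.Loader)

model = config['models'][0]
print(model['name'])                                            # face-detection-adas-0001
print([launcher['device'] for launcher in model['launchers']])  # CPU, GPU, MYRIAD, ...
```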
diff --git a/tools/accuracy_checker/configs/face-detection-retail-0004.yml b/tools/accuracy_checker/configs/face-detection-retail-0004.yml
deleted file mode 100644 (file)
index affe639..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-models:
-  - name: face-detection-retail-0004
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004-fp16.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004-fp16.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004-fp16.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004-fp16.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004-fp16.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004-fp16.bin
-        adapter: ssd
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP16_TinyYolo.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP11_CaffeMobileNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.xml
-        weights: Retail/object_detection/face/sqnet1.0modif-ssd/0004/dldt/face-detection-retail-0004.bin
-        adapter: ssd
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP11_ResNet_SqueezeNet_VGG.aocx
-
-    datasets:
-      - name: wider
-        data_source: WIDER_val/images
-        annotation_conversion:
-          converter: wider
-          annotation_file: wider_face_split/wider_face_val_bbx_gt.txt
-
-        preprocessing:
-          - type: resize
-            size: 300
-
-        postprocessing:
-          - type: resize_prediction_boxes
-          - type: cast_to_int
-          - type: filter
-            apply_to: annotation
-            height_range: 60
-            is_empty: True
-          - type: filter
-            min_confidence: 0.0
-            apply_to: prediction
-
-        metrics:
-          - type: map
-            ignore_difficult: True
-            include_boundaries: False
-            allow_multiple_matches_per_ignored: False
-            distinct_conf: False
diff --git a/tools/accuracy_checker/configs/face-reidentification-retail-0095.yml b/tools/accuracy_checker/configs/face-reidentification-retail-0095.yml
deleted file mode 100644 (file)
index 50c0a55..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-models:
-  - name: face-reidentification-retail-0095
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095-fp16.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095-fp16.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095-fp16.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP16_SSD300.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FPGA11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP11_CaffeMobileNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP16_MobileNet_Clamp.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.xml
-        weights: Retail/object_reidentification/face/mobilenet_based/dldt/face-reidentification-retail-0095.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP11_MobileNetCaffe.aocx
-
-    datasets:
-      - name: lfw
-        data_source: LFW/lfw
-        annotation_conversion:
-          converter: face_reid_pairwise
-          pairs_file: LFW/annotation/pairs.txt
-          landmarks_file: LFW/annotation/lfw_landmark.txt
-
-        preprocessing:
-          - type: point_alignment
-            size: 400
-          - type: resize
-            size: 128
-
-        metrics:
-          - type: pairwise_accuracy_subsets
diff --git a/tools/accuracy_checker/configs/human-pose-estimation-0001.yml b/tools/accuracy_checker/configs/human-pose-estimation-0001.yml
deleted file mode 100644 (file)
index efe6105..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-models:
-  - name: human-pose-estimation-0001
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-
-      - framework: dlsdk
-        tags:
-          - INT8
-        device: CPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-int8.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-int8.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-fp16.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-fp16.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-fp16.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-fp16.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-fp16.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001-fp16.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-        bitstream: 2019R1_A10DK_FP16_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-        bitstream: 2019R1_A10DK_FP11_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.xml
-        weights: Transportation/human_pose_estimation/mobilenet-v1/dldt/human-pose-estimation-0001.bin
-        allow_reshape_input: True
-        adapter:
-          type: human_pose_estimation
-          part_affinity_fields_out: Mconv7_stage2_L1
-          keypoints_heatmap_out: Mconv7_stage2_L2
-        bitstream: 2019R1_PL1_FP11_ELU.aocx
-
-    datasets:
-      - name: ms_coco_keypoints
-        data_source: val2017
-        annotation_conversion:
-          converter: mscoco_keypoints
-          annotation_file: person_keypoints_val2017.json
-
-        preprocessing:
-          - type: resize
-            size: 368
-            interpolation: CUBIC
-            aspect_ratio_scale: width
-          - type: padding
-            stride: 8
-
-        postprocessing:
-          - type: filter
-            apply_to: annotation
-            area_range: 1, 10000000000
-          - type: filter
-            apply_to: prediction
-            area_range: 1, 10000000000
-
-        metrics:
-          - name: AP
-            type: coco_precision
-            max_detections: 20
diff --git a/tools/accuracy_checker/configs/landmarks-regression-retail-0009.yml b/tools/accuracy_checker/configs/landmarks-regression-retail-0009.yml
deleted file mode 100644 (file)
index 533398f..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-models:
-  - name: landmarks-regression-retail-0009
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.bin
-        adapter: landmarks_regression
-        cpu_extensions: AUTO
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.bin
-        adapter: landmarks_regression
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009-fp16.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009-fp16.bin
-        adapter: landmarks_regression
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009-fp16.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009-fp16.bin
-        adapter: landmarks_regression
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009-fp16.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009-fp16.bin
-        adapter: landmarks_regression
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.bin
-        adapter: landmarks_regression
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP16_AlexNet_GoogleNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.bin
-        adapter: landmarks_regression
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP11_RMNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.bin
-        adapter: landmarks_regression
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.xml
-        weights: Retail/object_attributes/landmarks_regression/0009/dldt/landmarks-regression-retail-0009.bin
-        adapter: landmarks_regression
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP11_RMNet.aocx
-
-    datasets:
-      - name: vgg2face
-        data_source: VGGFaces2/test
-        annotation_conversion:
-          converter: landmarks_regression
-          landmarks_csv_file: VGGFaces2/bb_landmark/loose_landmark_test.csv
-          bbox_csv_file: VGGFaces2/bb_landmark/loose_bb_test.csv
-
-        preprocessing:
-          - type: crop_rect
-          - type: resize
-            size: 48
-
-        postprocessing:
-          - type: normalize_landmarks_points
-            use_annotation_rect: True
-
-        metrics:
-          - type: per_point_normed_error
-            presenter: print_vector
-          - type: normed_error
diff --git a/tools/accuracy_checker/configs/person-reidentification-retail-0031.yml b/tools/accuracy_checker/configs/person-reidentification-retail-0031.yml
deleted file mode 100644 (file)
index 090f69c..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-models:
-  - name: person-reidentification-retail-0031
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - INT8
-        device: CPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-int8.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-int8.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-fp16.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-fp16.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-fp16.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP16_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP11_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.xml
-        weights: Security/object_attributes/pedestrian/person-attributes-recognition-crossroad-0031/dldt/person-attributes-recognition-crossroad-0031.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml
-        weights: person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP11_ELU.aocx
-
-    datasets:
-      - name: market1501
-        reader: pillow_imread
-        data_source: Market-1501-v15.09.15
-        annotation_conversion:
-          converter: market1501
-          data_dir: Market-1501-v15.09.15
-
-        preprocessing:
-          - type: bgr_to_rgb
-          - type: resize
-            dst_width: 48
-            dst_height: 96
-            use_pillow: True
-            interpolation: ANTIALIAS
-
-        metrics:
-          - name: rank@1
-            type: cmc
-            top_k: 1
-
-          - type: reid_map
diff --git a/tools/accuracy_checker/configs/person-reidentification-retail-0076.yml b/tools/accuracy_checker/configs/person-reidentification-retail-0076.yml
deleted file mode 100644 (file)
index 9439818..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-models:
-  - name: person-reidentification-retail-0076
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - INT8
-        device: CPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-int8.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-int8.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-fp16.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-fp16.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-fp16.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP16_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP11_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.xml
-        weights: Retail/object_redidentification/pedestrian/rmnet_based/0076/dldt/person-reidentification-retail-0076.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP11_ELU.aocx
-
-    datasets:
-      - name: market1501
-        data_source: Market-1501-v15.09.15
-        annotation_conversion:
-          converter: market1501
-          data_dir: Market-1501-v15.09.15
-
-        preprocessing:
-          - type: resize
-            dst_width: 128
-            dst_height: 384
-
-        metrics:
-          - name: rank@1
-            type: cmc
-            top_k: 1
-
-          - type: reid_map
diff --git a/tools/accuracy_checker/configs/person-reidentification-retail-0079.yml b/tools/accuracy_checker/configs/person-reidentification-retail-0079.yml
deleted file mode 100644 (file)
index bd07fa0..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-models:
-  - name: person-reidentification-retail-0079
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - INT8
-        device: CPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-int8.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-int8.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-fp16.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-fp16.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-fp16.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079-fp16.bin
-        adapter: reid
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP16_RMNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.bin
-        adapter: reid
-        bitstream: 2019R1_A10DK_FP11_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP16_RMNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.xml
-        weights: Retail/object_reidentification/pedestrian/rmnet_based/0079/dldt/person-reidentification-retail-0079.bin
-        adapter: reid
-        bitstream: 2019R1_PL1_FP11_ELU.aocx
-
-    datasets:
-      - name: market1501
-        data_source: Market-1501-v15.09.15
-        annotation_conversion:
-          converter: market1501
-          data_dir: Market-1501-v15.09.15
-
-        preprocessing:
-          - type: resize
-            dst_width: 64
-            dst_height: 160
-
-        metrics:
-          - name: rank@1
-            type: cmc
-            top_k: 1
-
-          - type: reid_map
diff --git a/tools/accuracy_checker/configs/resnet50-binary-0001.yml b/tools/accuracy_checker/configs/resnet50-binary-0001.yml
deleted file mode 100644 (file)
index 2291474..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-models:
-  - name: resnet50-binary-0001
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - INT1
-        device: CPU
-        model:   PublicCompressed/classification/resnet50_binary/dldt/resnet50-binary-0001.xml
-        weights: PublicCompressed/classification/resnet50_binary/dldt/resnet50-binary-0001.bin
-        adapter: classification
-
-
-    datasets:
-      - name: imagenet
-        data_source: ImageNet
-        annotation_conversion:
-          converter: imagenet
-          annotation_file: val.txt
-        annotation: imagenet.pickle
-        reader: pillow_imread
-
-        preprocessing:
-        - type: resize
-          size: 256
-          aspect_ratio_scale: greater
-          use_pillow: True
-          interpolation: BILINEAR
-        - type: crop
-          size: 224
-          use_pillow: True
-        - type: bgr_to_rgb
-
-        metrics:
-          - name: accuracy@top1
-            type: accuracy
-            top_k: 1
-          - name: accuracy@top5
-            type: accuracy
-            top_k: 5
diff --git a/tools/accuracy_checker/configs/text-detection-0002.yml b/tools/accuracy_checker/configs/text-detection-0002.yml
deleted file mode 100644 (file)
index d1ebd9b..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-models:
-  - name: text-detection-0002
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-        cpu_extensions: AUTO
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002-fp16.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002-fp16.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002-fp16.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002-fp16.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002-fp16.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002-fp16.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP16_MobileNet_Clamp.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP11_MobileNet_Clamp.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP16_MobileNet_Clamp.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.xml
-        weights: Retail/object_detection/text/pixel_link_mobilenet_v2/0001/text-detection-0002.bin
-        adapter:
-          type: text_detection
-          pixel_link_out: pixel_link/add_2
-          pixel_class_out: pixel_cls/add_2
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP11_MobileNet_Clamp.aocx
-
-    datasets:
-      - name: ICDAR2015
-
-        data_source: ICDAR15_DET_validation/ch4_test_images
-        annotation_conversion:
-          converter: icdar15_detection
-          data_dir: ICDAR15_DET_validation/gt
-
-        preprocessing:
-          - type: resize
-            dst_width: 1280
-            dst_height: 768
-
-        postprocessing:
-          - type: cast_to_int
-          - type: filter
-            area_range: 300, 980993
-            height_range: 10
-            width_range: 10
-            apply_to: prediction
-            remove_filtered: True
-          - type: clip_points
-            apply_to: prediction
-
-        metrics:
-          - type: text_detection
-            name: f-measure
-            ignore_difficult: True
diff --git a/tools/accuracy_checker/configs/text-recognition-0012.yml b/tools/accuracy_checker/configs/text-recognition-0012.yml
deleted file mode 100644 (file)
index f304517..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-models:
-  - name: text-recognition-0012
-
-    launchers:
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: CPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.bin
-        adapter: beam_search_decoder
-        cpu_extensions: AUTO
-
-      - framework: dlsdk
-        tags:
-          - FP32
-        device: GPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.bin
-        adapter: beam_search_decoder
-
-      - framework: dlsdk
-        tags:
-          - FP16
-        device: GPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012-fp16.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012-fp16.bin
-        adapter: beam_search_decoder
-
-      - framework: dlsdk
-        device: MYRIAD
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012-fp16.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012-fp16.bin
-        adapter: beam_search_decoder
-
-      - framework: dlsdk
-        device: HDDL
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012-fp16.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012-fp16.bin
-        adapter: beam_search_decoder
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.bin
-        adapter: beam_search_decoder
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP16_AlexNet_GoogleNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - A10_devkit
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.bin
-        adapter: beam_search_decoder
-        cpu_extensions: AUTO
-        bitstream: 2019R1_A10DK_FP11_AlexNet_GoogleNet_SqueezeNet.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP16
-        device: HETERO:FPGA,CPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.bin
-        adapter: beam_search_decoder
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP16_ResNet_SqueezeNet_VGG_ELU.aocx
-
-      - framework: dlsdk
-        tags:
-          - HDDL-F
-          - FP11
-        device: HETERO:FPGA,CPU
-        model:   Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.xml
-        weights: Retail/text_recognition/bilstm_crnn_bilstm_decoder/0012/dldt/text-recognition-0012.bin
-        adapter: beam_search_decoder
-        cpu_extensions: AUTO
-        bitstream: 2019R1_PL1_FP11_AlexNet_GoogleNet.aocx
-
-    datasets:
-      - name: ICDAR2013
-        data_source: ICDAR13_REC_validation/Challenge2_Test_Task3_Images
-        annotation_conversion:
-          converter: icdar13_recognition
-          annotation_file: ICDAR13_REC_validation/gt/gt.txt.fixed.alfanumeric
-
-        preprocessing:
-          - type: bgr_to_gray
-          - type: resize
-            dst_width: 120
-            dst_height: 32
-
-        metrics:
-          - type: character_recognition_accuracy
diff --git a/tools/accuracy_checker/data/test_data/1.jpg b/tools/accuracy_checker/data/test_data/1.jpg
deleted file mode 100644 (file)
index 20edaae..0000000
Binary files a/tools/accuracy_checker/data/test_data/1.jpg and /dev/null differ
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.bin b/tools/accuracy_checker/data/test_models/SampLeNet.bin
deleted file mode 100644 (file)
index da11860..0000000
Binary files a/tools/accuracy_checker/data/test_models/SampLeNet.bin and /dev/null differ
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.caffemodel b/tools/accuracy_checker/data/test_models/SampLeNet.caffemodel
deleted file mode 100644 (file)
index 274a072..0000000
Binary files a/tools/accuracy_checker/data/test_models/SampLeNet.caffemodel and /dev/null differ
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.prototxt b/tools/accuracy_checker/data/test_models/SampLeNet.prototxt
deleted file mode 100644 (file)
index d6b158f..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-name: "SampLeNet"
-
-layer {
-  name: "data"
-  type: "Input"
-  top: "data"
-  input_param { shape: { dim: 1 dim: 3 dim: 32 dim: 32 } }
-}
-
-layer {
-  name: "conv1"
-  type: "Convolution"
-  bottom: "data"
-  top: "conv1"
-
-  convolution_param {
-    num_output: 6
-    kernel_size: 5
-    stride: 1
-  }
-}
-layer {
-  name: "relu_conv1"
-  type: "ReLU"
-  bottom: "conv1"
-  top: "conv1"
-}
-layer {
-  name: "pool1"
-  type: "Pooling"
-  bottom: "conv1"
-  top: "pool1"
-  pooling_param {
-    pool: MAX
-    kernel_size: 2
-    stride: 2
-  }
-}
-
-layer {
-  name: "conv2"
-  type: "Convolution"
-  bottom: "pool1"
-  top: "conv2"
-
-  convolution_param {
-    num_output: 16
-    kernel_size: 5
-    stride: 1
-  }
-}
-
-layer {
-  name: "relu_conv2"
-  type: "ReLU"
-  bottom: "conv2"
-  top: "conv2"
-}
-layer {
-  name: "pool2"
-  type: "Pooling"
-  bottom: "conv2"
-  top: "pool2"
-
-  pooling_param {
-    pool: MAX
-    kernel_size: 2
-    stride: 2
-  }
-}
-
-layer {
-  name: "fc1"
-  type: "InnerProduct"
-  bottom: "pool2"
-  top: "fc1"
-
-  inner_product_param {
-    num_output: 120
-  }
-}
-layer {
-  name: "relu_fc1"
-  type: "ReLU"
-  bottom: "fc1"
-  top: "fc1"
-}
-
-layer {
-  name: "fc2"
-  type: "InnerProduct"
-  bottom: "fc1"
-  top: "fc2"
-
-  inner_product_param {
-    num_output: 84
-  }
-}
-
-layer {
-  name: "relu_fc2"
-  type: "ReLU"
-  bottom: "fc2"
-  top: "fc2"
-}
-
-layer {
-  name: "fc3"
-  type: "InnerProduct"
-  bottom: "fc2"
-  top: "fc3"
-
-  inner_product_param {
-    num_output: 10
-  }
-}
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.xml b/tools/accuracy_checker/data/test_models/SampLeNet.xml
deleted file mode 100644 (file)
index f3d55ee..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-<?xml version="1.0" ?>
-<net batch="1" name="SampLeNet" version="2">
-       <layers>
-               <layer id="0" name="data" precision="FP32" type="Input">
-                       <output>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>3</dim>
-                                       <dim>32</dim>
-                                       <dim>32</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="1" name="conv1" precision="FP32" type="Convolution">
-                       <data dilation-x="1" dilation-y="1" group="1" kernel-x="5" kernel-y="5" output="6" pad-b="0" pad-r="0" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>3</dim>
-                                       <dim>32</dim>
-                                       <dim>32</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="3">
-                                       <dim>1</dim>
-                                       <dim>6</dim>
-                                       <dim>28</dim>
-                                       <dim>28</dim>
-                               </port>
-                       </output>
-                       <blobs>
-                               <weights offset="0" size="1800"/>
-                               <biases offset="1800" size="24"/>
-                       </blobs>
-               </layer>
-               <layer id="2" name="relu_conv1" precision="FP32" type="ReLU">
-                       <data negative_slope="0.0"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>6</dim>
-                                       <dim>28</dim>
-                                       <dim>28</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="1">
-                                       <dim>1</dim>
-                                       <dim>6</dim>
-                                       <dim>28</dim>
-                                       <dim>28</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="3" name="pool1" precision="FP32" type="Pooling">
-                       <data exclude-pad="false" kernel-x="2" kernel-y="2" pad-b="0" pad-r="0" pad-x="0" pad-y="0" pool-method="max" rounding-type="ceil" stride="1,1,2,2" stride-x="2" stride-y="2"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>6</dim>
-                                       <dim>28</dim>
-                                       <dim>28</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="1">
-                                       <dim>1</dim>
-                                       <dim>6</dim>
-                                       <dim>14</dim>
-                                       <dim>14</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="4" name="conv2" precision="FP32" type="Convolution">
-                       <data dilation-x="1" dilation-y="1" group="1" kernel-x="5" kernel-y="5" output="16" pad-b="0" pad-r="0" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>6</dim>
-                                       <dim>14</dim>
-                                       <dim>14</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="3">
-                                       <dim>1</dim>
-                                       <dim>16</dim>
-                                       <dim>10</dim>
-                                       <dim>10</dim>
-                               </port>
-                       </output>
-                       <blobs>
-                               <weights offset="1824" size="9600"/>
-                               <biases offset="11424" size="64"/>
-                       </blobs>
-               </layer>
-               <layer id="5" name="relu_conv2" precision="FP32" type="ReLU">
-                       <data negative_slope="0.0"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>16</dim>
-                                       <dim>10</dim>
-                                       <dim>10</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="1">
-                                       <dim>1</dim>
-                                       <dim>16</dim>
-                                       <dim>10</dim>
-                                       <dim>10</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="6" name="pool2" precision="FP32" type="Pooling">
-                       <data exclude-pad="false" kernel-x="2" kernel-y="2" pad-b="0" pad-r="0" pad-x="0" pad-y="0" pool-method="max" rounding-type="ceil" stride="1,1,2,2" stride-x="2" stride-y="2"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>16</dim>
-                                       <dim>10</dim>
-                                       <dim>10</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="1">
-                                       <dim>1</dim>
-                                       <dim>16</dim>
-                                       <dim>5</dim>
-                                       <dim>5</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="7" name="fc1" precision="FP32" type="FullyConnected">
-                       <data out-size="120"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>16</dim>
-                                       <dim>5</dim>
-                                       <dim>5</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="3">
-                                       <dim>1</dim>
-                                       <dim>120</dim>
-                               </port>
-                       </output>
-                       <blobs>
-                               <weights offset="11488" size="192000"/>
-                               <biases offset="203488" size="480"/>
-                       </blobs>
-               </layer>
-               <layer id="8" name="relu_fc1" precision="FP32" type="ReLU">
-                       <data negative_slope="0.0"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>120</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="1">
-                                       <dim>1</dim>
-                                       <dim>120</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="9" name="fc2" precision="FP32" type="FullyConnected">
-                       <data out-size="84"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>120</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="3">
-                                       <dim>1</dim>
-                                       <dim>84</dim>
-                               </port>
-                       </output>
-                       <blobs>
-                               <weights offset="203968" size="40320"/>
-                               <biases offset="244288" size="336"/>
-                       </blobs>
-               </layer>
-               <layer id="10" name="relu_fc2" precision="FP32" type="ReLU">
-                       <data negative_slope="0.0"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>84</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="1">
-                                       <dim>1</dim>
-                                       <dim>84</dim>
-                               </port>
-                       </output>
-               </layer>
-               <layer id="11" name="fc3" precision="FP32" type="FullyConnected">
-                       <data out-size="10"/>
-                       <input>
-                               <port id="0">
-                                       <dim>1</dim>
-                                       <dim>84</dim>
-                               </port>
-                       </input>
-                       <output>
-                               <port id="3">
-                                       <dim>1</dim>
-                                       <dim>10</dim>
-                               </port>
-                       </output>
-                       <blobs>
-                               <weights offset="244624" size="3360"/>
-                               <biases offset="247984" size="40"/>
-                       </blobs>
-               </layer>
-       </layers>
-       <edges>
-               <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
-               <edge from-layer="1" from-port="3" to-layer="2" to-port="0"/>
-               <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
-               <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
-               <edge from-layer="4" from-port="3" to-layer="5" to-port="0"/>
-               <edge from-layer="5" from-port="1" to-layer="6" to-port="0"/>
-               <edge from-layer="6" from-port="1" to-layer="7" to-port="0"/>
-               <edge from-layer="7" from-port="3" to-layer="8" to-port="0"/>
-               <edge from-layer="8" from-port="1" to-layer="9" to-port="0"/>
-               <edge from-layer="9" from-port="3" to-layer="10" to-port="0"/>
-               <edge from-layer="10" from-port="1" to-layer="11" to-port="0"/>
-       </edges>
-</net>
diff --git a/tools/accuracy_checker/pylint_checkers.py b/tools/accuracy_checker/pylint_checkers.py
deleted file mode 100644 (file)
index a42ccd6..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import astroid
-from pylint.checkers import BaseChecker
-from pylint.interfaces import IAstroidChecker, IRawChecker
-
-
-class BackslashChecker(BaseChecker):
-    """
-    Checks for line continuations with '\' instead of using triple quoted string or parenthesis.
-    """
-
-    __implements__ = IRawChecker
-
-    name = 'backslash'
-    msgs = {
-        'W9901': (
-            'use of \\ for line continuation', 'backslash-line-continuation',
-            'Used when a \\ is used for a line continuation instead of using triple quoted string or parenthesis.'
-        ),
-    }
-    options = ()
-
-    def process_module(self, node):
-        with node.stream() as stream:
-            for (line_number, line) in enumerate(stream):
-                if not line.decode().rstrip().endswith('\\'):
-                    continue
-
-                self.add_message('backslash-line-continuation', line=line_number)
-
-
-class AbsoluteImportsChecker(BaseChecker):
-    """
-    Check for absolute import from the same package.
-    """
-
-    __implements__ = IAstroidChecker
-
-    name = 'absolute-imports'
-    priority = -1
-    msgs = {
-        'W9902': (
-            'absolute import from same package', 'package-absolute-imports',
-            'Used when module of same package imported using absolute import'
-        )
-    }
-
-    def visit_importfrom(self, node):
-        node_package = self._node_package(node)
-        import_name = node.modname
-        if import_name.startswith(node_package):
-            self.add_message('package-absolute-imports', node=node)
-
-    @staticmethod
-    def _node_package(node):
-        return node.scope().name.split('.')[0]
-
-
-class StringFormatChecker(BaseChecker):
-    """
-    Check for use of the '%' operator for string formatting instead of str.format.
-    """
-
-    __implements__ = IAstroidChecker
-
-    name = 'string-format'
-    priority = -1
-    msgs = {
-        'W9903': (
-            'use of "%" for string formatting', 'deprecated-string-format',
-            '"%" operator is used for string formatting instead of str.format method'
-        )
-    }
-
-    def visit_binop(self, node):
-        if node.op != '%':
-            return
-
-        left = node.left
-        if not (isinstance(left, astroid.Const) and isinstance(left.value, str)):
-            return
-
-        self.add_message('deprecated-string-format', node=node)
-
-
-class BadFunctionChecker(BaseChecker):
-    """
-    Check for calls to prohibited functions.
-    """
-
-    __implements__ = IAstroidChecker
-
-    name = 'bad-function'
-    priority = -1
-    msgs = {'W9904': ('using prohibited function', 'bad-function-call', '')}
-
-    options = (
-        (
-            'bad-functions',
-            {
-                'default': '',
-                'help': 'List of prohibited functions',
-            },
-        ),
-    )
-
-    def visit_call(self, node):
-        bad_functions = set(f.strip() for f in self.config.bad_functions.split(','))
-        if self._function_name(node) in bad_functions:
-            self.add_message('bad-function-call', node=node)
-
-    @staticmethod
-    def _function_name(node):
-        func = node.func
-        if hasattr(func, 'attrname'):
-            return func.attrname
-        elif hasattr(func, 'name'):
-            return func.name
-
-
-def register(linter):
-    """
-    Required method to auto register this checker.
-    """
-
-    linter.register_checker(BackslashChecker(linter))
-    linter.register_checker(AbsoluteImportsChecker(linter))
-    linter.register_checker(StringFormatChecker(linter))
-    linter.register_checker(BadFunctionChecker(linter))
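
The checkers removed above are standard pylint plugins: `register()` hooks them in when pylint is started with `--load-plugins`. As a rough illustration only, here is a hypothetical module (the package name `mypkg` and the `--bad-functions=print` setting are assumptions) containing one example of each pattern the deleted checkers report:

```python
# Illustration only: code patterns flagged by the deleted custom checkers.
# "mypkg" is a hypothetical package containing this module.
from mypkg.utils import helper           # W9902 package-absolute-imports:
                                         # absolute import from the module's own package

total = 1 + \
    2                                    # W9901 backslash-line-continuation

message = "total is %d" % helper(total)  # W9903 deprecated-string-format

print(message)                           # W9904 bad-function-call, assuming pylint is
                                         # run with --bad-functions=print
```
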
diff --git a/tools/accuracy_checker/requirements.txt b/tools/accuracy_checker/requirements.txt
deleted file mode 100644 (file)
index 3775f8b..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-numpy
-cython
-tqdm
-PyYAML
-yamlloader
-pillow
-scikit-learn
-scipy<=0.19
-py-cpuinfo
-shapely
-nibabel
diff --git a/tools/accuracy_checker/sample/README.md b/tools/accuracy_checker/sample/README.md
deleted file mode 100644 (file)
index 60fc9f4..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-Sample
-===========
-
-This sample walks through the typical steps required to evaluate a deep learning (DL) topology.
-
-The **SampLeNet** topology is used as the example.
-
-### 1. Extract dataset
-
-This sample uses a toy dataset, referred to as the *sample dataset*, which contains 10k images
-of 10 different classes (a classification problem); it is the CIFAR10 dataset converted to PNG.
-
-```bash
-tar xvf sample/sample_dataset.tar.gz -C sample
-```
-
-### 2. Evaluate sample topology
-
-Typically you need to write a configuration file that describes how your topology is evaluated.
-A config file for evaluating SampLeNet with the OpenVINO framework is already provided; read it carefully.
-
-```bash
-accuracy_check -c sample/sample_config.yml -m data/test_models -s sample
-```
-
-Options used: `-c` path to the evaluation config, `-m` directory where models are stored, `-s` directory with the source data (datasets).
-
-If everything works correctly, you should get `75.02%` accuracy.
-
-Now try editing the config to run SampLeNet on another Inference Engine plugin, or go straight to your own topology!
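
As a side note, the same evaluation can be scripted. Below is a minimal sketch that only uses the Python standard library; it assumes the `accuracy_check` entry point described in the deleted README above is installed and that the working directory is `tools/accuracy_checker`:

```python
# Sketch: drive the sample evaluation through the CLI described above.
import subprocess

subprocess.run(
    [
        "accuracy_check",
        "-c", "sample/sample_config.yml",  # evaluation config
        "-m", "data/test_models",          # directory where models are stored
        "-s", "sample",                    # directory with the source data (datasets)
    ],
    check=True,  # raise CalledProcessError if the tool exits with a non-zero code
)
```
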
diff --git a/tools/accuracy_checker/sample/sample_config.yml b/tools/accuracy_checker/sample/sample_config.yml
deleted file mode 100644 (file)
index b7b7955..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-models:
-  - name: SampLeNet_example
-
-    # list of launchers for your topology.
-    launchers:
-        # launcher framework (e.g. caffe, dlsdk)
-      - framework: dlsdk
-        # device for infer (e.g. for dlsdk cpu, gpu, hetero:cpu,gpu ...)
-        device: CPU
-        # topology IR (*.prototxt for caffe, *.xml for InferenceEngine, etc)
-        # path to topology is prefixed with directory, specified in "-m/--models" option
-        caffe_model:   SampLeNet.prototxt
-        # topology weights binary (*.caffemodel for caffe, *.bin for InferenceEngine)
-        caffe_weights: SampLeNet.caffemodel
-        # launcher returns raw result, so it should be converted
-        # to an appropriate representation with adapter
-        adapter: classification
-        # batch size
-        batch: 1
-
-    # metrics, preprocessing and postprocessing are typically dataset specific, so dataset field
-    # specifies data and all other steps required to validate topology
-    # there is typically definitions file, which contains options for common datasets and which is merged
-    # during evaluation, but since "sample_dataset" is not used anywhere else, this config contains full definition
-    datasets:
-        # uniquely distinguishable name for dataset
-        # note that all other steps are specific for this dataset only
-        # if you need to test topology on multiple datasets, you need to specify
-        # every step explicitly for each dataset
-      - name: sample_dataset
-        # directory where input images are searched.
-        # prefixed with directory specified in "-s/--source" option
-        data_source: sample_dataset/test
-        # parameters for annotation conversion to a common annotation representation format.
-        annotation_conversion:
-          # specifies which annotation converter will be used.
-          # For a custom dataset you need to provide your own annotation converter,
-          # i.e. implement BaseFormatConverter interface.
-          # All annotation converters are stored in accuracy_checker/annotation_converters directory.
-          converter: sample
-          # converter specific parameters.
-          # The full range of available options can be found in accuracy_checker/annotation_converters/README.md
-          # relative paths will be merged with "-s/--source" option
-          data_dir: sample_dataset
-
-        # list of preprocessing, applied to each image during validation
-        # order of entries matters
-        preprocessing:
-            # resize input image to topology input size
-            # you may specify size to which image should be resized
-            # via dst_width, dst_height fields
-          - type: resize
-            size: 32
-            # topology is trained on RGB images, but OpenCV reads in BGR
-            # hence the input must be converted to RGB
-          - type: bgr_to_rgb
-            # dataset mean and standard deviation
-          - type: normalization
-            # you may specify the statistics manually or use well-known precomputed values, e.g. the ImageNet ones
-            mean: (125.307, 122.961, 113.8575)
-            std: (51.5865, 50.847, 51.255)
-
-        # list of metrics, calculated on dataset
-        metrics:
-          - type: accuracy
-            top_k: 1
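
Since the deleted config above is plain YAML, it can also be inspected programmatically. A small sketch using PyYAML (which was listed in the deleted requirements.txt); the relative path is an assumption about the working directory:

```python
# Sketch: load the sample accuracy-checker config and summarize its contents.
import yaml

with open("sample/sample_config.yml") as config_file:
    config = yaml.safe_load(config_file)

for model in config["models"]:
    print("model:", model["name"])
    for launcher in model["launchers"]:
        print("  launcher:", launcher["framework"], "on", launcher["device"])
    for dataset in model["datasets"]:
        metric_types = [metric["type"] for metric in dataset["metrics"]]
        print("  dataset:", dataset["name"], "| metrics:", ", ".join(metric_types))
```
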
diff --git a/tools/accuracy_checker/setup.cfg b/tools/accuracy_checker/setup.cfg
deleted file mode 100644 (file)
index 5d5a13c..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-[flake8]
-max-line-length = 120
-ignore = F401
-
-[isort]
-line_length = 120
-use_parentheses = True
-known_third_party = openvino.inference_engine,caffe,cv2
diff --git a/tools/accuracy_checker/tests/__init__.py b/tools/accuracy_checker/tests/__init__.py
deleted file mode 100644 (file)
index 43d061d..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
diff --git a/tools/accuracy_checker/tests/common.py b/tools/accuracy_checker/tests/common.py
deleted file mode 100644 (file)
index 063a6cd..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from contextlib import contextmanager
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import List
-
-import numpy as np
-
-from accuracy_checker.representation import DetectionAnnotation, DetectionPrediction, SegmentationPrediction, SegmentationAnnotation
-from accuracy_checker.utils import get_path
-
-
-@contextmanager
-# a trailing '/' marks a directory entry; pathlib.Path drops trailing slashes, so plain strings are accepted
-# paths are expected in POSIX format
-def mock_filesystem(hierarchy: List[str]):
-    with TemporaryDirectory() as prefix:
-        for entry in hierarchy:
-            path = Path(prefix) / entry
-            if entry.endswith("/"):
-                path.mkdir(parents=True, exist_ok=True)
-            else:
-                parent = path.parent
-                if parent != Path("."):
-                    parent.mkdir(parents=True, exist_ok=True)
-                # create file
-                path.open('w').close()
-
-        yield get_path(prefix, is_directory=True)
-
-
-def make_representation(bounding_boxes, is_ground_truth=False, score=None, meta=None):
-    """
-    Args:
-        bounding_boxes: string or list of strings, `score label x0 y0 x1 y1; ...` for predictions or `label x0 y0 x1 y1; ...` for ground truth.
-        is_ground_truth: True if bbs are annotation boxes.
-        score: value in [0, 1], if not None, all prediction boxes are considered with the given score.
-        meta: metadata for representation
-    """
-
-    if not isinstance(bounding_boxes, list):
-        bounding_boxes = [bounding_boxes]
-
-    result = []
-    for idx, box in enumerate(bounding_boxes):
-        arr = np.array(np.mat(box))
-
-        if box == "":
-            arr = np.array([]).reshape((0, 5))
-
-        if is_ground_truth or score:
-            assert arr.shape[1] == 5
-        elif not is_ground_truth and not score:
-            assert arr.shape[1] == 6
-
-        if not is_ground_truth and score:
-            score_ = score
-            if np.isscalar(score_) or len(score_) == 1:
-                score_ = np.full(arr.shape[0], score_)
-            arr = np.c_[score_, arr]
-
-        if is_ground_truth:
-            detection = DetectionAnnotation(str(idx), arr[:, 0], arr[:, 1], arr[:, 2], arr[:, 3], arr[:, 4])
-        else:
-            detection = DetectionPrediction(str(idx), arr[:, 1], arr[:, 0], arr[:, 2], arr[:, 3], arr[:, 4], arr[:, 5])
-
-        if meta:
-            detection.metadata = meta[idx]
-
-        result.append(detection)
-
-    return result
-
-
-def make_segmentation_representation(mask, ground_truth=False):
-    if ground_truth:
-        representation = SegmentationAnnotation('identifier', None)
-        representation.mask = mask
-        return [representation]
-
-    return [SegmentationPrediction('identifier', mask)]
-
-
-def update_dict(dictionary, **kwargs):
-    copied = dictionary.copy()
-    copied.update(**kwargs)
-
-    return copied
-
-
-class DummyDataset:
-    def __init__(self, label_map, bg=-1):
-        self.label_map = label_map
-        self.background = bg
-        self.name = 'dummy'
-
-    @property
-    def metadata(self):
-        return {"label_map": self.label_map, "background_label": self.background}
-
-    @property
-    def labels(self):
-        return self.metadata['label_map']
-
-
-def multi_class_dataset():
-    labels = {0: 'dog', 1: 'cat', 2: 'human', -1: 'background'}
-    return DummyDataset(label_map=labels, bg=-1)
-
-
-def multi_class_dataset_without_background():
-    labels = {0: 'dog', 1: 'cat', 2: 'human'}
-    return DummyDataset(label_map=labels)
-
-
-def single_class_dataset():
-    labels = {0: 'dog', -1: 'background'}
-    return DummyDataset(label_map=labels, bg=-1)
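
For context, a short sketch of how the helpers deleted above were typically used from a test module placed next to `tests/common.py`; the test name is made up, and the assertions only rely on behaviour visible in the code above:

```python
# Hypothetical test illustrating make_representation and mock_filesystem.
from pathlib import Path

from .common import make_representation, mock_filesystem


def test_helpers_sketch():
    # Prediction string layout: score, label, x0, y0, x1, y1
    predictions = make_representation("0.7,1,10,10,20,20", meta=[{"image_size": (32, 32)}])
    assert len(predictions) == 1
    assert predictions[0].metadata == {"image_size": (32, 32)}

    # Ground-truth strings omit the score column: label, x0, y0, x1, y1
    annotations = make_representation("1,10,10,20,20", is_ground_truth=True)
    assert len(annotations) == 1

    # Entries with a trailing "/" become directories, plain entries become empty files
    with mock_filesystem(["images/", "annotation.txt"]) as root:
        assert (Path(root) / "images").is_dir()
        assert (Path(root) / "annotation.txt").is_file()
```
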
diff --git a/tools/accuracy_checker/tests/conftest.py b/tools/accuracy_checker/tests/conftest.py
deleted file mode 100644 (file)
index 7657240..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import os
-from pathlib import Path
-
-import pytest
-
-test_root = Path(__file__).parent
-project_root = test_root.parent
-
-
-def pytest_addoption(parser):
-    parser.addoption(
-        "--caffe_logging", action="store_true", default=False, help="Enable Google log"
-    )
-
-
-def pytest_configure(config):
-    if not config.getoption('caffe_logging'):
-        os.environ['GLOG_minloglevel'] = '2'
-
-
-@pytest.fixture
-def data_dir():
-    return project_root / 'data' / 'test_data'
-
-
-@pytest.fixture
-def models_dir():
-    return project_root / 'data' / 'test_models'
-
-
-@pytest.fixture
-def mock_path_exists(mocker):
-    mocker.patch('pathlib.Path.exists', return_value=True)
-    mocker.patch('pathlib.Path.is_dir', return_value=True)
-    mocker.patch('pathlib.Path.is_file', return_value=True)
-    mocker.patch('os.path.exists', return_value=True)
diff --git a/tools/accuracy_checker/tests/test_adapters.py b/tools/accuracy_checker/tests/test_adapters.py
deleted file mode 100644 (file)
index 3e36313..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-import pytest
-
-from accuracy_checker.adapters import SSDAdapter, Adapter
-from accuracy_checker.config import ConfigError
-from .common import make_representation
-
-
-def test_detection_adapter():
-    raw = {
-        'detection_out': np.array([[[[0, 3, 0.2, 0, 0, 1, 1], [0, 2, 0.5, 4, 4, 7, 7], [0, 5, 0.7, 3, 3, 9, 8]]]])
-    }
-    expected = make_representation('0.2,3,0,0,1,1;0.5,2,4,4,7,7;0.7,5,3,3,9,8')
-
-    actual = SSDAdapter({}, output_blob='detection_out').process([raw], ['0'], [{}])
-
-    assert np.array_equal(actual, expected)
-
-
-def test_detection_adapter_partially_filling_output_blob():
-    raw = {
-        'detection_out': np.array(
-            [[[[0, 3, 0.2, 0, 0, 1, 1], [0, 2, 0.5, 4, 4, 7, 7], [0, 5, 0.7, 3, 3, 9, 8], [-1, 0, 0, 0, 0, 0, 0]]]]
-        )
-    }
-    expected = make_representation('0.2,3,0,0,1,1;0.5,2,4,4,7,7;0.7,5,3,3,9,8')
-
-    actual = SSDAdapter({}, output_blob='detection_out').process([raw], ['0'])
-
-    assert np.array_equal(actual, expected)
-
-
-def test_detection_adapter_partially_filling_output_blob_with_zeros_at_the_end():
-    raw = {
-        'detection_out': np.array([[[
-            [0,  3, 0.2, 0, 0, 1, 1],
-            [0,  2, 0.5, 4, 4, 7, 7],
-            [0,  5, 0.7, 3, 3, 9, 8],
-            [-1, 0, 0,   0, 0, 0, 0],
-            [0,  0, 0,   0, 0, 0, 0]
-        ]]])
-    }
-    expected = make_representation('0.2,3,0,0,1,1;0.5,2,4,4,7,7;0.7,5,3,3,9,8')
-
-    actual = SSDAdapter({}, output_blob='detection_out').process([raw], ['0'])
-
-    assert np.array_equal(actual, expected)
-
-
-def test_detection_adapter_batch_2():
-    raw = {
-        'detection_out': np.array([[[[0, 3, 0.2, 0, 0, 1, 1], [0, 2, 0.5, 4, 4, 7, 7], [1, 5, 0.7, 3, 3, 9, 8]]]])
-    }
-    expected = make_representation(['0.2,3,0,0,1,1;0.5,2,4,4,7,7', '0.7,5,3,3,9,8'])
-
-    actual = SSDAdapter({}, output_blob='detection_out').process([raw], ['0', '1'])
-
-    assert np.array_equal(actual, expected)
-
-
-def test_dictionary_adapter_no_raise_warning_on_specific_args():
-    adapter_config = {'type': 'age_gender', 'gender_out': 'gender', 'age_out': 'age'}
-    with pytest.warns(None) as record:
-        Adapter.provide('age_gender', adapter_config)
-        assert len(record) == 0
-
-
-def test_age_gender_adapter_raise_config_error_on_extra_args():
-    adapter_config = {'type': 'age_gender', 'gender_out': 'gender', 'age_out': 'age', 'something_extra': 'extra'}
-    with pytest.raises(ConfigError):
-        Adapter.provide('age_gender', adapter_config)
-
-
-def test_face_person_detection_adapter_raise_config_error_on_extra_args():
-    adapter_config = {
-        'type': 'face_person_detection',
-        'face_detection_out': 'face',
-        'person_detection_out': 'person',
-        'something_extra': 'extra'
-    }
-    with pytest.raises(ConfigError):
-        Adapter.provide('face_person_detection', adapter_config)
-
-
-def test_head_pose_adapter_raise_config_error_on_extra_args():
-    adapter_config = {
-        'type': 'head_pose',
-        'angle_yaw': 'yaw',
-        'angle_pitch': 'pitch',
-        'angle_roll': 'roll',
-        'something_extra': 'extra'
-    }
-    with pytest.raises(ConfigError):
-        Adapter.provide('head_pose', adapter_config)
-
-
-def test_vehicle_attributes_adapter_raise_config_error_on_extra_args():
-    adapter_config = {
-        'type': 'vehicle_attributes',
-        'color_out': 'color',
-        'type_out': 'type',
-        'something_extra': 'extra'
-    }
-    with pytest.raises(ConfigError):
-        Adapter.provide('vehicle_attributes', adapter_config)
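The SSD adapter tests above all exercise the same detection_out layout: one row per candidate box, ordered as [image_id, label, score, x_min, y_min, x_max, y_max], with an image_id of -1 (optionally followed by all-zero rows) marking the end of the valid detections. A rough sketch of that convention, using a hypothetical helper name rather than anything from the adapter code itself:

    import numpy as np

    def ssd_rows_to_boxes(detection_out):
        # Row layout: [image_id, label, score, x_min, y_min, x_max, y_max];
        # a row whose image_id is -1 terminates the valid detections.
        boxes = []
        for row in np.reshape(detection_out, (-1, 7)):
            image_id, label, score, x_min, y_min, x_max, y_max = row
            if image_id == -1:
                break
            # make_representation strings list the same box as
            # "score,label,x_min,y_min,x_max,y_max", one box per ';'-separated chunk.
            boxes.append((score, label, x_min, y_min, x_max, y_max))
        return boxes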
diff --git a/tools/accuracy_checker/tests/test_caffe_launcher.py b/tools/accuracy_checker/tests/test_caffe_launcher.py
deleted file mode 100644 (file)
index 77a5cdf..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import pytest
-pytest.importorskip('accuracy_checker.launcher.caffe_launcher')
-
-import cv2
-import numpy as np
-
-from accuracy_checker.launcher.launcher import create_launcher
-from accuracy_checker.config import ConfigError
-from accuracy_checker.data_readers import DataRepresentation
-
-
-def get_caffe_test_model(models_dir):
-    config = {
-        "framework": "caffe",
-        "weights": str(models_dir / "SampLeNet.caffemodel"),
-        "model": str(models_dir / "SampLeNet.prototxt"),
-        "adapter": 'classification',
-        "device": "cpu"
-    }
-
-    return create_launcher(config)
-
-
-class TestCaffeLauncher:
-    def test_launcher_creates(self, models_dir):
-        assert get_caffe_test_model(models_dir).inputs['data'] == (3, 32, 32)
-
-    def test_infer(self, data_dir, models_dir):
-        caffe_test_model = get_caffe_test_model(models_dir)
-        c, h, w = caffe_test_model.inputs['data']
-        img_raw = cv2.imread(str(data_dir / '1.jpg'))
-        img_resized = cv2.resize(img_raw, (w, h))
-        input_blob = np.transpose([img_resized], (0, 3, 1, 2))
-        res = caffe_test_model.predict([{'data': input_blob.astype(np.float32)}], [{}])
-
-        assert np.argmax(res[0]['fc3']) == 6
-
-    def test_caffe_launcher_provide_input_shape_to_adapter(self, mocker, models_dir):
-        mocker.patch('caffe.Net.forward', return_value={'fc3': 0})
-        launcher = get_caffe_test_model(models_dir)
-        zeros = DataRepresentation(np.zeros((1, 3, 32, 32)))
-        launcher.predict([{'data': zeros.data}], [zeros.metadata])
-        assert zeros.metadata['input_shape'] == {'data': (3, 32, 32)}
-
-
-def test_missed_model_in_create_caffe_launcher_raises_config_error_exception():
-    launcher = {'framework': 'caffe', 'weights': 'custom', 'adapter': 'classification'}
-
-    with pytest.raises(ConfigError):
-        create_launcher(launcher)
-
-
-def test_missed_weights_in_create_caffe_launcher_raises_config_error_exception():
-    launcher = {'framework': 'caffe', 'model': 'custom', 'adapter': 'ssd'}
-
-    with pytest.raises(ConfigError):
-        create_launcher(launcher)
-
-
-def dummy_adapter():
-    pass
diff --git a/tools/accuracy_checker/tests/test_config_reader.py b/tools/accuracy_checker/tests/test_config_reader.py
deleted file mode 100644 (file)
index b03e23a..0000000
+++ /dev/null
@@ -1,1295 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import copy
-from pathlib import Path
-from argparse import Namespace
-
-import pytest
-from accuracy_checker.config import ConfigReader, ConfigError
-
-
-class TestConfigReader:
-    def setup_method(self):
-        self.global_launchers = [
-            {
-                'framework': 'dlsdk',
-                'device': 'fpga',
-                'cpu_extensions': 'dlsdk_shared.so',
-                'bitstream': 'bitstream'
-            },
-            {
-                'framework': 'caffe',
-                'device': 'gpu_0'
-            }
-        ]
-
-        self.global_datasets = [
-            {
-                'name': 'global_dataset',
-                'annotation': Path('/pascal_voc_2007_annotation.pickle'),
-                'data_source': Path('/VOCdevkit/VOC2007/JPEGImages'),
-                'preprocessing': [
-                    {
-                        'type': 'resize',
-                        'interpolation': 'mean_image',
-                    },
-                    {
-                        'type': 'normalization',
-                        'mean': 'voc',
-                    }
-                ],
-                'metrics': [{
-                    'type': 'fppi',
-                    'mr_rates': [0.0, 0.1]
-                }],
-                'postprocessing': [
-                    {
-                        'type': 'filter',
-                        'labels': ['dog', 'airplane'],
-                        'min_confidence': 0.05,
-                        'min_box_size': 60,
-                    },
-                    {
-                        'type': 'nms',
-                        'overlap': 0.5
-                    }
-                ]
-            }
-        ]
-
-        self.global_config = {
-            'launchers': self.global_launchers,
-            'datasets': self.global_datasets
-        }
-
-        self.module = 'accuracy_checker.config.ConfigReader'
-        self.arguments = Namespace(**{
-            'models': Path('models'),
-            'extensions': Path('extensions'),
-            'source': Path('source'),
-            'annotations': Path('annotations'),
-            'converted_models': Path('converted_models'),
-            'model_optimizer': Path('model_optimizer'),
-            'bitstreams': Path('bitstreams'),
-            'definitions': None,
-            'stored_predictions': None,
-            'tf_custom_op_config': None,
-            'tf_obj_detection_api_pipeline_config_path': None,
-            'progress': 'bar',
-            'target_framework': None,
-            'target_devices': None,
-            'log_file': None,
-            'target_tags': None,
-            'cpu_extensions_mode': None,
-            'aocl': None
-        })
-
-    def test_read_configs_without_global_config(self, mocker):
-        config = {'models': [{
-            'name': 'model',
-            'launchers': [{'framework': 'dlsdk', 'model': Path('/absolute_path'), 'weights': Path('/absolute_path')}],
-            'datasets': [{'name': 'global_dataset'}]
-        }]}
-        empty_args = Namespace(**{
-            'models': None, 'extensions': None, 'source': None, 'annotations': None,
-            'converted_models': None, 'model_optimizer': None, 'bitstreams': None,
-            'definitions': None, 'config': None, 'stored_predictions': None, 'tf_custom_op_config': None,
-            'progress': 'bar', 'target_framework': None, 'target_devices': None, 'log_file': None,
-            'tf_obj_detection_api_pipeline_config_path': None, 'target_tags': None, 'cpu_extensions_mode': None,
-            'aocl': None
-        })
-        mocker.patch('accuracy_checker.utils.get_path', return_value=Path.cwd())
-        mocker.patch('yaml.load', return_value=config)
-        mocker.patch('pathlib.Path.open')
-
-        result = ConfigReader.merge(empty_args)
-
-        assert 'models' == result[1]
-        assert config == result[0]
-
-    def test_empty_local_config_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Missing local config'
-
-    def test_missed_models_in_local_config_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'not_models': 'custom'}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Missed "{}" in local config'.format('models')
-
-    def test_empty_models_in_local_config_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'models': []}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Missed "{}" in local config'.format('models')
-
-    def test_missed_name_in_model_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'models': [{'launchers': None, 'datasets': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each model must specify {}'.format(', '.join(['name', 'launchers', 'datasets']))
-
-    def test_missed_launchers_in_model_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'models': [{'name': None, 'datasets': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each model must specify {}'.format(', '.join(['name', 'launchers', 'datasets']))
-
-    def test_missed_datasets_in_model_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'models': [{'name': None, 'launchers': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each model must specify {}'.format(', '.join(['name', 'launchers', 'datasets']))
-
-    def test_invalid_model_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'models': [{'name': None, 'launchers': None, 'datasets': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each model must specify {}'.format(', '.join(['name', 'launchers', 'datasets']))
-
-    def test_empty_pipeline_in_local_config_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': []}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Missed "{}" in local config'.format('pipelines')
-
-    def test_missed_name_in_pipeline_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': [{'device_info': None, 'stages': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each pipeline must specify {}'.format(', '.join(['name', 'device_info', 'stages']))
-
-    def test_missed_device_info_in_pipeline_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': [{'name': None, 'stages': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each pipeline must specify {}'.format(', '.join(['name', 'device_info', 'stages']))
-
-    def test_missed_stages_in_pipeline_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': [{'name': None, 'device_info': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each pipeline must specify {}'.format(', '.join(['name', 'device_info', 'stages']))
-
-    def test_invalid_pipeline_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': [{'name': None, 'device_info': None, 'stages': None}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each pipeline must specify {}'.format(', '.join(['name', 'device_info', 'stages']))
-
-    def test_pipeline_empty_stages_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': [{'name': 'stage1', 'device_info': [{'framework': 'caffe', 'device': 'CPU'}], 'stages': []}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each pipeline must specify {}'.format(', '.join(['name', 'device_info', 'stages']))
-
-    def test_pipeline_empty_device_info_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {'pipelines': [{'name': 'stage1', 'device_info': [], 'stages': [{'stage1': {}}]}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Each pipeline must specify {}'.format(', '.join(['name', 'device_info', 'stages']))
-
-    def test_pipeline_stage_does_not_contain_dataset_raises_value_error_exception(self, mocker):
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {
-                'pipelines': [{'name': 'stage1', 'device_info': [{'framework': 'caffe', 'device': 'CPU'}],
-                               'stages': [{'stage': 'stage1'}]}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'First stage should contain dataset'
-
-    def test_pipeline_contains_several_datasets_raises_value_error_exception(self, mocker):
-        dataset_config = {
-            'name': 'global_dataset',
-            'dataset_meta': 'relative_annotation_path',
-            'data_source': 'relative_source_path',
-            'segmentation_masks_source': 'relative_source_path',
-            'annotation': 'relative_annotation_path'
-        }
-        launcher_config = {'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}
-        pipelines_config = [
-            {'name': 'pipeline', 'device_info': [{'framework': 'caffe', 'device': 'CPU'}],
-             'stages': [{'stage': 'stage1', 'dataset': dataset_config},
-                        {'stage': 'stage2', 'dataset': dataset_config, 'launcher': launcher_config, 'metrics': {}}
-                        ]
-             }
-        ]
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {
-                'pipelines': pipelines_config}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Exactly one dataset per pipeline is supported'
-
-    def test_pipeline_without_launchers_raises_value_error_exception(self, mocker):
-        dataset_config = {
-                'name': 'global_dataset',
-                'dataset_meta': 'relative_annotation_path',
-                'data_source': 'relative_source_path',
-                'segmentation_masks_source': 'relative_source_path',
-                'annotation': 'relative_annotation_path'
-            }
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, {
-                'pipelines': [{'name': 'stage1', 'device_info': [{'framework': 'caffe', 'device': 'CPU'}],
-                               'stages': [{'stage': 'stage1', 'dataset': dataset_config}]}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Launchers are not specified'
-
-    def test_pipeline_without_metrics_raises_value_error_exception(self, mocker):
-        dataset_config = {
-                'name': 'global_dataset',
-                'dataset_meta': 'relative_annotation_path',
-                'annotation': 'relative_annotation_path'
-            }
-        launcher_config = {'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}
-        mocker.patch(self.module + '._read_configs', return_value=(
-            None, {
-                'pipelines': [{'name': 'stage1', 'device_info': [{'framework': 'caffe', 'device': 'CPU'}],
-                               'stages': [{'stage': 'stage1', 'dataset': dataset_config, 'launcher': launcher_config}]}]}
-        ))
-
-        with pytest.raises(ConfigError) as exception:
-            ConfigReader.merge(self.arguments)
-
-        error_message = str(exception).split(sep=': ')[-1]
-        assert error_message == 'Metrics are not specified'
-
-    def test_merge_datasets_with_definitions(self, mocker):
-        local_config = {'models': [{
-            'name': 'model',
-            'launchers': [{'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}],
-            'datasets': [{'name': 'global_dataset'}]
-        }]}
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, local_config
-        ))
-        arguments = copy.deepcopy(self.arguments)
-        arguments.model_optimizer = None
-
-        config = ConfigReader.merge(arguments)[0]
-
-        assert config['models'][0]['datasets'][0] == self.global_datasets[0]
-
-    def test_merge_datasets_with_definitions_and_meta_is_not_modified(self, mocker):
-        local_config = {'models': [{
-            'name': 'model',
-            'launchers': [{'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}],
-            'datasets': [{'name': 'global_dataset', 'dataset_meta': '/absolute_path'}]
-        }]}
-        expected = self.global_datasets[0]
-        expected['dataset_meta'] = Path('/absolute_path')
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, local_config
-        ))
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        assert config['models'][0]['datasets'][0] == expected
-
-    def test_expand_relative_paths_in_datasets_config_using_command_line(self, mocker):
-        local_config = {'models': [{
-            'name': 'model',
-            'launchers': [{'framework': 'caffe'}],
-            'datasets': [{
-                'name': 'global_dataset',
-                'dataset_meta': 'relative_annotation_path',
-                'data_source': 'relative_source_path',
-                'segmentation_masks_source': 'relative_source_path',
-                'annotation': 'relative_annotation_path'
-            }]
-        }]}
-
-        mocker.patch(self.module + '._read_configs', return_value=(
-            None, local_config
-        ))
-        expected = copy.deepcopy(local_config['models'][0]['datasets'][0])
-        expected['annotation'] = self.arguments.annotations / 'relative_annotation_path'
-        expected['dataset_meta'] = self.arguments.annotations / 'relative_annotation_path'
-        expected['segmentation_masks_source'] = self.arguments.source / 'relative_source_path'
-        expected['data_source'] = self.arguments.source / 'relative_source_path'
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        assert config['models'][0]['datasets'][0] == expected
-
-    def test_not_modify_absolute_paths_in_datasets_config_using_command_line(self):
-        local_config = {'models': [{
-            'name': 'model',
-            'datasets': [{
-                'name': 'global_dataset',
-                'dataset_meta': '/absolute_annotation_meta_path',
-                'data_source': '/absolute_source_path',
-                'annotation': '/absolute_annotation_path',
-            }]
-        }]}
-
-        expected = copy.deepcopy(local_config['models'][0]['datasets'][0])
-        expected['annotation'] = Path('/absolute_annotation_path')
-        expected['dataset_meta'] = Path('/absolute_annotation_meta_path')
-        expected['data_source'] = Path('/absolute_source_path')
-
-        ConfigReader._merge_paths_with_prefixes(self.arguments, local_config)
-
-        assert local_config['models'][0]['datasets'][0] == expected
-
-    def test_expand_relative_paths_in_pipeline_stage_dataset_config_using_command_line(self, mocker):
-        dataset_config = {
-                'name': 'global_dataset',
-                'dataset_meta': 'relative_annotation_path',
-                'data_source': 'relative_source_path',
-                'segmentation_masks_source': 'relative_source_path',
-                'annotation': 'relative_annotation_path'
-            }
-        launcher_config = {'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}
-        pipelines_config = [
-            {
-                'name': 'pipeline', 'device_info': [{'framework': 'caffe', 'device': 'CPU'}],
-                'stages': [
-                    {'stage': 'stage1', 'dataset': dataset_config},
-                    {'stage': 'stage2', 'launcher': launcher_config, 'metrics': {}}
-                ]
-            }
-        ]
-        mocker.patch(self.module + '._read_configs', return_value=(
-            None, {
-                'pipelines': pipelines_config}
-        ))
-
-        expected = copy.deepcopy(dataset_config)
-        expected['annotation'] = self.arguments.annotations / 'relative_annotation_path'
-        expected['dataset_meta'] = self.arguments.annotations / 'relative_annotation_path'
-        expected['segmentation_masks_source'] = self.arguments.source / 'relative_source_path'
-        expected['data_source'] = self.arguments.source / 'relative_source_path'
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        assert config['pipelines'][0]['stages'][0]['dataset'] == expected
-
-    def test_not_modify_absolute_paths_in_pipeline_stage_dataset_config_using_command_line(self, mocker):
-        dataset_config = {
-            'name': 'global_dataset',
-            'dataset_meta': '/absolute_annotation_meta_path',
-            'data_source': '/absolute_source_path',
-            'annotation': '/absolute_annotation_path'
-        }
-        launcher_config = {'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}
-        pipelines_config = [
-            {
-                'name': 'pipeline', 'device_info': [{'device': 'CPU'}],
-                'stages': [
-                    {'stage': 'stage1', 'dataset': dataset_config},
-                    {'stage': 'stage2', 'launcher': launcher_config, 'metrics': {}}
-                ]
-            }
-        ]
-        mocker.patch(self.module + '._read_configs', return_value=(
-            None, {
-                'pipelines': pipelines_config}
-        ))
-
-        expected = copy.deepcopy(dataset_config)
-        expected['annotation'] = Path('/absolute_annotation_path')
-        expected['dataset_meta'] = Path('/absolute_annotation_meta_path')
-        expected['data_source'] = Path('/absolute_source_path')
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        assert config['pipelines'][0]['stages'][0]['dataset'] == expected
-
-    def test_merge_launcher_with_device_info(self, mocker):
-        dataset_config = {
-            'name': 'global_dataset',
-            'dataset_meta': '/absolute_annotation_meta_path',
-            'data_source': '/absolute_source_path',
-            'annotation': '/absolute_annotation_path'
-        }
-        launcher_config = {'framework': 'caffe', 'model': Path('/absolute_path'), 'weights': Path('/absolute_path')}
-        device_info = {'device': 'CPU'}
-        expected = copy.deepcopy(launcher_config)
-        expected.update(device_info)
-        pipelines_config = [
-            {
-                'name': 'pipeline', 'device_info': [device_info],
-                'stages': [
-                    {'stage': 'stage1', 'dataset': dataset_config},
-                    {'stage': 'stage2', 'launcher': launcher_config, 'metrics': {}}
-                ]
-            }
-        ]
-        mocker.patch(self.module + '._read_configs', return_value=(
-            None, {
-                'pipelines': pipelines_config}
-        ))
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        assert config['pipelines'][0]['stages'][1]['launcher'] == expected
-
-    def test_merge_launcher_with_2_device_info(self, mocker):
-        dataset_config = {
-            'name': 'global_dataset',
-            'dataset_meta': '/absolute_annotation_meta_path',
-            'data_source': '/absolute_source_path',
-            'annotation': '/absolute_annotation_path'
-        }
-        launcher_config = {'framework': 'caffe', 'model': Path('/absolute_path'), 'weights': Path('/absolute_path')}
-        device_info = [{'device': 'CPU'}, {'device': 'GPU'}]
-        expected = [copy.deepcopy(launcher_config), copy.deepcopy(launcher_config)]
-        expected[0].update(device_info[0])
-        expected[1].update(device_info[1])
-        pipelines_config = [
-            {
-                'name': 'pipeline', 'device_info': device_info,
-                'stages': [
-                    {'stage': 'stage1', 'dataset': dataset_config},
-                    {'stage': 'stage2', 'launcher': launcher_config, 'metrics': {}}
-                ]
-            }
-        ]
-        mocker.patch(self.module + '._read_configs', return_value=(
-            None, {
-                'pipelines': pipelines_config}
-        ))
-
-        config = ConfigReader.merge(self.arguments)[0]
-        assert len(config['pipelines']) == 2
-        assert config['pipelines'][0]['stages'][1]['launcher'] == expected[0]
-        assert config['pipelines'][1]['stages'][1]['launcher'] == expected[1]
-
-    def test_merge_launchers_with_definitions(self, mocker):
-        local_config = {'models': [{
-            'name': 'model',
-            'launchers': [{'framework': 'dlsdk'}],
-            'datasets': [{'name': 'global_dataset'}]
-        }]}
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, local_config
-        ))
-        expected = copy.deepcopy(self.get_global_launcher('dlsdk'))
-        expected['bitstream'] = self.arguments.bitstreams / expected['bitstream']
-        expected['cpu_extensions'] = self.arguments.extensions / expected['cpu_extensions']
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.models = None
-
-        config = ConfigReader.merge(args)[0]
-
-        assert config['models'][0]['launchers'][0] == expected
-
-    def test_merge_launchers_with_model_is_not_modified(self, mocker):
-        local_config = {'models': [{
-            'name': 'model',
-            'launchers': [{'framework': 'dlsdk', 'model': 'custom'}],
-            'datasets': [{'name': 'global_dataset'}]
-        }]}
-        expected = copy.deepcopy(self.get_global_launcher('dlsdk'))
-        expected['model'] = 'custom'
-        expected['bitstream'] = self.arguments.bitstreams / expected['bitstream']
-        expected['cpu_extensions'] = self.arguments.extensions / expected['cpu_extensions']
-        mocker.patch(self.module + '._read_configs', return_value=(
-            self.global_config, local_config
-        ))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.models = None
-        args.converted_models = None
-        config = ConfigReader.merge(args)[0]
-
-        assert config['models'][0]['launchers'][0] == expected
-
-    def test_expand_relative_paths_in_launchers_config_using_command_line(self, mocker):
-        local_config = {'models': [{
-            'name': 'model',
-            'launchers': [{
-                'framework': 'dlsdk',
-                'model': 'relative_model_path',
-                'weights': 'relative_weights_path',
-                'cpu_extensions': 'relative_extensions_path',
-                'gpu_extensions': 'relative_extensions_path',
-                'caffe_model': 'relative_model_path',
-                'caffe_weights': 'relative_weights_path',
-                'tf_model': 'relative_model_path',
-                'mxnet_weights': 'relative_weights_path',
-                'bitstream': 'relative_bitstreams_path'
-            }],
-            'datasets': [{'name': 'dataset'}]
-        }]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-
-        expected = copy.deepcopy(local_config['models'][0]['launchers'][0])
-        expected['model'] = self.arguments.models / 'relative_model_path'
-        expected['caffe_model'] = self.arguments.models / 'relative_model_path'
-        expected['tf_model'] = self.arguments.models / 'relative_model_path'
-        expected['weights'] = self.arguments.models / 'relative_weights_path'
-        expected['caffe_weights'] = self.arguments.models / 'relative_weights_path'
-        expected['mxnet_weights'] = self.arguments.models / 'relative_weights_path'
-        expected['cpu_extensions'] = self.arguments.extensions / 'relative_extensions_path'
-        expected['gpu_extensions'] = self.arguments.extensions / 'relative_extensions_path'
-        expected['bitstream'] = self.arguments.bitstreams / 'relative_bitstreams_path'
-        expected['_models_prefix'] = self.arguments.models
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        config = ConfigReader.merge(args)[0]
-
-        assert config['models'][0]['launchers'][0] == expected
-
-    def test_both_launchers_are_filtered_by_target_tags_if_tags_not_provided_in_config(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': '/absolute_path1',
-                'weights': '/absolute_path1',
-                'adapter': 'classification',
-                'device': 'CPU',
-            },
-            {
-                'framework': 'dlsdk',
-                'model': '/absolute_path2',
-                'weights': '/absolute_path2',
-                'adapter': 'classification',
-                'device': 'GPU',
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        self.arguments.target_tags = ['some_tag']
-
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_launcher_is_not_filtered_by_the_same_tag(self, mocker):
-        config_launchers = [{
-            'framework': 'dlsdk',
-            'tags': ['some_tag'],
-            'model': Path('/absolute_path1'),
-            'weights': Path('/absolute_path1'),
-            'adapter': 'classification',
-            'device': 'CPU',
-            '_model_optimizer': self.arguments.model_optimizer,
-            '_models_prefix': self.arguments.models
-        }]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_tags = ['some_tag']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers[0] == config_launchers[0]
-
-    def test_both_launchers_are_not_filtered_by_the_same_tag(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'tags': ['some_tag'],
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'dlsdk',
-                'tags': ['some_tag'],
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_tags = ['some_tag']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_both_launchers_are_filtered_by_another_tag(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'tags': ['some_tag'],
-                'model': '/absolute_path1',
-                'weights': '/absolute_path1',
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'dlsdk',
-                'tags': ['some_tag'],
-                'model': '/absolute_path2',
-                'weights': '/absolute_path2',
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_tags = ['other_tag']
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_only_appropriate_launcher_is_filtered_by_another_tag(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'tags': ['tag1'],
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'tags': ['tag2'],
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_tags = ['tag2']
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 1
-        assert launchers[0] == config_launchers[1]
-
-    def test_only_appropriate_launcher_is_filtered_by_another_tag_if_provided_several_target_tags(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'tags': ['tag1'],
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'tags': ['tag2'],
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_tags = ['tag2', 'tag3']
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 1
-        assert launchers[0] == config_launchers[1]
-
-    def test_launcher_with_several_tags_containing_at_least_one_of_the_target_tags_is_not_filtered(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'tags': ['tag1', 'tag2'],
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_tags = ['tag2']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 1
-        assert launchers[0] == config_launchers[0]
-
-    def test_both_launchers_with_different_tags_are_not_filtered_by_the_same_tags(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'tags': ['tag1'],
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'dlsdk',
-                'tags': ['tag2'],
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_tags = ['tag1', 'tag2']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_launcher_is_not_filtered_by_the_same_framework(self, mocker):
-        config_launchers = [{
-            'framework': 'dlsdk',
-            'model': Path('/absolute_path1'),
-            'weights': Path('/absolute_path1'),
-            'adapter': 'classification',
-            'device': 'CPU',
-            '_model_optimizer': self.arguments.model_optimizer,
-            '_models_prefix': self.arguments.models
-        }]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_framework = 'dlsdk'
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_both_launchers_are_not_filtered_by_the_same_framework(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_framework = 'dlsdk'
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_launcher_is_filtered_by_another_framework(self, mocker):
-        config_launchers = [{
-            'framework': 'dlsdk',
-            'model': Path('/absolute_path'),
-            'weights': Path('/absolute_path'),
-            'adapter': 'classification',
-            '_model_optimizer': self.arguments.model_optimizer,
-            '_models_prefix': self.arguments.models
-        }]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_framework = 'caffe'
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_both_launchers_are_filtered_by_another_framework(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': '/absolute_path1',
-                'weights': '/absolute_path1',
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'dlsdk',
-                'model': '/absolute_path2',
-                'weights': '/absolute_path2',
-                'adapter': 'classification',
-                'device': 'GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_framework = 'caffe'
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_only_appropriate_launcher_is_filtered_by_another_framework(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_framework = 'caffe'
-
-        config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 1
-        assert launchers[0] == config_launchers[1]
-
-    def test_launcher_is_not_filtered_by_the_same_device(self, mocker):
-        config_launchers = [{
-            'framework': 'dlsdk',
-            'model': Path('/absolute_path1'),
-            'weights': Path('/absolute_path1'),
-            'adapter': 'classification',
-            'device': 'CPU',
-            '_model_optimizer': self.arguments.model_optimizer,
-            '_models_prefix': self.arguments.models
-        }]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.model_optimizer = None
-        args.converted_models = None
-        args.target_devices = ['CPU']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_both_launchers_are_not_filtered_by_the_same_device(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'CPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.converted_models = None
-        args.target_devices = ['CPU']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_launcher_is_filtered_by_another_device(self, mocker):
-        config_launchers = [{
-            'framework': 'dlsdk',
-            'model': Path('/absolute_path1'),
-            'weights': Path('/absolute_path1'),
-            'adapter': 'classification',
-            'device': 'CPU',
-            '_model_optimizer': self.arguments.model_optimizer,
-            '_models_prefix': self.arguments.models
-        }]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.converted_models = None
-        args.target_devices = ['GPU']
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_both_launchers_are_filtered_by_another_device(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'CPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_devices = ['GPU']
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_only_appropriate_launcher_is_filtered_by_another_device(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.converted_models = None
-        args.target_devices = ['GPU']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 1
-        assert launchers[0] == config_launchers[1]
-
-    def test_only_appropriate_launcher_is_filtered_by_user_input_devices(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'HETERO:CPU,GPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU',
-            }
-        ]
-
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.converted_models = None
-        args.target_devices = ['GPU', 'CPU']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == [config_launchers[0], config_launchers[2]]
-
-    def test_both_launchers_are_filtered_by_other_devices(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': '/absolute_path1',
-                'weights': '/absolute_path1',
-                'adapter': 'classification',
-                'device': 'CPU',
-            },
-            {
-                'framework': 'caffe',
-                'model': '/absolute_path2',
-                'weights': '/absolute_path2',
-                'adapter': 'classification',
-                'device': 'CPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        self.arguments.target_devices = ['FPGA', 'MYRIAD']
-
-        with pytest.warns(Warning):
-            config = ConfigReader.merge(self.arguments)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 0
-
-    def test_both_launchers_are_not_filtered_by_same_devices(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.converted_models = None
-        args.target_devices = ['GPU', 'CPU']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert launchers == config_launchers
-
-    def test_launcher_is_not_filtered_by_device_with_tail(self, mocker):
-        config_launchers = [
-            {
-                'framework': 'dlsdk',
-                'model': Path('/absolute_path1'),
-                'weights': Path('/absolute_path1'),
-                'adapter': 'classification',
-                'device': 'CPU',
-                '_model_optimizer': self.arguments.model_optimizer,
-                '_models_prefix': self.arguments.models
-            },
-            {
-                'framework': 'caffe',
-                'model': Path('/absolute_path2'),
-                'weights': Path('/absolute_path2'),
-                'adapter': 'classification',
-                'device': 'GPU'
-            }
-        ]
-        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
-        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
-        args = copy.deepcopy(self.arguments)
-        args.converted_models = None
-        args.target_devices = ['CPU', 'GPU_unexpected_tail']
-
-        config = ConfigReader.merge(args)[0]
-
-        launchers = config['models'][0]['launchers']
-        assert len(launchers) == 1
-        assert launchers[0] == config_launchers[0]
-
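-    # Helpers for the tests above: look up an entry in self.global_launchers / self.global_datasets
-    # by framework or name, raising ValueError when no matching entry is defined.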
-    def get_global_launcher(self, framework):
-        for launcher in self.global_launchers:
-            if launcher['framework'] == framework:
-                return launcher
-
-        raise ValueError('Undefined global launcher with framework = "{}"'.format(framework))
-
-    def get_global_dataset(self, name):
-        for dataset in self.global_datasets:
-            if dataset['name'] == name:
-                return dataset
-
-        raise ValueError('Undefined global dataset with name = "{}"'.format(name))
diff --git a/tools/accuracy_checker/tests/test_config_validator.py b/tools/accuracy_checker/tests/test_config_validator.py
deleted file mode 100644 (file)
index eae6576..0000000
+++ /dev/null
@@ -1,385 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from math import inf, nan
-from pathlib import Path
-from unittest.mock import ANY
-
-import pytest
-from accuracy_checker.config.config_validator import (
-    ConfigError,
-    ConfigValidator,
-    DictField,
-    ListField,
-    NumberField,
-    PathField,
-    StringField
-)
-from tests.common import mock_filesystem
-
-
-class TestStringField:
-    def test_expects_string(self):
-        string_field = StringField()
-
-        with pytest.raises(ConfigError):
-            string_field.validate(b"foo")
-        with pytest.raises(ConfigError):
-            string_field.validate({})
-        with pytest.raises(ConfigError):
-            string_field.validate(42)
-
-        string_field.validate("foo")
-
-    def test_choices(self):
-        string_field = StringField(choices=['foo', 'bar'])
-
-        with pytest.raises(ConfigError):
-            string_field.validate('baz')
-
-        string_field.validate('bar')
-
-    def test_case_sensitive(self):
-        string_field = StringField(choices=['foo', 'bar'], case_sensitive=False)
-
-        string_field.validate('foo')
-        string_field.validate('FOO')
-
-        string_field = StringField(choices=['foo', 'bar'], case_sensitive=True)
-
-        string_field.validate('foo')
-        with pytest.raises(ConfigError):
-            string_field.validate('FOO')
-
-    def test_regex(self):
-        string_field = StringField(regex=r'foo\d*')
-
-        string_field.validate('foo')
-        string_field.validate('foo42')
-
-        with pytest.raises(ConfigError):
-            string_field.validate('baz')
-
-    def test_custom_exception(self, mocker):
-        stub = mocker.stub(name='custom_on_error')
-        string_field = StringField(choices=['foo'], on_error=stub)
-
-        with pytest.raises(ConfigError):
-            string_field.validate('bar', 'foo')
-        stub.assert_called_once_with('bar', 'foo', ANY)
-
-    def test_custom_validator(self, mocker):
-        stub = mocker.stub(name='custom_validator')
-        string_field = StringField(choices=['foo'], additional_validator=stub)
-
-        string_field.validate('foo', 'baz')
-        stub.assert_called_once_with('foo', 'baz')
-
-
-class TestNumberField:
-    def test_expects_number(self):
-        number_field = NumberField(floats=True)
-
-        number_field.validate(1.0)
-        with pytest.raises(ConfigError):
-            number_field.validate("foo")
-        with pytest.raises(ConfigError):
-            number_field.validate({})
-        with pytest.raises(ConfigError):
-            number_field.validate([])
-
-        number_field = NumberField(floats=False)
-        number_field.validate(1)
-        with pytest.raises(ConfigError):
-            number_field.validate(1.0)
-
-    def test_nans(self):
-        number_field = NumberField(allow_nan=True)
-        number_field.validate(nan)
-
-        number_field = NumberField(allow_nan=False)
-        with pytest.raises(ConfigError):
-            number_field.validate(nan)
-
-    def test_infinity(self):
-        number_field = NumberField(allow_inf=True)
-        number_field.validate(inf)
-
-        number_field = NumberField(allow_inf=False)
-        with pytest.raises(ConfigError):
-            number_field.validate(inf)
-
-    def test_ranges(self):
-        number_field = NumberField(min_value=0, max_value=5)
-
-        number_field.validate(0)
-        number_field.validate(1)
-        number_field.validate(2)
-
-        with pytest.raises(ConfigError):
-            number_field.validate(-1)
-        with pytest.raises(ConfigError):
-            number_field.validate(7)
-
-
-class TestDictField:
-    def test_expects_dict(self):
-        dict_field = DictField()
-
-        dict_field.validate({})
-        with pytest.raises(ConfigError):
-            dict_field.validate("foo")
-        with pytest.raises(ConfigError):
-            dict_field.validate(42)
-        with pytest.raises(ConfigError):
-            dict_field.validate([])
-
-    def test_validates_keys(self):
-        dict_field = DictField()
-        dict_field.validate({'foo': 42, 1: 'bar'})
-
-        dict_field = DictField(key_type=str)
-        dict_field.validate({'foo': 42, 'bar': 'bar'})
-        with pytest.raises(ConfigError):
-            dict_field.validate({'foo': 42, 1: 'bar'})
-
-        dict_field = DictField(key_type=StringField(choices=['foo', 'bar']))
-        dict_field.validate({'foo': 42, 'bar': 42})
-        with pytest.raises(ConfigError):
-            dict_field.validate({'foo': 42, 1: 'bar'})
-        with pytest.raises(ConfigError):
-            dict_field.validate({'foo': 42, 'baz': 42})
-
-    def test_validates_values(self):
-        dict_field = DictField()
-        dict_field.validate({'foo': 42, 1: 'bar'})
-
-        dict_field = DictField(value_type=str)
-        dict_field.validate({'foo': 'foo', 1: 'bar'})
-        with pytest.raises(ConfigError):
-            dict_field.validate({'foo': 42, 1: 2})
-
-        dict_field = DictField(value_type=StringField(choices=['foo', 'bar']))
-        dict_field.validate({1: 'foo', 'bar': 'bar'})
-        with pytest.raises(ConfigError):
-            dict_field.validate({1: 'foo', 2: 3})
-        with pytest.raises(ConfigError):
-            dict_field.validate({1: 'foo', 2: 'baz'})
-
-    def test_converts_basic_types(self):
-        dict_field = DictField(value_type=str)
-        assert isinstance(dict_field.value_type, StringField)
-
-        dict_field = DictField(value_type=int)
-        assert isinstance(dict_field.value_type, NumberField)
-        assert dict_field.value_type.floats is False
-
-        dict_field = DictField(value_type=float)
-        assert isinstance(dict_field.value_type, NumberField)
-        assert dict_field.value_type.floats is True
-
-        dict_field = DictField(value_type=list)
-        assert isinstance(dict_field.value_type, ListField)
-
-        dict_field = DictField(value_type=dict)
-        assert isinstance(dict_field.value_type, DictField)
-
-        dict_field = DictField(value_type=Path)
-        assert isinstance(dict_field.value_type, PathField)
-
-    def test_empty(self):
-        dict_field = DictField()
-        dict_field.validate({})
-
-        dict_field = DictField(allow_empty=False)
-        with pytest.raises(ConfigError):
-            dict_field.validate({})
-
-
-class TestListField:
-    def test_expects_list(self):
-        list_field = ListField()
-
-        list_field.validate([])
-        with pytest.raises(ConfigError):
-            list_field.validate("foo")
-        with pytest.raises(ConfigError):
-            list_field.validate(42)
-        with pytest.raises(ConfigError):
-            list_field.validate({})
-
-    def test_validates_values(self):
-        list_field = ListField()
-        list_field.validate(['foo', 42])
-
-        list_field = ListField(value_type=str)
-        list_field.validate(['foo', 'bar'])
-        with pytest.raises(ConfigError):
-            list_field.validate(['foo', 42])
-
-        list_field = ListField(value_type=StringField(choices=['foo', 'bar']))
-        list_field.validate(['foo', 'bar'])
-        with pytest.raises(ConfigError):
-            list_field.validate(['foo', 42])
-        with pytest.raises(ConfigError):
-            list_field.validate(['foo', 'bar', 'baz'])
-
-    def test_empty(self):
-        list_field = ListField()
-        list_field.validate([])
-
-        list_field = ListField(allow_empty=False)
-        with pytest.raises(ConfigError):
-            list_field.validate([])
-
-
-class TestPathField:
-    @pytest.mark.usefixtures('mock_path_exists')
-    def test_expects_path_like(self):
-        path_field = PathField()
-        path_field.validate('foo/bar')
-        path_field.validate('/home/user')
-        path_field.validate(Path('foo/bar'))
-
-        with pytest.raises(ConfigError):
-            path_field.validate(42)
-        with pytest.raises(ConfigError):
-            path_field.validate({})
-        with pytest.raises(ConfigError):
-            path_field.validate([])
-
-    def test_path_is_checked(self):
-        with mock_filesystem(['foo/bar']) as prefix:
-            prefix_path = Path(prefix)
-            file_field = PathField(is_directory=False)
-            with pytest.raises(ConfigError):
-                file_field.validate(prefix_path / 'foo')
-            file_field.validate(prefix_path / 'foo' / 'bar')
-
-            dir_field = PathField(is_directory=True)
-            dir_field.validate(prefix_path / 'foo')
-
-            with pytest.raises(ConfigError):
-                dir_field.validate(prefix_path / 'foo' / 'bar')
-
-    def test_path_not_checked(self):
-        with mock_filesystem(['foo/bar']) as prefix:
-            prefix_path = Path(prefix)
-            file_field = PathField(is_directory=False, check_exists=False)
-            file_field.validate(prefix_path / 'foo' / 'bar')
-
-
-class TestConfigValidator:
-    def test_compound(self):
-        class SampleValidator(ConfigValidator):
-            foo = StringField(choices=['foo'])
-            bar = NumberField()
-
-        sample_validator = SampleValidator('Sample')
-        sample_validator.validate({'foo': 'foo', 'bar': 1})
-
-        with pytest.raises(ConfigError):
-            sample_validator.validate({'foo': 'foo'})
-        with pytest.raises(ConfigError):
-            sample_validator.validate({'foo': 'bar', 'bar': 1})
-
-    def test_optional_fields(self):
-        class SampleValidatorNoOptionals(ConfigValidator):
-            foo = StringField(choices=['foo'])
-            bar = NumberField(optional=False)
-
-        sample_validator = SampleValidatorNoOptionals('Sample')
-        sample_validator.validate({'foo': 'foo', 'bar': 1})
-        with pytest.raises(ConfigError):
-            sample_validator.validate({'foo': 'bar'})
-
-        class SampleValidatorWithOptionals(ConfigValidator):
-            foo = StringField(choices=['foo'])
-            bar = NumberField(optional=True)
-
-        sample_validator = SampleValidatorWithOptionals('Sample')
-        sample_validator.validate({'foo': 'foo', 'bar': 1})
-        sample_validator.validate({'foo': 'foo'})
-
-    def test_extra_fields__warn_on_extra(self):
-        class SampleValidatorWarnOnExtra(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        sample_validator = SampleValidatorWarnOnExtra(
-            'Sample', on_extra_argument=ConfigValidator.WARN_ON_EXTRA_ARGUMENT
-        )
-
-        with pytest.warns(UserWarning):
-            sample_validator.validate({'foo': 'foo', 'bar': 'bar'})
-
-    def test_extra_fields__error_on_extra(self):
-        class SampleValidatorErrorOnExtra(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        sample_validator = SampleValidatorErrorOnExtra(
-            'Sample', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
-
-        with pytest.raises(ConfigError):
-            sample_validator.validate({'foo': 'bar', 'bar': 'bar'})
-
-    def test_extra_fields__ignore_extra(self):
-        class SampleValidatorIgnoresExtra(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        sample_validator = SampleValidatorIgnoresExtra(
-            'Sample', on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT)
-
-        sample_validator.validate({'foo': 'foo', 'bar': 'bar'})
-
-    def test_custom_exception(self, mocker):
-        class SampleValidator(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        stub = mocker.stub(name='custom_on_error')
-        sample_validator = SampleValidator('Sample', on_error=stub)
-        sample_validator.validate({})
-        stub.assert_called_once_with(ANY, 'Sample', ANY)
-
-    def test_custom_validator(self, mocker):
-        class SampleValidator(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        stub = mocker.stub(name='custom_validator')
-        sample_validator = SampleValidator('Sample', additional_validator=stub)
-        entry = {'foo': 'foo'}
-        sample_validator.validate(entry)
-        stub.assert_called_once_with(entry, 'Sample')
-
-    def test_nested(self):
-        class InnerValidator(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        class OuterValidator(ConfigValidator):
-            bar = ListField(InnerValidator('Inner'))
-
-        outer_validator = OuterValidator('Outer', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
-
-        outer_validator.validate({'bar': [{'foo': 'foo'}, {'foo': 'foo'}]})
-
-    def test_inheritance(self):
-        class ParentValidator(ConfigValidator):
-            foo = StringField(choices=['foo'])
-
-        class DerivedValidator(ParentValidator):
-            bar = StringField(choices=['bar'])
-
-        derived_validator = DerivedValidator('Derived', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
-        derived_validator.validate({'foo': 'foo', 'bar': 'bar'})
diff --git a/tools/accuracy_checker/tests/test_dataset.py b/tools/accuracy_checker/tests/test_dataset.py
deleted file mode 100644 (file)
index 4d8d15f..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import copy
-from pathlib import Path
-import pytest
-from .common import make_representation
-from accuracy_checker.config import ConfigError
-
-from accuracy_checker.dataset import Dataset
-
-def copy_dataset_config(config):
-    new_config = copy.deepcopy(config)
-
-    return new_config
-
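-# Minimal preprocessor stub: process() returns the input images unchanged.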
-class MockPreprocessor:
-    @staticmethod
-    def process(images):
-        return images
-
-
-class TestDataset:
-    dataset_config = {
-            'name': 'custom',
-            'annotation': 'custom',
-            'data_source': 'custom',
-            'metrics': [{'type': 'map'}]
-        }
-
-    def test_missed_name_raises_config_error_exception(self):
-        local_dataset = copy_dataset_config(self.dataset_config)
-        local_dataset.pop('name')
-
-        with pytest.raises(ConfigError):
-            Dataset(local_dataset)
-
-    def test_setting_custom_dataset_with_missed_annotation_raises_config_error_exception(self):
-        local_dataset = copy_dataset_config(self.dataset_config)
-        local_dataset.pop('annotation')
-        with pytest.raises(ConfigError):
-            Dataset(local_dataset)
-
-
-@pytest.mark.usefixtures('mock_path_exists')
-class TestAnnotationConversion:
-    dataset_config = {
-        'name': 'custom',
-        'data_source': 'custom',
-        'metrics': [{'type': 'map'}]
-    }
-
-    def test_annotation_conversion_unknown_converter_raise_config_error(self):
-        addition_options = {'annotation_conversion': {'converter': 'unknown'}}
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        with pytest.raises(ValueError):
-            Dataset(config)
-
-    def test_annotation_conversion_converter_without_required_options_raise_config_error(self):
-        addition_options = {'annotation_conversion': {'converter': 'wider'}}
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        with pytest.raises(ConfigError):
-            Dataset(config)
-
-    def test_annotation_conversion_raise_config_error_on_extra_args(self):
-        addition_options = {'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file', 'something_extra': 'extra'}}
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        with pytest.raises(ConfigError):
-            Dataset(config)
-
-    def test_successful_annotation_conversion(self, mocker):
-        addition_options = {'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')}}
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        annotation_converter_mock = mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(make_representation("0 0 0 5 5", True), None)
-        )
-        Dataset(config)
-        annotation_converter_mock.assert_called_once_with()
-
-    def test_annotation_conversion_not_convert_twice(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
-            'annotation': Path('custom')
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation('0 0 0 5 5', True)
-        annotation_reader_mock = mocker.patch(
-            'accuracy_checker.dataset.read_annotation',
-            return_value=(converted_annotation, None)
-        )
-        Dataset(config)
-
-        annotation_reader_mock.assert_called_once_with(Path('custom'))
-
-    def test_annotation_conversion_with_store_annotation(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
-            'annotation': Path('custom')
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation('0 0 0 5 5', True)
-        mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(converted_annotation, None)
-        )
-        mocker.patch('pathlib.Path.exists', return_value=False)
-        annotation_saver_mock = mocker.patch(
-            'accuracy_checker.dataset.save_annotation'
-        )
-        Dataset(config)
-
-        annotation_saver_mock.assert_called_once_with(converted_annotation, None, Path('custom'), None)
-
-    def test_annotation_conversion_subset_size(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
-            'subsample_size': 1
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
-        mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(converted_annotation, None)
-        )
-        dataset = Dataset(config)
-        assert dataset.annotation == [converted_annotation[1]]
-
-    def test_annotation_conversion_subset_ratio(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
-            'subsample_size': '50%'
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
-        mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(converted_annotation, None)
-        )
-        subset_maker_mock = mocker.patch(
-            'accuracy_checker.dataset.make_subset'
-        )
-        Dataset(config)
-        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666)
-
-    def test_annotation_conversion_subset_more_than_dataset_size(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
-            'subsample_size': 3,
-            'subsample_seed': 1
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
-        mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(converted_annotation, None)
-        )
-        with pytest.warns(UserWarning):
-            dataset = Dataset(config)
-            annotation = dataset.annotation
-            assert annotation == converted_annotation
-
-    def test_annotation_conversion_subset_with_seed(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': Path('file')},
-            'subsample_size': 1,
-            'subsample_seed': 1
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
-        mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(converted_annotation, None)
-        )
-        dataset = Dataset(config)
-        annotation = dataset.annotation
-        assert annotation == [converted_annotation[0]]
-
-    def test_annotation_conversion_save_subset(self, mocker):
-        addition_options = {
-            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
-            'annotation': Path('custom'),
-            'subsample_size': 1,
-        }
-        config = copy_dataset_config(self.dataset_config)
-        config.update(addition_options)
-        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
-        mocker.patch(
-            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
-            return_value=(converted_annotation, None)
-        )
-        annotation_saver_mock = mocker.patch(
-            'accuracy_checker.dataset.save_annotation'
-        )
-        mocker.patch('pathlib.Path.exists', return_value=False)
-        Dataset(config)
-        annotation_saver_mock.assert_called_once_with([converted_annotation[1]], None, Path('custom'), None)
-
diff --git a/tools/accuracy_checker/tests/test_dependency.py b/tools/accuracy_checker/tests/test_dependency.py
deleted file mode 100644 (file)
index 0f98842..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from accuracy_checker.dependency import ClassProvider, get_opts
-
-
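-# get_opts splits a provider options entry into a (positional args, keyword args) pair;
-# the tests below cover tuple+dict, dict-only and tuple-only inputs.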
-def test_get_opts_positional_and_kwargs():
-    opts = {'o': ((1,), {'a': 1})}
-    args, kwargs = get_opts(opts['o'])
-
-    assert args == (1,)
-    assert kwargs == {'a': 1}
-
-
-def test_get_opts_kwargs_only():
-    opts = {'o': {'a': 1}}
-    args, kwargs = get_opts(opts['o'])
-
-    assert args == ()
-    assert kwargs == {'a': 1}
-
-
-def test_get_opts_positional_only():
-    opts = {'o': (1, 2, 3)}
-    args, kwargs = get_opts(opts['o'])
-
-    assert args == (1, 2, 3)
-    assert kwargs == {}
-
-
-def test_class_provider():
-    class BaseService(ClassProvider):
-        __provider_type__ = 'Service'
-
-    class ServiceA(BaseService):
-        __provider__ = 'service_a'
-
-    class ServiceB(BaseService):
-        __provider__ = 'service_b'
-
-    assert issubclass(ServiceA, BaseService)
-    assert issubclass(ServiceB, BaseService)
-
-    assert 'service_a' in BaseService.providers
-    assert 'service_b' in BaseService.providers
-
-
-def test_provide():
-    class BaseService(ClassProvider):
-        __provider_type__ = 'service'
-
-        def __init__(self):
-            pass
-
-    class ServiceA(BaseService):
-        __provider__ = 'service_a'
-
-    provided = BaseService.provide('service_a')
-
-    assert isinstance(provided, ServiceA)
-
-
-def test_provide_with_args():
-    class BaseService(ClassProvider):
-        __provider_type__ = 'service'
-
-        def __init__(self, bar):
-            self.bar = bar
-
-    class ServiceA(BaseService):
-        __provider__ = 'service_a'
-
-    provided = BaseService.provide('service_a', bar=42)
-
-    assert isinstance(provided, ServiceA)
-    assert provided.bar == 42
diff --git a/tools/accuracy_checker/tests/test_detection_metrics.py b/tools/accuracy_checker/tests/test_detection_metrics.py
deleted file mode 100644 (file)
index def1354..0000000
+++ /dev/null
@@ -1,459 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import pytest
-import numpy as np
-from accuracy_checker.metrics import DetectionMAP
-from accuracy_checker.metrics.detection import Recall, bbox_match
-from accuracy_checker.metrics.overlap import IOU, IOA
-from tests.common import (make_representation, single_class_dataset, multi_class_dataset,
-                          multi_class_dataset_without_background)
-
-
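-# Helper: instantiates a metric class through its registered provider name,
-# merging any extra keyword options into the metric config.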
-def _test_metric_wrapper(metric_cls, dataset, **kwargs):
-    provider = metric_cls.__provider__
-    config = {'type': provider, 'name': provider}
-    config.update(**kwargs)
-    return metric_cls(config, dataset, provider)
-
-
-class TestBoxMatch:
-    def test_single(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 1
-        assert fp[0] == 0
-
-    def test_single_with_ignored_tp(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        pred[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 0
-        assert fp[0] == 0
-
-    def test_single_with_use_filtered_tp(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        pred[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator, use_filtered_tp=True)
-        assert tp[0] == 1
-        assert fp[0] == 0
-
-    def test_single_non_overlap(self):
-        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
-        pred = make_representation("0 0 0 5 5", score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 0
-        assert fp[0] == 1
-
-    def test_single_non_overlap_ignored(self):
-        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
-        pred = make_representation("0 0 0 5 5", score=1)
-        pred[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 0
-        assert fp[0] == 0
-
-    def test_multiple(self):
-        gt = make_representation("0 0 0 5 5; 0 7 7 8 8", is_ground_truth=True)
-        pred = make_representation("0 0 0 5 5; 0 7 7 8 8", score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 1
-        assert tp[1] == 1
-        assert fp[0] == 0
-        assert fp[1] == 0
-
-    def test_multiple_2(self):
-        gt = make_representation("0 0 0 5 5; 0 9 9 10 10", is_ground_truth=True)
-        pred = make_representation("1 0 0 0 5 5; 0.8 0 7 7 8 8")
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 1
-        assert tp[1] == 0
-        assert fp[0] == 0
-        assert fp[1] == 1
-
-    def test_multi_label(self):
-        gt = make_representation("1 0 0 5 5; 0 9 9 10 10", is_ground_truth=True)
-        pred = make_representation("1 1 0 0 5 5; 0.8 0 7 7 8 8")
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 1, overlap_evaluator)
-        assert tp.shape[0] == 1
-        assert tp[0] == 1
-        assert fp[0] == 0
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp.shape[0] == 1
-        assert tp[0] == 0
-        assert fp[0] == 1
-
-    def test_multi_image(self):
-        gt = make_representation(["0 0 0 5 5", "0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5", "0 0 0 5 5"], score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 1
-        assert tp[1] == 1
-        assert fp[0] == 0
-        assert fp[1] == 0
-
-    def test_false_negative(self):
-        gt = make_representation("0 0 0 5 5; 0 1 1 6 6", is_ground_truth=True)
-        pred = make_representation("0 0 0 5 5", score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, ngt = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 1
-        assert tp.shape[0] == 1
-        assert ngt == 2
-
-    def test_multiple_detections(self):
-        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
-        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 1
-        assert tp[1] == 0
-
-    def test_no_annotations(self):
-        gt = "1 0 0 5 5"
-        pred = "0 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert tp[0] == 0
-        assert fp[0] == 1
-
-    def test_no_predictions(self):
-        gt = "0 0 0 5 5"
-        pred = "1 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert n == 1
-        assert len(tp) == 0
-        assert len(fp) == 0
-
-    def test_iou_empty_prediction_box(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 0 0"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        overlap_evaluator = IOU({})
-
-        with pytest.warns(None) as warnings:
-            tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
-            assert len(warnings) == 0
-            assert n == 1
-            assert tp[0] == 0
-            assert fp[0] == 1
-
-    def test_ioa_empty_prediction_box(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 0 0"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        overlap_evaluator = IOA({})
-
-        with pytest.warns(None) as warnings:
-            tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
-            assert len(warnings) == 0
-            assert n == 1
-            assert tp[0] == 0
-            assert fp[0] == 1
-
-    def test_iou_zero_union(self):
-        gt = "0 0 0 0 0"
-        pred = "0 0 0 0 0"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        overlap_evaluator = IOA({})
-
-        with pytest.warns(None) as warnings:
-            tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
-            assert len(warnings) == 0
-            assert n == 1
-            assert tp[0] == 0
-            assert fp[0] == 1
-
-    def test_single_difficult(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        gt[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=True)
-        assert n == 0
-        assert tp[0] == 0
-        assert fp[0] == 0
-
-    def test_single_with_not_ignore_difficult(self):
-        gt = "0 0 0 5 5"
-        pred = "0 0 0 5 5"
-
-        gt = make_representation(gt, is_ground_truth=True)
-        pred = make_representation(pred, score=1)
-        gt[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=False)
-        assert n == 1
-        assert tp[0] == 1
-        assert fp[0] == 0
-
-    def test_single_difficult_non_overlap(self):
-        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
-        gt[0].metadata['difficult_boxes'] = [0]
-        pred = make_representation("0 0 0 5 5", score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
-        assert n == 0
-        assert tp[0] == 0
-        assert fp[0] == 1
-
-    def test_single_difficult_non_overlap_not_ignore_difficult(self):
-        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
-        gt[0].metadata['difficult_boxes'] = [0]
-        pred = make_representation("0 0 0 5 5", score=1)
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=False)
-        assert n == 1
-        assert tp[0] == 0
-        assert fp[0] == 1
-
-    def test_multiple_detections_with_ignore_difficult(self):
-        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
-        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
-        gt[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=True)
-        assert n == 0
-        assert tp[0] == 0
-        assert tp[1] == 0
-        assert fp[0] == 0
-        assert fp[1] == 0
-
-    def test_multiple_detections_with_not_ignore_difficult(self):
-        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
-        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
-        gt[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=False)
-        assert n == 1
-        assert tp[0] == 1
-        assert tp[1] == 0
-        assert fp[0] == 0
-        assert fp[1] == 1
-
-    def test_multiple_detections_with_ignore_difficult_and_not_allow_multiple_matches_per_ignored(self):
-        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
-        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
-        gt[0].metadata['difficult_boxes'] = [0]
-        overlap_evaluator = IOU({})
-
-        tp, fp, _, n = bbox_match(
-            gt, pred, 0, overlap_evaluator,
-            ignore_difficult=True, allow_multiple_matches_per_ignored=False
-        )
-
-        assert n == 0
-        assert tp[0] == 0
-        assert tp[1] == 0
-        assert fp[0] == 0
-        assert fp[1] == 1
-
-
-class TestRecall:
-    def test_one_object(self):
-        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5"], score=1)
-        metric = _test_metric_wrapper(Recall, single_class_dataset())
-        assert 1 == metric(gt, pred)[0]
-        assert metric.meta.get('names') == ['dog']
-
-    def test_two_objects(self):
-        gt = make_representation(["0 0 0 5 5; 0 10 10 20 20"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 0 10 10 20 20"], score=1)
-        assert 1 == _test_metric_wrapper(Recall, single_class_dataset())(gt, pred)[0]
-
-    def test_false_positive(self):
-        gt2 = make_representation(["0 10 10 20 20"], is_ground_truth=True)
-        pred2 = make_representation(["0 0 0 5 5"], score=1)
-        metric = _test_metric_wrapper(Recall, single_class_dataset())
-        assert 0 == metric(gt2, pred2)[0]
-        assert metric.meta.get('names') == ['dog']
-
-        gt1 = make_representation(["0 0 0 5 5"], is_ground_truth=True)
-        pred1 = make_representation(["0 0 0 5 5; 0 10 10 20 20"], score=1)
-        assert 1 == metric(gt1, pred1)[0]
-        assert metric.meta.get('names') == ['dog']
-
-    def test_false_negative(self):
-        gt = make_representation(["0 10 10 20 20; 0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5"], score=1)
-        metric = _test_metric_wrapper(Recall, single_class_dataset())
-        assert 0.5 == metric(gt, pred)[0]
-        assert metric.meta.get('names') == ['dog']
-
-    def test_duplicate_detections(self):
-        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 0 0 0 5 5"], score=1)
-
-        metric = _test_metric_wrapper(Recall, single_class_dataset())
-        assert 1 == metric(gt, pred)[0]
-        assert metric.meta.get('names') == ['dog']
-
-    def test_no_warnings_in_recall_calculation(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
-
-        with pytest.warns(None) as warnings:
-            _test_metric_wrapper(Recall, multi_class_dataset())(gt, pred)
-        assert len(warnings) == 0
-
-    def test_on_dataset_without_background(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
-
-        with pytest.warns(None) as warnings:
-            _test_metric_wrapper(Recall, multi_class_dataset_without_background())(gt, pred)
-        assert len(warnings) == 0
-
-    def test_not_gt_boxes_for_matching(self):
-        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["1 0 0 5 5"], score=1)
-
-        metric = _test_metric_wrapper(Recall, multi_class_dataset_without_background())
-        assert 0 == metric(gt, pred)[0]
-        assert metric.meta.get('names') == ['cat']
-
-
-class TestMAP:
-    def test_selects_all_detections(self):
-        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 0 0 0 5 5"], score=1)
-
-        metric = _test_metric_wrapper(DetectionMAP, single_class_dataset())
-        metric(gt, pred)
-
-        assert not metric.distinct_conf
-        assert metric.overlap_threshold == 0.5
-        assert metric.ignore_difficult
-        assert metric.meta.get('names') == ['dog']
-
-    def test_no_warnings_in_map_calculation(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
-
-        with pytest.warns(None) as warnings:
-            _test_metric_wrapper(DetectionMAP, multi_class_dataset())(gt, pred)
-        assert len(warnings) == 0
-
-    def test_perfect_detection(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
-
-        metric = _test_metric_wrapper(DetectionMAP, multi_class_dataset())
-        assert metric(gt, pred) == [1.0, 1.0]
-        assert metric.meta.get('names') == ['dog', 'cat']
-
-    def test_one_false_alarm(self):
-        gt = make_representation(["0 0 0 5 5", "1 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["1 10 10 20 20; 0 0 0 5 5", "1 0 0 5 5"], score=1)
-        metric = _test_metric_wrapper(DetectionMAP, multi_class_dataset())
-        values = metric(gt, pred)
-        assert values == [1.0, 0.5]
-        map_ = np.mean(values)
-        assert 0.75 == map_
-        assert metric.meta.get('names') == ['dog', 'cat']
-
-    def test_zero_detection(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20"], is_ground_truth=True)
-        pred = make_representation(["0 30 30 40 40"], score=1)
-
-        metric = _test_metric_wrapper(DetectionMAP, multi_class_dataset())
-        assert metric(gt, pred) == [0.0]
-        assert metric.meta.get('names') == ['dog']
-
-    def test_no_detections_warn_user_warning(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20"], is_ground_truth=True)
-        pred = make_representation("", score=1)
-        with pytest.warns(UserWarning) as warnings:
-            map_ = _test_metric_wrapper(DetectionMAP, multi_class_dataset())(gt, pred)[0]
-            assert len(warnings) == 1
-
-            assert map_ == 0
-
-    def test_detection_on_dataset_without_background(self):
-        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
-
-        with pytest.warns(None) as warnings:
-            map_ = _test_metric_wrapper(DetectionMAP, multi_class_dataset_without_background())(gt, pred)
-            mean = np.mean(map_)
-            assert 1.0 == mean
-        assert len(warnings) == 0
-
-    def test_not_gt_boxes_for_box_matching(self):
-        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
-        pred = make_representation(["1 0 0 5 5"], score=1)
-
-        metric = _test_metric_wrapper(Recall, multi_class_dataset_without_background())
-        assert 0 == metric(gt, pred)[0]
-        assert metric.meta.get('names') == ['cat']
diff --git a/tools/accuracy_checker/tests/test_dlsdk_launcher.py b/tools/accuracy_checker/tests/test_dlsdk_launcher.py
deleted file mode 100644 (file)
index 3e772fe..0000000
+++ /dev/null
@@ -1,1121 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import subprocess
-
-import pytest
-
-pytest.importorskip('accuracy_checker.launcher.dlsdk_launcher')
-import os
-import cv2
-import numpy as np
-
-from pathlib import Path
-from unittest.mock import PropertyMock
-from accuracy_checker.config import ConfigError
-from accuracy_checker.launcher import DLSDKLauncher
-from accuracy_checker.launcher.dlsdk_launcher import DLSDKLauncherConfig
-from accuracy_checker.launcher.launcher import create_launcher
-from accuracy_checker.launcher.model_conversion import FrameworkParameters
-from tests.common import update_dict
-from accuracy_checker.data_readers import DataRepresentation
-from accuracy_checker.utils import contains_all
-
-
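-# Stubs the Inference Engine Python bindings (IEPlugin/IENetwork) so launcher objects
-# can be constructed without a real OpenVINO runtime.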
-@pytest.fixture()
-def mock_inference_engine(mocker):
-    try:
-        mocker.patch('openvino.inference_engine.IEPlugin')
-        mocker.patch('openvino.inference_engine.IENetwork')
-    except ImportError:
-        mocker.patch('inference_engine.IEPlugin')
-        mocker.patch('inference_engine.IENetwork')
-
-
-@pytest.fixture()
-def mock_inputs(mocker):
-    mocker.patch(
-        'accuracy_checker.launcher.input_feeder.InputFeeder._parse_inputs_config', return_value=({}, ['data'], None)
-    )
-
-
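-# Builds a DLSDK launcher for the bundled SampLeNet test model; config_update can
-# override or extend individual launcher fields.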
-def get_dlsdk_test_model(models_dir, config_update=None):
-    config = {
-        'framework': 'dlsdk',
-        'weights': str(models_dir / 'SampLeNet.bin'),
-        'model': str(models_dir / 'SampLeNet.xml'),
-        'device': 'CPU',
-        'adapter': 'classification',
-        '_models_prefix': str(models_dir)
-    }
-    if config_update:
-        config.update(config_update)
-
-    return create_launcher(config)
-
-
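-# Reads an image from disk and resizes it to the network input resolution taken from the (C, H, W) input shape.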
-def get_image(image_path, input_shape):
-    _, h, w = input_shape
-    img_raw = cv2.imread(str(image_path))
-
-    return DataRepresentation(cv2.resize(img_raw, (w, h)))
-
-
-class TestDLSDKLauncherInfer:
-    def test_infer(self, data_dir, models_dir):
-        dlsdk_test_model = get_dlsdk_test_model(models_dir)
-        image = get_image(data_dir / '1.jpg', dlsdk_test_model.inputs['data'])
-        input_blob = np.transpose([image.data], (0, 3, 1, 2))
-        result = dlsdk_test_model.predict([{'data': input_blob.astype(np.float32)}], [image.metadata])
-        assert dlsdk_test_model.output_blob == 'fc3'
-
-        assert np.argmax(result[0][dlsdk_test_model.output_blob]) == 6
-        assert image.metadata['input_shape'] == {'data': [3, 32, 32]}
-
-    def test_launcher_creates(self, models_dir):
-        assert get_dlsdk_test_model(models_dir).inputs['data'] == [3, 32, 32]
-
-    def test_infer_with_additional_outputs(self, data_dir, models_dir):
-        dlsdk_test_model = get_dlsdk_test_model(models_dir, {'outputs': ['fc1', 'fc2']})
-        outputs = list(dlsdk_test_model.network.outputs.keys())
-
-        assert contains_all(outputs, ['fc1', 'fc2', 'fc3'])
-        assert dlsdk_test_model.output_blob == 'fc3'
-
-    def test_dlsdk_launcher_set_batch_size(self, models_dir):
-        dlsdk_test_model = get_dlsdk_test_model(models_dir, {'batch': 2})
-        assert dlsdk_test_model.batch == 2
-
-
-@pytest.mark.usefixtures('mock_path_exists')
-class TestDLSDKLauncherAffinity:
-    def test_dlsdk_launcher_valid_affinity_map(self, mocker, models_dir):
-        affinity_map = {'conv1': 'GPU'}
-
-        mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.read_yaml', return_value=affinity_map
-        )
-
-        dlsdk_test_model = get_dlsdk_test_model(models_dir, {'device': 'HETERO:CPU,GPU', 'affinity_map': './affinity_map.yml'})
-        layers = dlsdk_test_model.network.layers
-        for key, value in affinity_map.items():
-            assert layers[key].affinity == value
-
-    def test_dlsdk_launcher_affinity_map_invalid_device(self, mocker, models_dir):
-        affinity_map = {'conv1': 'GPU'}
-
-        mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.read_yaml', return_value=affinity_map
-        )
-
-        with pytest.raises(ConfigError):
-            get_dlsdk_test_model(models_dir, {'device': 'HETERO:CPU,CPU', 'affinity_map': './affinity_map.yml'})
-
-    def test_dlsdk_launcher_affinity_map_invalid_layer(self, mocker, models_dir):
-        affinity_map = {'non-existing-layer': 'CPU'}
-
-        mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.read_yaml', return_value=affinity_map
-        )
-
-        with pytest.raises(ConfigError):
-            get_dlsdk_test_model(models_dir, {'device': 'HETERO:CPU,CPU', 'affinity_map': './affinity_map.yml'})
-
-
-@pytest.mark.usefixtures('mock_path_exists', 'mock_inference_engine', 'mock_inputs')
-class TestDLSDKLauncher:
-    def test_program_bitstream_when_device_is_fpga(self, mocker):
-        subprocess_mock = mocker.patch('subprocess.run')
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'fpga',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix',
-            '_aocl': Path('aocl')
-        }
-        launcher = create_launcher(config)
-        subprocess_mock.assert_called_once_with(['aocl', 'program', 'acl0', 'custom_bitstream'], check=True)
-        launcher.release()
-
-    def test_program_bitstream_when_fpga_in_hetero_device(self, mocker):
-        subprocess_mock = mocker.patch('subprocess.run')
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'hetero:fpga,cpu',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix',
-            '_aocl': Path('aocl')
-        }
-        launcher = create_launcher(config)
-        subprocess_mock.assert_called_once_with(['aocl', 'program', 'acl0', 'custom_bitstream'], check=True)
-        launcher.release()
-
-    def test_does_not_program_bitstream_when_device_is_not_fpga(self, mocker):
-        subprocess_mock = mocker.patch('subprocess.run')
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'cpu',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix',
-            '_aocl': Path('aocl')
-        }
-        create_launcher(config)
-        subprocess_mock.assert_not_called()
-
-    def test_does_not_program_bitstream_when_hetero_without_fpga(self, mocker):
-        subprocess_mock = mocker.patch('subprocess.run')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'hetero:cpu,cpu',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix',
-            '_aocl': Path('aocl')
-        }
-        create_launcher(config)
-        subprocess_mock.assert_not_called()
-
-    def test_does_not_program_bitstream_if_compiler_mode_3_in_env_when_fpga_in_hetero_device(self, mocker):
-        subprocess_mock = mocker.patch('subprocess.run')
-        mocker.patch('os.environ.get', return_value='3')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'hetero:fpga,cpu',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix',
-            '_aocl': Path('aocl')
-        }
-        create_launcher(config)
-
-        subprocess_mock.assert_not_called()
-
-    def test_does_not_program_bitstream_if_compiler_mode_3_in_env_when_fpga_in_device(self, mocker):
-        subprocess_mock = mocker.patch('subprocess.run')
-        mocker.patch('os.environ.get', return_value='3')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'fpga',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix',
-            '_aocl': Path('aocl')
-        }
-        create_launcher(config)
-
-        subprocess_mock.assert_not_called()
-
-    def test_sets_dla_aocx_when_device_is_fpga(self, mocker):
-        mocker.patch('os.environ')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'fpga',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        create_launcher(config)
-
-        os.environ.__setitem__.assert_called_once_with('DLA_AOCX', 'custom_bitstream')
-
-    def test_sets_dla_aocx_when_fpga_in_hetero_device(self, mocker):
-        mocker.patch('os.environ')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'hetero:fpga,cpu',
-            'bitstream': Path('custom_bitstream'),
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        create_launcher(config)
-        os.environ.__setitem__.assert_called_once_with('DLA_AOCX', 'custom_bitstream')
-
-    def test_does_not_set_dla_aocx_when_device_is_not_fpga(self, mocker):
-        mocker.patch('os.environ')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'cpu',
-            'bitstream': 'custom_bitstream',
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        create_launcher(config)
-
-        os.environ.__setitem__.assert_not_called()
-
-    def test_does_not_set_dla_aocx_when_hetero_without_fpga(self, mocker):
-        mocker.patch('os.environ')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'hetero:cpu,cpu',
-            'bitstream': 'custom_bitstream',
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        create_launcher(config)
-
-        os.environ.__setitem__.assert_not_called()
-
-    def test_does_not_set_dla_aocx_if_compiler_mode_3_in_env_when_fpga_in_hetero_device(self, mocker):
-        mocker.patch('os.environ')
-        mocker.patch('os.environ.get', return_value='3')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'hetero:fpga,cpu',
-            'bitstream': 'custom_bitstream',
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        create_launcher(config)
-
-        os.environ.__setitem__.assert_not_called()
-
-    def test_does_not_set_dla_aocx_if_compiler_mode_3_in_env_when_fpga_in_device(self, mocker):
-        mocker.patch('os.environ')
-        mocker.patch('os.environ.get', return_value='3')
-
-        config = {
-            'framework': 'dlsdk',
-            'weights': 'custom_weights',
-            'model': 'custom_model',
-            'device': 'fpga',
-            'bitstream': 'custom_bitstream',
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        create_launcher(config)
-
-        os.environ.__setitem__.assert_not_called()
-
-    def test_model_converted_from_caffe(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'caffe_model': '/path/to/source_models/custom_model',
-            'caffe_weights': '/path/to/source_models/custom_weights',
-            "device": 'cpu',
-            'bitstream': Path('custom_bitstream'),
-            '_models_prefix': '/path/to/source_models',
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '/path/to/source_models/custom_model', '/path/to/source_models/custom_weights', '',
-            FrameworkParameters('caffe', False),
-            [], None, None, None, None
-        )
-
-    def test_model_converted_with_mo_params(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': "dlsdk",
-            'caffe_model': '/path/to/source_models/custom_model',
-            'caffe_weights': '/path/to/source_models/custom_weights',
-            'device': 'cpu',
-            'bitstream': Path('custom_bitstream'),
-            '_models_prefix': '/path/to/source_models',
-            'mo_params': {'data_type': 'FP16'},
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '/path/to/source_models/custom_model', '/path/to/source_models/custom_weights', '',
-            FrameworkParameters('caffe', False),
-            [], {'data_type': 'FP16'}, None, None, None
-        )
-
-    def test_model_converted_with_mo_flags(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'caffe_model': '/path/to/source_models/custom_model',
-            'caffe_weights': '/path/to/source_models/custom_weights',
-            'device': 'cpu',
-            'bitstream': Path('custom_bitstream'),
-            '_models_prefix': '/path/to/source_models',
-            'mo_flags': ['reverse_input_channels'],
-            'adapter': 'classification'
-        }
-
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '/path/to/source_models/custom_model', '/path/to/source_models/custom_weights', '',
-            FrameworkParameters('caffe', False),
-            [], None, ['reverse_input_channels'], None, None
-        )
-
-    def test_model_converted_to_output_dir_in_mo_params(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'output_dir': '/path/to/output/models'}
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value='ModelOptimizer')
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-        args = {
-            'input_model': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'output_dir': '/path/to/output/models',
-            'framework': 'tf'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to/source_models',
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '/path/to/source_models/custom_model', '', '',
-            FrameworkParameters('tf', False), [], None, None, None, None
-        )
-
-    def test_model_converted_from_tf_checkpoint(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'tf_meta': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to/source_models',
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '', '', '/path/to/source_models/custom_model',
-            FrameworkParameters('tf', True), [], None, None, None, None
-        )
-
-    def test_model_converted_from_tf_with_arg_path_to_custom_tf_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_use_custom_operations_config': 'ssd_v2_support.json'},
-            '_tf_custom_op_config_dir': 'config/dir'
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_model': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_use_custom_operations_config': 'config/dir/ssd_v2_support.json'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_with_default_path_to_custom_tf_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_use_custom_operations_config': 'config.json'}
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_model': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_use_custom_operations_config': '/path/extensions/front/tf/config.json'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_with_default_path_to_obj_detection_api_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_object_detection_api_pipeline_config': 'operations.config'},
-            '_tf_obj_detection_api_pipeline_config_path': None
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_model': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_object_detection_api_pipeline_config': '/path/to/source_models/operations.config'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_with_arg_path_to_obj_detection_api_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_object_detection_api_pipeline_config': 'operations.config'},
-            '_tf_custom_op_config_dir': 'config/dir',
-            '_tf_obj_detection_api_pipeline_config_path': 'od_api'
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_model': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_object_detection_api_pipeline_config': 'od_api/operations.config'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_checkpoint_with_arg_path_to_custom_tf_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_meta': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_use_custom_operations_config': 'ssd_v2_support.json'},
-            '_tf_custom_op_config_dir': 'config/dir'
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_meta_graph': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_use_custom_operations_config': 'config/dir/ssd_v2_support.json'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_checkpoint_with_default_path_to_custom_tf_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_meta': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_use_custom_operations_config': 'config.json'}
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_meta_graph': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_use_custom_operations_config': '/path/extensions/front/tf/config.json'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_checkpoint_with_default_path_to_obj_detection_api_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_meta': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_object_detection_api_pipeline_config': 'operations.config'},
-            '_tf_obj_detection_api_pipeline_config_path': None
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_meta_graph': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_object_detection_api_pipeline_config': '/path/to/source_models/operations.config'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_tf_checkpoint_with_arg_path_to_obj_detection_api_config(self, mocker):
-        config = {
-            'framework': 'dlsdk',
-            'tf_meta': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to',
-            'adapter': 'classification',
-            'mo_params': {'tensorflow_object_detection_api_pipeline_config': 'operations.config'},
-            '_tf_custom_op_config_dir': 'config/dir',
-            '_tf_obj_detection_api_pipeline_config_path': 'od_api'
-        }
-        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
-        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
-
-        args = {
-            'input_meta_graph': '/path/to/source_models/custom_model',
-            'model_name': 'custom_model',
-            'framework': 'tf',
-            'tensorflow_object_detection_api_pipeline_config': 'od_api/operations.config'
-        }
-
-        mocker.patch(
-            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
-            return_value=subprocess.CompletedProcess(args, returncode=0)
-        )
-        DLSDKLauncher(config)
-        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
-
-    def test_model_converted_from_mxnet(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'mxnet_weights': '/path/to/source_models/custom_weights',
-            'device': 'cpu',
-            '_models_prefix': '/path/to/source_models',
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_weights', '', '/path/to/source_models/custom_weights', '',
-            FrameworkParameters('mxnet', False), [], None, None, None, None
-        )
-
-    def test_model_converted_from_onnx(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'onnx_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to/source_models',
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '/path/to/source_models/custom_model', '', '',
-            FrameworkParameters('onnx', False), [], None, None, None, None
-        )
-
-    def test_model_converted_from_kaldi(self, mocker):
-        mock = mocker.patch(
-            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
-            return_value=('converted_model', 'converted_weights')
-        )
-
-        config = {
-            'framework': 'dlsdk',
-            'kaldi_model': '/path/to/source_models/custom_model',
-            'device': 'cpu',
-            '_models_prefix': '/path/to/source_models',
-            'adapter': 'classification'
-        }
-        DLSDKLauncher(config)
-
-        mock.assert_called_once_with(
-            'custom_model', '/path/to/source_models/custom_model', '', '',
-            FrameworkParameters('kaldi', False), [], None, None, None, None
-        )
-
-    def test_raises_with_multiple_models_caffe_dlsdk(self):
-        config = {
-            'framework': 'dlsdk',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_tf_dlsdk(self):
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': 'tf_model',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_mxnet_dlsdk(self):
-        config = {
-            'framework': 'dlsdk',
-            'mxnet_weights': 'mxnet_weights',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_onnx_dlsdk(self):
-        config = {
-            'framework': 'dlsdk',
-            'onnx_model': 'onnx_model',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_kaldi_dlsdk(self):
-        config = {
-            'framework': 'dlsdk',
-            'kaldi_model': 'kaldi_model',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_mxnet_caffe(self):
-        config = {
-            'framework': 'dlsdk',
-            'mxnet_weights': 'mxnet_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_tf_caffe(self):
-
-        config = {
-            'framework': 'dlsdk',
-            'tf_model': 'tf_model',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_onnx_caffe(self):
-
-        config = {
-            'framework': 'dlsdk',
-            'onnx_model': 'onnx_model',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_mxnet_tf(self):
-        config = {
-            'framework': 'dlsdk',
-            'mxnet_weights': 'mxnet_weights',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_onnx_tf(self):
-        config = {
-            'framework': 'dlsdk',
-            'onnx_model': 'onnx_model',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_mxnet_caffe_tf(self):
-        config = {
-            'framework': 'dlsdk',
-            'mxnet_weights': 'mxnet_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_caffe_tf(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_caffe_onnx(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'onnx_model': 'onnx_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_caffe_mxnet(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'mxnet_weights': 'mxnet_weights',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_tf_mxnet(self):
-        config = {
-            'framework': "dlsdk",
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'mxnet_weights': 'mxnet_weights',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_tf_onnx(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'onnx_model': 'onnx_model',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_tf_mxnet_caffe(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'mxnet_weights': 'mxnet_weights',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_multiple_models_dlsdk_tf_mxnet_caffe_onnx(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'mxnet_weights': 'mxnet_weights',
-            'onnx_model': 'onnx_model',
-            'tf_model': 'tf_model',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-    def test_raises_with_tf_model_and_tf_meta_both_provided(self):
-        config = {
-            'framework': 'dlsdk',
-            'model': 'custom_model',
-            'weights': 'custom_weights',
-            'caffe_model': 'caffe_model',
-            'caffe_weights': 'caffe_weights',
-            'mxnet_weights': 'mxnet_weights',
-            'tf_model': 'tf_model',
-            'tf_meta': 'tf_meta',
-            'device': 'cpu',
-            '_models_prefix': 'prefix'
-        }
-
-        with pytest.raises(ConfigError):
-            DLSDKLauncher(config)
-
-
-@pytest.mark.usefixtures('mock_path_exists', 'mock_inputs', 'mock_inference_engine')
-class TestDLSDKLauncherConfig:
-    def setup_method(self):
-        self.launcher = {
-            'model': 'foo.xml',
-            'weights': 'foo.bin',
-            'device': 'CPU',
-            'framework': 'dlsdk',
-            'adapter': 'classification',
-            '_models_prefix': 'prefix'
-        }
-        self.config = DLSDKLauncherConfig('dlsdk_launcher')
-
-    def test_hetero_correct(self):
-        self.config.validate(update_dict(self.launcher, device='HETERO:CPU'))
-        self.config.validate(update_dict(self.launcher, device='HETERO:CPU,FPGA'))
-
-    def test_hetero_endswith_comma(self):
-        with pytest.raises(ConfigError):
-            self.config.validate(update_dict(self.launcher, device='HETERO:CPU,FPGA,'))
-
-    def test_normal_multiple_devices(self):
-        with pytest.raises(ConfigError):
-            self.config.validate(update_dict(self.launcher, device='CPU,FPGA'))
-
-    def test_hetero_empty(self):
-        with pytest.raises(ConfigError):
-            self.config.validate(update_dict(self.launcher, device='HETERO:'))
-
-    def test_normal(self):
-        self.config.validate(update_dict(self.launcher, device='CPU'))
-
-    def test_missed_model_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        config = {'framework': 'dlsdk', 'weights': 'custom', 'adapter': 'classification', 'device': 'cpu'}
-
-        with pytest.raises(ConfigError):
-            create_launcher(config)
-
-    def test_missed_weights_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        launcher = {'framework': 'dlsdk', 'model': 'custom', 'adapter': 'ssd', 'device': 'cpu'}
-
-        with pytest.raises(ConfigError):
-            create_launcher(launcher)
-
-    def test_missed_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom'}
-
-        with pytest.raises(ConfigError):
-            create_launcher(launcher_config)
-
-    def test_undefined_str_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': 'undefined_str'}
-
-        with pytest.raises(ConfigError):
-            create_launcher(launcher_config)
-
-    def test_empty_dict_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {}}
-
-        with pytest.raises(ConfigError):
-            create_launcher(launcher_config)
-
-    def test_missed_type_in_dict_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {'key': 'val'}}
-
-        with pytest.raises(ConfigError):
-            create_launcher(launcher_config)
-
-    def test_undefined_type_in_dict_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
-        launcher_config = {
-            'framework': 'dlsdk',
-            'model': 'custom',
-            'weights': 'custom',
-            'adapter': {'type': 'undefined'}
-        }
-
-        with pytest.raises(ConfigError):
-            create_launcher(launcher_config)
-
-    def test_dlsdk_launcher(self):
-        launcher = {
-            'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': 'ssd', 'device': 'cpu',
-            '_models_prefix': 'models'
-        }
-        create_launcher(launcher)
-
-    def test_dlsdk_launcher_model_with_several_image_inputs_raise_value_error(self, mocker):
-        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {'key': 'val'}}
-
-        with pytest.raises(ValueError):
-            mocker.patch(
-                'accuracy_checker.launcher.dlsdk_launcher.DLSDKLauncher.inputs',
-                new_callable=PropertyMock(return_value={'data1': [3, 227, 227], 'data2': [3, 227, 227]})
-            )
-            create_launcher(launcher_config)
-
-    def test_dlsdk_launcher_model_no_image_inputs_raise_value_error(self):
-        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {'key': 'val'}}
-
-        with pytest.raises(ValueError):
-            create_launcher(launcher_config)
-
-
-def dummy_adapter():
-    pass
diff --git a/tools/accuracy_checker/tests/test_input_feeder.py b/tools/accuracy_checker/tests/test_input_feeder.py
deleted file mode 100644 (file)
index 6a6f882..0000000
+++ /dev/null
@@ -1,255 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import pytest
-import re
-import numpy as np
-from accuracy_checker.config import ConfigError
-from accuracy_checker.launcher.input_feeder import InputFeeder
-from accuracy_checker.data_readers import DataRepresentation
-
-# InputInfo from openvino is needed here, but there is no appropriate API
-# to create an InputInfo with a specific shape, so a lightweight stand-in class is used instead.
-class InputInfo_test:
-    def __init__(self, layout='', precision='', shape=None):
-        self.layout = layout
-        self.precision = precision
-        self.shape = shape if shape is not None else []
-
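-# A minimal usage sketch (added for illustration; it was not part of the original test
-# suite) showing the stand-in above taking the place of openvino's InputInfo when
-# constructing an InputFeeder. The input name 'data' and the 224x224 shape are assumed
-# example values, not fixtures from this repository.
-def _example_input_feeder_with_stand_in():
-    feeder = InputFeeder([], {'data': InputInfo_test(layout='NCHW', precision='FP32', shape=(1, 3, 224, 224))})
-    assert feeder.non_constant_inputs == ['data']
-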
-class TestInputFeeder:
-    def test_create_input_feeder_without_inputs_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            InputFeeder([], {})
-
-    def test_create_input_feeder_with_config_inputs_and_empty_network_inputs_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            InputFeeder([{'name': 'const_data', 'type': 'CONST_INPUT', 'value': '[1, 1, 1, 1]'}], {})
-
-    def test_create_input_feeder_with_config_const_inputs_not_in_network_inputs_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            InputFeeder([{'name': 'const_data', 'type': 'CONST_INPUT', 'value': '[1, 1, 1, 1]'}], {'data': (1, 3, 10, 10)})
-
-    def test_create_input_feeder_with_config_inputs_not_in_network_inputs_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            InputFeeder([{'name': 'data2', 'type': 'INPUT', 'value': '.'}], {'data': (1, 3, 10, 10)})
-
-    def test_create_input_feeder_without_config_inputs(self):
-        input_feeder = InputFeeder([], {'data': (1, 3, 10, 10)})
-        assert not input_feeder.const_inputs
-        assert not input_feeder.inputs_mapping
-        assert input_feeder.non_constant_inputs == ['data']
-
-    def test_create_input_feeder_config_inputs_fully_match_to_network_inputs(self):
-        input_feeder = InputFeeder([{'name': 'data', 'type': 'INPUT', 'value': '.'}], {'data': (1, 3, 10, 10)})
-        assert not input_feeder.const_inputs
-        assert input_feeder.inputs_mapping == {'data': re.compile('.')}
-        assert input_feeder.non_constant_inputs == ['data']
-
-    def test_create_input_feeder_config_inputs_contain_only_const_inputs_with_list_value(self):
-        input_feeder = InputFeeder([{'name': 'const_data', 'type': 'CONST_INPUT', 'value': [1, 1, 1, 1]}], {'data': (1, 3, 10, 10), 'const_data': (1, 4)})
-        assert np.array_equal(input_feeder.const_inputs['const_data'], np.ones(4))
-        assert not input_feeder.inputs_mapping
-        assert input_feeder.non_constant_inputs == ['data']
-
-    def test_create_input_feeder_config_inputs_contain_only_const_inputs_with_not_list_value(self):
-        input_feeder = InputFeeder(
-            [{'name': 'const_data', 'type': 'CONST_INPUT', 'value': 'value'}],
-            {'data': (1, 3, 10, 10), 'const_data': (1, 4)}
-        )
-        assert input_feeder.const_inputs['const_data'] == 'value'
-        assert not input_feeder.inputs_mapping
-        assert input_feeder.non_constant_inputs == ['data']
-
-    def test_create_input_feeder_not_all_non_constant_inputs_in_config_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            InputFeeder(
-                [{'name': '0', 'type': 'INPUT', 'value': '.'}],
-                {'0': (1, 3, 10, 10), '1': (1, 3, 10, 10)}
-            )
-
-    def test_fill_non_constant_input_with_one_input_without_specific_mapping_batch_1(self):
-        input_feeder = InputFeeder([], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])[0]
-        expected_data = np.zeros((1, 3, 10, 10))
-        assert 'input' in result
-        assert np.array_equal(result['input'], expected_data)
-
-    def test_fill_non_constant_input_without_specific_mapping_batch_2(self):
-        input_feeder = InputFeeder([], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
-            DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
-        ])[0]
-        expected_data = np.zeros((2, 3, 10, 10))
-        assert 'input' in result
-        assert np.array_equal(result['input'], expected_data)
-
-    def test_fill_non_constant_input_with_specific_mapping_batch_1(self):
-        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])[0]
-        expected_data = np.zeros((1, 3, 10, 10))
-        assert 'input' in result
-        assert np.array_equal(result['input'], expected_data)
-
-    def test_fill_non_constant_input_with_specific_mapping_several_images_matched(self):
-        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1'])])[0]
-        expected_data = np.zeros((1, 3, 10, 10))
-        assert 'input' in result
-        assert np.array_equal(result['input'], expected_data)
-
-    def test_fill_non_constant_input_with_specific_mapping_not_match_raise_config_error(self):
-        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '1.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        with pytest.raises(ConfigError):
-            input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])
-
-    def test_fill_non_constant_input_with_specific_mapping_batch_2(self):
-        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
-            DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
-        ])[0]
-        expected_data = np.zeros((2, 3, 10, 10))
-        assert 'input' in result
-        assert np.array_equal(result['input'], expected_data)
-
-    def test_fill_non_constant_input_with_specific_mapping_not_all_image_in_batch_matched_raise_config_error(self):
-        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '0+'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
-        with pytest.raises(ConfigError):
-            input_feeder.fill_non_constant_inputs([
-                DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
-                DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
-            ])
-
-    def test_fill_non_constant_inputs_without_specific_mapping_batch_1(self):
-        input_feeder = InputFeeder([], {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])[0]
-        expected_data = np.zeros((1, 3, 10, 10))
-        assert 'input1' in result
-        assert np.array_equal(result['input1'], expected_data)
-        assert 'input2' in result
-        assert np.array_equal(result['input2'], expected_data)
-
-    def test_fill_non_constant_inputs_without_specific_mapping_batch_2(self):
-        input_feeder = InputFeeder([], {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))})
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
-            DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
-        ])[0]
-        expected_data = np.zeros((2, 3, 10, 10))
-        assert 'input1' in result
-        assert np.array_equal(result['input1'], expected_data)
-        assert 'input2' in result
-        assert np.array_equal(result['input2'], expected_data)
-
-    def test_fill_non_constant_inputs_with_specific_mapping_batch_1(self):
-        input_feeder = InputFeeder(
-            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
-            {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))}
-        )
-        result = input_feeder.fill_non_constant_inputs(
-            [DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1'])]
-        )[0]
-        expected_data = [np.zeros((1, 3, 10, 10)), np.ones((1, 3, 10, 10))]
-        assert 'input1' in result
-        assert np.array_equal(result['input1'], expected_data[0])
-        assert 'input2' in result
-        assert np.array_equal(result['input2'], expected_data[1])
-
-    def test_fill_non_constant_inputs_with_specific_mapping_not_match_raise_config_error(self):
-        input_feeder = InputFeeder(
-            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
-            {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))}
-        )
-        with pytest.raises(ConfigError):
-            input_feeder.fill_non_constant_inputs([DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '2'])])
-
-    def test_fill_non_constant_inputs_with_specific_mapping_batch_2(self):
-        input_feeder = InputFeeder(
-            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
-            {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))}
-        )
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1']),
-            DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1'])
-        ])[0]
-        expected_data = [np.zeros((2, 3, 10, 10)), np.ones((2, 3, 10, 10))]
-        assert 'input1' in result
-        assert np.array_equal(result['input1'], expected_data[0])
-        assert 'input2' in result
-        assert np.array_equal(result['input2'], expected_data[1])
-
-    def test_fill_non_constant_inputs_with_specific_mapping_not_all_image_in_batch_matched_raise_config_error(self):
-        input_feeder = InputFeeder(
-            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
-            {'input1': (1, 3, 10, 10), 'input2': (1, 3, 10, 10)}
-        )
-        with pytest.raises(ConfigError):
-            input_feeder.fill_non_constant_inputs([
-                DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1']),
-                DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '2'])
-            ])
-
-    def test_fill_non_const_input_with_multi_infer_data_batch_1(self):
-        input_feeder = InputFeeder({}, {'input': (1, 3, 10, 10)})
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], {'multi_infer': True}, identifier='0')
-        ])
-        expected = [{'input': np.zeros((1, 3, 10, 10))}, {'input': np.ones((1, 3, 10, 10))}]
-        assert len(result) == len(expected)
-        assert np.array_equal(result[0]['input'], expected[0]['input'])
-        assert np.array_equal(result[1]['input'], expected[1]['input'])
-
-    def test_fill_non_const_input_with_multi_infer_data_batch_2(self):
-        input_feeder = InputFeeder({}, {'input': (2, 3, 10, 10)})
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation(
-                [np.zeros((10, 10, 3)), np.ones((10, 10, 3))],
-                {'multi_infer': True},
-                identifier='0'
-            ),
-            DataRepresentation(
-                [np.zeros((10, 10, 3)), np.ones((10, 10, 3))],
-                {'multi_infer': True},
-                identifier='1'
-            ),
-        ])
-        expected = [{'input': np.zeros((2, 3, 10, 10))}, {'input': np.ones((2, 3, 10, 10))}]
-        assert len(result) == len(expected)
-        assert np.array_equal(result[0]['input'], expected[0]['input'])
-        assert np.array_equal(result[1]['input'], expected[1]['input'])
-
-    def test_fill_non_const_input_with_multi_infer_not_consistent_data_batch_2(self):
-        input_feeder = InputFeeder({}, {'input': (2, 3, 10, 10)})
-        result = input_feeder.fill_non_constant_inputs([
-            DataRepresentation(
-                [np.zeros((10, 10, 3))],
-                {'multi_infer': True},
-                identifier='0'
-            ),
-            DataRepresentation(
-                [np.zeros((10, 10, 3)), np.ones((10, 10, 3))],
-                {'multi_infer': True},
-                identifier='1'
-            ),
-        ])
-        expected = [{'input': np.zeros((2, 3, 10, 10))}, {'input': np.ones((1, 3, 10, 10))}]
-        assert len(result) == len(expected)
-        assert np.array_equal(result[0]['input'], expected[0]['input'])
-        assert np.array_equal(result[1]['input'], expected[1]['input'])
diff --git a/tools/accuracy_checker/tests/test_metric_evaluator.py b/tools/accuracy_checker/tests/test_metric_evaluator.py
deleted file mode 100644 (file)
index fc0c4d2..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import pytest
-from accuracy_checker.config import ConfigError
-from accuracy_checker.metrics import ClassificationAccuracy, MetricsExecutor
-from accuracy_checker.metrics.metric import Metric
-from accuracy_checker.representation import (
-    ClassificationAnnotation,
-    ClassificationPrediction,
-    ContainerAnnotation,
-    ContainerPrediction,
-    DetectionAnnotation,
-    DetectionPrediction
-)
-from .common import DummyDataset
-
-
-class TestMetric:
-    def setup_method(self):
-        self.module = 'accuracy_checker.metrics.metric_evaluator'
-
-    def test_missed_metrics_raises_config_error_exception(self):
-        with pytest.raises(ConfigError):
-            MetricsExecutor([], None)
-
-    def test_metrics_with_empty_entry_raises_config_error_exception(self):
-        with pytest.raises(ConfigError):
-            MetricsExecutor([{}], None)
-
-    def test_missed_metric_type_raises_config_error_exception(self):
-        with pytest.raises(ConfigError):
-            MetricsExecutor([{'undefined': ''}], None)
-
-    def test_undefined_metric_type_raises_config_error_exception(self):
-        with pytest.raises(ConfigError):
-            MetricsExecutor([{'type': ''}], None)
-
-    def test_accuracy_arguments(self):
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        assert len(dispatcher.metrics) == 1
-        _, _, accuracy_metric, _, _, _ = dispatcher.metrics[0]
-        assert isinstance(accuracy_metric, ClassificationAccuracy)
-        assert accuracy_metric.top_k == 1
-
-    def test_accuracy_with_several_annotation_source_raises_config_error_exception(self):
-        with pytest.raises(ConfigError):
-            MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'annotation1, annotation2'}], None)
-
-    def test_accuracy_with_several_prediction_source_raises_config_error_exception(self):
-        with pytest.raises(ConfigError):
-            MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'prediction_source': 'prediction1, prediction2'}], None)
-
-    def test_accuracy_on_container_with_wrong_annotation_source_name_raise_config_error_exception(self):
-        annotations = [ContainerAnnotation({'annotation': ClassificationAnnotation('identifier', 3)})]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'a'}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_with_wrong_annotation_type_raise_config_error_exception(self):
-        annotations = [DetectionAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_with_unsupported_annotations_in_container_raise_config_error_exception(self):
-        annotations = [ContainerAnnotation({'annotation': DetectionAnnotation('identifier', 3)})]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_with_unsupported_annotation_type_as_annotation_source_for_container_raises_config_error(self):
-        annotations = [ContainerAnnotation({'annotation': DetectionAnnotation('identifier', 3)})]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'annotation'}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_on_annotation_container_with_several_suitable_representations_raise_config_error_exception(self):
-        annotations = [ContainerAnnotation({
-            'annotation1': ClassificationAnnotation('identifier', 3),
-            'annotation2': ClassificationAnnotation('identifier', 3)
-        })]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_with_wrong_prediction_type_raise_config_error_exception(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_with_unsupported_prediction_in_container_raise_config_error_exception(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ContainerPrediction({'prediction': DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_with_unsupported_prediction_type_as_prediction_source_for_container_raises_config_error(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ContainerPrediction({'prediction': DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'prediction_source': 'prediction'}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_accuracy_on_prediction_container_with_several_suitable_representations_raise_config_error_exception(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ContainerPrediction({
-            'prediction1': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0]),
-            'prediction2': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])
-        })]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        with pytest.raises(ConfigError):
-            dispatcher.update_metrics_on_batch(annotations, predictions)
-
-    def test_complete_accuracy(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_complete_accuracy_with_container_default_sources(self):
-        annotations = [ContainerAnnotation({'a': ClassificationAnnotation('identifier', 3)})]
-        predictions = [ContainerPrediction({'p': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_complete_accuracy_with_container_sources(self):
-        annotations = [ContainerAnnotation({'a': ClassificationAnnotation('identifier', 3)})]
-        predictions = [ContainerPrediction({'p': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
-        config = [{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'a', 'prediction_source': 'p'}]
-
-        dispatcher = MetricsExecutor(config, None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_zero_accuracy(self):
-        annotation = [ClassificationAnnotation('identifier', 2)]
-        prediction = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == 0.0
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_complete_accuracy_top_3(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [1.0, 3.0, 4.0, 2.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3}], None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
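# Quick arithmetic check for the top-3 case above (a standalone sketch, not part of
# the deleted test suite): sorting the scores [1.0, 3.0, 4.0, 2.0] in descending
# order gives class indices [2, 1, 3, 0], so the ground-truth label 3 falls inside
# the top 3 and the reported accuracy is 1.0.
import numpy as np

scores = np.array([1.0, 3.0, 4.0, 2.0])
top3 = np.argsort(scores)[::-1][:3]   # array([2, 1, 3])
assert 3 in top3                      # hence evaluated_value == 1.0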
-    def test_zero_accuracy_top_3(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3}], None)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == 0.0
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_reference_is_10_by_config(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3, 'reference': 10}], None)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == 0.0
-            assert evaluation_result.reference_value == 10
-            assert evaluation_result.threshold is None
-
-    def test_threshold_is_10_by_config(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
-
-        dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3, 'threshold': 10}], None)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result.name == 'accuracy'
-            assert evaluation_result.evaluated_value == 0.0
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold == 10
-
-    def test_classification_per_class_accuracy_fully_zero_prediction(self):
-        annotation = ClassificationAnnotation('identifier', 0)
-        prediction = ClassificationPrediction('identifier', [1.0, 2.0])
-        dataset = DummyDataset(label_map={0: '0', 1: '1'})
-        dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
-        dispatcher.update_metrics_on_batch([annotation], [prediction])
-        for _, evaluation_result in dispatcher.iterate_metrics([annotation], [prediction]):
-            assert evaluation_result.name == 'accuracy_per_class'
-            assert len(evaluation_result.evaluated_value) == 2
-            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[1] == pytest.approx(0.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_classification_per_class_accuracy_partially_zero_prediction(self):
-        annotation = [ClassificationAnnotation('identifier', 1)]
-        prediction = [ClassificationPrediction('identifier', [1.0, 2.0])]
-        dataset = DummyDataset(label_map={0: '0', 1: '1'})
-        dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
-
-        dispatcher.update_metrics_on_batch(annotation, prediction)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
-            assert evaluation_result.name == 'accuracy_per_class'
-            assert len(evaluation_result.evaluated_value) == 2
-            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_classification_per_class_accuracy_complete_prediction(self):
-        annotation = [ClassificationAnnotation('identifier_1', 1), ClassificationAnnotation('identifier_2', 0)]
-        prediction = [
-            ClassificationPrediction('identifier_1', [1.0, 2.0]),
-            ClassificationPrediction('identifier_2', [2.0, 1.0])
-        ]
-        dataset = DummyDataset(label_map={0: '0', 1: '1'})
-        dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
-
-        dispatcher.update_metrics_on_batch(annotation, prediction)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
-            assert evaluation_result.name == 'accuracy_per_class'
-            assert len(evaluation_result.evaluated_value) == 2
-            assert evaluation_result.evaluated_value[0] == pytest.approx(1.0)
-            assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_classification_per_class_accuracy_partially_prediction(self):
-        annotation = [
-            ClassificationAnnotation('identifier_1', 1),
-            ClassificationAnnotation('identifier_2', 0),
-            ClassificationAnnotation('identifier_3', 0)
-        ]
-        prediction = [
-            ClassificationPrediction('identifier_1', [1.0, 2.0]),
-            ClassificationPrediction('identifier_2', [2.0, 1.0]),
-            ClassificationPrediction('identifier_3', [1.0, 5.0])
-        ]
-        dataset = DummyDataset(label_map={0: '0', 1: '1'})
-        dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
-
-        dispatcher.update_metrics_on_batch(annotation, prediction)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
-            assert evaluation_result.name == 'accuracy_per_class'
-            assert len(evaluation_result.evaluated_value) == 2
-            assert evaluation_result.evaluated_value[0] == pytest.approx(0.5)
-            assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
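# Hand computation for the "partially" per-class case above (illustrative sketch
# only): predicted labels are the per-sample argmax, and accuracy is then averaged
# within each ground-truth class, which reproduces the asserted values [0.5, 1.0].
import numpy as np

gt = np.array([1, 0, 0])                                          # annotation labels
pred = np.argmax([[1.0, 2.0], [2.0, 1.0], [1.0, 5.0]], axis=1)    # [1, 0, 1]
per_class = [float(np.mean(pred[gt == c] == c)) for c in (0, 1)]  # [0.5, 1.0]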
-    def test_classification_per_class_accuracy_prediction_top3_zero(self):
-        annotation = [ClassificationAnnotation('identifier_1', 0), ClassificationAnnotation('identifier_2', 1)]
-        prediction = [
-            ClassificationPrediction('identifier_1', [1.0, 2.0, 3.0, 4.0]),
-            ClassificationPrediction('identifier_2', [2.0, 1.0, 3.0, 4.0])
-        ]
-        dataset = DummyDataset(label_map={0: '0', 1: '1', 2: '2', 3: '3'})
-        dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 3}], dataset)
-
-        dispatcher.update_metrics_on_batch(annotation, prediction)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
-            assert evaluation_result.name == 'accuracy_per_class'
-            assert len(evaluation_result.evaluated_value) == 4
-            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[1] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[2] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[3] == pytest.approx(0.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-    def test_classification_per_class_accuracy_prediction_top3(self):
-        annotation = [ClassificationAnnotation('identifier_1', 1), ClassificationAnnotation('identifier_2', 1)]
-        prediction = [
-            ClassificationPrediction('identifier_1', [1.0, 2.0, 3.0, 4.0]),
-            ClassificationPrediction('identifier_2', [2.0, 1.0, 3.0, 4.0])
-        ]
-        dataset = DummyDataset(label_map={0: '0', 1: '1', 2: '2', 3: '3'})
-        dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 3}], dataset)
-
-        dispatcher.update_metrics_on_batch(annotation, prediction)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
-            assert evaluation_result.name == 'accuracy_per_class'
-            assert len(evaluation_result.evaluated_value) == 4
-            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[1] == pytest.approx(0.5)
-            assert evaluation_result.evaluated_value[2] == pytest.approx(0.0)
-            assert evaluation_result.evaluated_value[3] == pytest.approx(0.0)
-            assert evaluation_result.reference_value is None
-            assert evaluation_result.threshold is None
-
-
-class TestMetricExtraArgs:
-    def test_all_metrics_raise_config_error_on_extra_args(self):
-        for provider in Metric.providers:
-            adapter_config = {'type': provider, 'something_extra': 'extra'}
-            with pytest.raises(ConfigError):
-                Metric.provide(provider, adapter_config, None)
-
-    def test_detection_recall_raise_config_error_on_extra_args(self):
-        adapter_config = {'type': 'recall', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('recall', adapter_config, None)
-
-    def test_detection_miss_rate_raise_config_error_on_extra_args(self):
-        adapter_config = {'type': 'miss_rate', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('miss_rate', adapter_config, None)
-
-    def test_accuracy_raise_config_error_on_extra_args(self):
-        adapter_config = {'type': 'accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('accuracy', adapter_config, None)
-
-    def test_per_class_accuracy_raise_config_error_on_extra_args(self):
-        adapter_config = {'type': 'accuracy_per_class', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('accuracy_per_class', adapter_config, None)
-
-    def test_character_recognition_accuracy_raise_config_error_on_extra_args(self):
-        adapter_config = {'type': 'character_recognition_accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('character_recognition_accuracy', adapter_config, None)
-
-    def test_multi_accuracy_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'multi_accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('multi_accuracy', metric_config, None)
-
-    def test_multi_precision_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'multi_precision', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('multi_precision', metric_config, None)
-
-    def test_f1_score_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'f1-score', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('f1-score', metric_config, None)
-
-    def test_mae_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'mae', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('mae', metric_config, None)
-
-    def test_mse_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'mse', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('mse', metric_config, None)
-
-    def test_rmse_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'rmse', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('rmse', metric_config, None)
-
-    def test_mae_on_interval_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'mae_on_interval', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('mae_on_interval', metric_config, None)
-
-    def test_mse_on_interval_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'mse_on_interval', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('mse_on_interval', metric_config, None)
-
-    def test_rmse_on_interval_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'rmse_on_interval', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('rmse_on_interval', metric_config, None)
-
-    def test_per_point_normed_error_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'per_point_normed_error', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('per_point_normed_error', metric_config, None)
-
-    def test_average_point_error_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'normed_error', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('normed_error', metric_config, None)
-
-    def test_reid_cmc_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'cmc', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('cmc', metric_config, None)
-
-    def test_reid_map_raise_config_error_on_extra_args(self):
-        adapter_config = {'type': 'reid_map', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('reid_map', adapter_config, None)
-
-    def test_pairwise_accuracy_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'pairwise_accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('pairwise_accuracy', metric_config, None)
-
-    def test_segmentation_accuracy_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'segmentation_accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('segmentation_accuracy', metric_config, None)
-
-    def test_mean_iou_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'mean_iou', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('mean_iou', metric_config, None)
-
-    def test_mean_accuracy_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'mean_accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('mean_accuracy', metric_config, None)
-
-    def test_frequency_weighted_accuracy_raise_config_error_on_extra_args(self):
-        metric_config = {'type': 'frequency_weighted_accuracy', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            Metric.provide('frequency_weighted_accuracy', metric_config, None)
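# Minimal end-to-end sketch of the MetricsExecutor flow exercised by the deleted
# tests above. The import paths are an assumption inferred from this test suite
# (MetricsExecutor from accuracy_checker.metrics, representations from
# accuracy_checker.representation); treat it as illustration, not a verified API.
from accuracy_checker.metrics import MetricsExecutor
from accuracy_checker.representation import ClassificationAnnotation, ClassificationPrediction

annotations = [ClassificationAnnotation('identifier', 3)]
predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]

# One dict per metric; unknown keys raise ConfigError, as the extra-args tests show.
dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
dispatcher.update_metrics_on_batch(annotations, predictions)

for _, result in dispatcher.iterate_metrics(annotations, predictions):
    print(result.name, result.evaluated_value, result.reference_value, result.threshold)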
diff --git a/tools/accuracy_checker/tests/test_model_conversion.py b/tools/accuracy_checker/tests/test_model_conversion.py
deleted file mode 100644 (file)
index a5a8c77..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import sys
-import pytest
-
-from accuracy_checker.launcher.model_conversion import (exec_mo_binary, find_dlsdk_ir, find_mo, prepare_args)
-from tests.common import mock_filesystem
-
-
-def test_mock_file_system():
-    with mock_filesystem(['foo/bar', 'foo/baz/']) as prefix:
-        assert (prefix / 'foo' / 'bar').is_file()
-        assert (prefix / 'foo' / 'baz').is_dir()
-
-
-def test_find_mo():
-    with mock_filesystem(['deployment_tools/model_optimizer/mo.py']) as prefix:
-        assert find_mo([prefix / 'deployment_tools' / 'model_optimizer'])
-
-
-def test_find_mo_is_none_when_not_exist():
-    with mock_filesystem(['deployment_tools/model_optimizer/mo.py']) as prefix:
-        assert find_mo([prefix / 'deployment_tools']) is None
-
-
-def test_find_mo_list_not_corrupted():
-    with mock_filesystem(['deployment_tools/model_optimizer/mo.py']) as prefix:
-        search_paths = [prefix]
-        find_mo(search_paths)
-        assert len(search_paths) == 1
-
-
-def test_find_ir__in_root():
-    with mock_filesystem(['model.xml', 'model.bin']) as root:
-        model, weights = find_dlsdk_ir(root, 'model')
-        assert model == root / 'model.xml'
-        assert weights == root / 'model.bin'
-
-
-def test_find_ir_raises_file_not_found_error_when_ir_not_found():
-    with mock_filesystem(['foo/']) as root:
-        with pytest.raises(FileNotFoundError):
-            find_dlsdk_ir(root, 'model')
-
-
-def test_prepare_args():
-    args = prepare_args('foo', ['a', 'b'], {'bar': 123, 'x': 'baz'})
-    assert args[0] == sys.executable
-    assert args[1] == 'foo'
-    assert '--a' in args
-    assert '--b' in args
-    assert '--bar' in args
-    assert '--x' in args
-
-    assert args[args.index('--bar') + 1] == '123'
-    assert args[args.index('--x') + 1] == 'baz'
-
-
-def test_exec_mo_binary(mocker):
-    subprocess_run = mocker.patch('subprocess.run')
-    mocker.patch('os.chdir')
-
-    args = prepare_args('ModelOptimizer', value_options={'--foo': 'bar'})
-    exec_mo_binary(args)
-
-    subprocess_run.assert_called_once_with(args, check=False, timeout=None)
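# Usage sketch of the model_conversion helpers covered by the deleted tests above.
# The OpenVINO install path and the Model Optimizer options are hypothetical; only
# the call shapes shown in the tests (find_mo, prepare_args, exec_mo_binary,
# find_dlsdk_ir) are taken from the suite itself.
from pathlib import Path
from accuracy_checker.launcher.model_conversion import (
    exec_mo_binary, find_dlsdk_ir, find_mo, prepare_args
)

search_paths = [Path('/opt/intel/openvino/deployment_tools/model_optimizer')]
mo_script = find_mo(search_paths)      # located mo.py, or None if not found
if mo_script is not None:
    # Flag options become '--name' switches; value options become '--name value'
    # pairs, all appended after sys.executable and the script path.
    args = prepare_args(str(mo_script), ['reverse_input_channels'],
                        {'input_model': 'model.caffemodel'})
    exec_mo_binary(args)               # wraps subprocess.run(args, check=False, timeout=None)
    # Returns (model.xml, model.bin); raises FileNotFoundError if the IR is missing.
    xml_path, bin_path = find_dlsdk_ir(Path('output'), 'model')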
diff --git a/tools/accuracy_checker/tests/test_model_evaluator.py b/tools/accuracy_checker/tests/test_model_evaluator.py
deleted file mode 100644 (file)
index 8540b26..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from unittest.mock import Mock, MagicMock
-
-from accuracy_checker.evaluators import ModelEvaluator
-
-
-class TestModelEvaluator:
-    def setup_method(self):
-        self.launcher = Mock()
-        self.launcher.predict.return_value = []
-        data = MagicMock(data=MagicMock(), metadata=MagicMock(), identifier=0)
-        self.preprocessor = Mock()
-        self.preprocessor.process = Mock(return_value=data)
-        self.postprocessor = Mock()
-        self.adapter = MagicMock(return_value=[])
-        self.input_feeder = Mock()
-        self.data_reader = Mock(return_value=data)
-        self.data_reader.data_source = 'source'
-
-        annotation_0 = MagicMock()
-        annotation_0.identifier = 0
-        annotation_0.metadata = {'data_source': MagicMock()}
-        annotation_1 = MagicMock()
-        annotation_1.identifier = 1
-        annotation_1.metadata = {'data_source': MagicMock()}
-        annotation_container_0 = MagicMock()
-        annotation_container_0.values = MagicMock(return_value=[annotation_0])
-        annotation_container_1 = MagicMock()
-        annotation_container_1.values = MagicMock(return_value=[annotation_1])
-        self.annotations = [[annotation_container_0], [annotation_container_1]]
-
-        self.dataset = MagicMock()
-        self.dataset.__iter__.return_value = self.annotations
-
-        self.postprocessor.process_batch = Mock(side_effect=[
-            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
-        ])
-        self.postprocessor.process_dataset = Mock(return_value=(
-            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
-        ))
-        self.postprocessor.full_process = Mock(return_value=(
-            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
-        ))
-
-        self.metric = Mock()
-        self.metric.update_metrics_on_batch = Mock()
-
-        self.evaluator = ModelEvaluator(self.launcher, self.input_feeder, self.adapter, self.data_reader,
-                                        self.preprocessor, self.postprocessor, self.dataset, self.metric)
-        self.evaluator.store_predictions = Mock()
-        self.evaluator.load = Mock(return_value=(
-            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
-        ))
-
-    def test_process_dataset_without_storing_predictions_and_dataset_processors(self):
-        self.postprocessor.has_dataset_processors = False
-
-        self.evaluator.process_dataset(None, None)
-
-        assert not self.evaluator.store_predictions.called
-        assert not self.evaluator.load.called
-        assert self.launcher.predict.called
-        assert self.postprocessor.process_batch.called
-        assert self.metric.update_metrics_on_batch.call_count == len(self.annotations)
-        assert self.postprocessor.process_dataset.called
-        assert not self.postprocessor.full_process.called
-
-    def test_process_dataset_without_storing_predictions_and_with_dataset_processors(self):
-        self.postprocessor.has_dataset_processors = True
-
-        self.evaluator.process_dataset(None, None)
-
-        assert not self.evaluator.store_predictions.called
-        assert not self.evaluator.load.called
-        assert self.launcher.predict.called
-        assert self.postprocessor.process_batch.called
-        assert self.metric.update_metrics_on_batch.call_count == 1
-        assert self.postprocessor.process_dataset.called
-        assert not self.postprocessor.full_process.called
-
-    def test_process_dataset_with_storing_predictions_and_without_dataset_processors(self):
-        self.postprocessor.has_dataset_processors = False
-
-        self.evaluator.process_dataset('path', None)
-
-        assert self.evaluator.store_predictions.called
-        assert not self.evaluator.load.called
-        assert self.launcher.predict.called
-        assert self.postprocessor.process_batch.called
-        assert self.metric.update_metrics_on_batch.call_count == len(self.annotations)
-        assert self.postprocessor.process_dataset.called
-        assert not self.postprocessor.full_process.called
-
-    def test_process_dataset_with_storing_predictions_and_with_dataset_processors(self):
-        self.postprocessor.has_dataset_processors = True
-
-        self.evaluator.process_dataset('path', None)
-
-        assert self.evaluator.store_predictions.called
-        assert not self.evaluator.load.called
-        assert self.launcher.predict.called
-        assert self.postprocessor.process_batch.called
-        assert self.metric.update_metrics_on_batch.call_count == 1
-        assert self.postprocessor.process_dataset.called
-        assert not self.postprocessor.full_process.called
-
-    def test_process_dataset_with_loading_predictions_and_without_dataset_processors(self, mocker):
-        mocker.patch('accuracy_checker.evaluators.model_evaluator.get_path')
-        self.postprocessor.has_dataset_processors = False
-
-        self.evaluator.process_dataset('path', None)
-
-        assert not self.evaluator.store_predictions.called
-        assert self.evaluator.load.called
-        assert not self.launcher.predict.called
-        assert not self.postprocessor.process_batch.called
-        assert self.metric.update_metrics_on_batch.call_count == 1
-        assert not self.postprocessor.process_dataset.called
-        assert self.postprocessor.full_process.called
-
-    def test_process_dataset_with_loading_predictions_and_with_dataset_processors(self, mocker):
-        mocker.patch('accuracy_checker.evaluators.model_evaluator.get_path')
-        self.postprocessor.has_dataset_processors = True
-
-        self.evaluator.process_dataset('path', None)
-
-        assert not self.evaluator.store_predictions.called
-        assert self.evaluator.load.called
-        assert not self.launcher.predict.called
-        assert not self.postprocessor.process_batch.called
-        assert self.metric.update_metrics_on_batch.call_count == 1
-        assert not self.postprocessor.process_dataset.called
-        assert self.postprocessor.full_process.called
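# Constructor wiring and entry point of ModelEvaluator as exercised by the deleted
# tests above; the components are mocked here purely to illustrate the argument
# order, and the role comments are inferred from how this suite uses them.
from unittest.mock import MagicMock, Mock

from accuracy_checker.evaluators import ModelEvaluator

evaluator = ModelEvaluator(
    Mock(),       # launcher: runs inference and returns raw predictions
    Mock(),       # input_feeder: maps read data onto network inputs
    MagicMock(),  # adapter: converts raw output into representation objects
    Mock(),       # data_reader: loads input data by identifier
    Mock(),       # preprocessor: per-sample input transformations
    Mock(),       # postprocessor: per-batch / per-dataset result transformations
    MagicMock(),  # dataset: iterable of annotation batches
    Mock(),       # metric: MetricsExecutor-style dispatcher
)
# The tests drive everything through evaluator.process_dataset(path_or_None, None):
# None runs inference without storing, a fresh path stores predictions, and an
# already existing path reloads them instead of re-running the launcher.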
diff --git a/tools/accuracy_checker/tests/test_postprocessor.py b/tools/accuracy_checker/tests/test_postprocessor.py
deleted file mode 100644 (file)
index 81c14c3..0000000
+++ /dev/null
@@ -1,1070 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-import pytest
-
-from accuracy_checker.config import ConfigError
-from accuracy_checker.postprocessor import PostprocessingExecutor
-
-from accuracy_checker.representation import (
-    DetectionAnnotation,
-    DetectionPrediction,
-    ContainerAnnotation,
-    ContainerPrediction,
-    ClassificationAnnotation
-)
-
-from .common import make_representation, make_segmentation_representation
-
-
-def postprocess_data(executor, annotations, predictions):
-    return executor.full_process(annotations, predictions)
-
-
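# Minimal filter-postprocessor configuration, assembled from the configs used in
# the tests below (a sketch under those assumptions, not a complete reference):
# 'apply_to' targets 'annotation', 'prediction' or 'all', while the *_source keys
# instead name entries inside container representations; providing both is an error.
from accuracy_checker.postprocessor import PostprocessingExecutor
from accuracy_checker.representation import DetectionPrediction

config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': ['to_be_filtered'], 'remove_filtered': False}]
executor = PostprocessingExecutor(config)

prediction = DetectionPrediction(labels=['some_label', 'to_be_filtered'])
executor.full_process([None], [prediction])
# With remove_filtered=False the box is kept but marked:
# prediction.metadata['difficult_boxes'] == [1]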
-class TestPostprocessor:
-    def test_without_apply_to_and_sources_filter_raise_config_error_exception(self):
-        config = [{'type': 'filter', 'labels': [1]}]
-
-        with pytest.raises(ConfigError):
-            PostprocessingExecutor(config)
-
-    def test_both_provided_apply_to_and_sources_filter_raise_config_error_exception(self):
-        config = [{
-            'type': 'filter',
-            'apply_to': 'prediction',
-            'annotation_source': 'annotation',
-            'labels': [1]
-        }]
-
-        with pytest.raises(ConfigError):
-            PostprocessingExecutor(config)
-
-    def test_filter_annotations_unsupported_source_type_in_container_raise_type_error_exception(self):
-        config = [{'type': 'filter', 'annotation_source': 'annotation', 'labels': [1]}]
-        annotation = ContainerAnnotation({'annotation': ClassificationAnnotation()})
-        executor = PostprocessingExecutor(config)
-
-        with pytest.raises(TypeError):
-            postprocess_data(executor, [annotation], [None])
-
-    def test_filter_annotations_source_not_found_raise_config_error_exception(self):
-        config = [{'type': 'filter', 'annotation_source': 'ann', 'labels': [1]}]
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        executor = PostprocessingExecutor(config)
-
-        with pytest.raises(ConfigError):
-            postprocess_data(executor, [annotation], [None])
-
-    def test_filter_predictions_unsupported_source_type_raise_type_error_exception(self):
-        config = [{
-            'type': 'filter',
-            'prediction_source': 'detection_out',
-            'labels': [1],
-            'remove_filtered': False
-        }]
-        prediction = ContainerPrediction({'detection_out': ClassificationAnnotation()})
-        executor = PostprocessingExecutor(config)
-
-        with pytest.raises(TypeError):
-            postprocess_data(executor, [None], [prediction])
-
-    def test_filter_predictions_source_not_found_raise_config_error_exception(self):
-        config = [{
-            'type': 'filter', 'prediction_source': 'undefined', 'labels': [1]
-        }]
-        prediction = ContainerPrediction({'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]})
-        executor = PostprocessingExecutor(config)
-
-        with pytest.raises(ConfigError):
-            postprocess_data(executor, [None], [prediction])
-
-    def test_filter_container_annotations_by_labels_with_ignore_using_source(self):
-        config = [{
-            'type': 'filter', 'annotation_source': 'annotation', 'labels': [1], 'remove_filtered': False
-        }]
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-            )[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_container_annotations_by_labels_with_ignore_using_apply_to(self):
-        config = [{
-            'type': 'filter',
-            'apply_to': 'annotation',
-            'labels': [1],
-            'remove_filtered': False
-        }]
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-            )[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_regular_annotations_by_labels_with_ignore(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': False}]
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected = make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-            )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_multi_source_annotations_by_labels_with_ignore(self):
-        config = [{
-            'type': 'filter',
-            'annotation_source': ['annotation1', 'annotation2'],
-            'labels': [1],
-            'remove_filtered': False
-        }]
-        annotation = ContainerAnnotation({
-            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
-            'annotation2': make_representation('1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation1': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-            )[0],
-            'annotation2': make_representation(
-                '1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [0, 1]}]
-            )[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_multi_source_annotations_by_labels_with_ignore_using_apply_to(self):
-        config = [{
-            'type': 'filter',
-            'apply_to': 'annotation',
-            'labels': [1],
-            'remove_filtered': False
-        }]
-        annotation = ContainerAnnotation({
-            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
-            'annotation2': make_representation('1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation1': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-            )[0],
-            'annotation2': make_representation(
-                '1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [0, 1]}]
-            )[0]
-        })
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_regular_annotations_by_labels_with_remove_using_annotation_source_warns_user_warning(self):
-        config = [{
-            'type': 'filter',
-            'annotation_source': 'annotation',
-            'labels': [1],
-            'remove_filtered': True
-        }]
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected = make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-
-        with pytest.warns(UserWarning):
-            postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_regular_annotations_by_labels_with_remove_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': True}]
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected = make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_annotations_by_labels_with_remove_on_container(self):
-        config = [{
-            'type': 'filter',
-            'annotation_source': 'annotation',
-            'labels': [1],
-            'remove_filtered': True
-        }]
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_annotations_by_labels_with_remove_on_container_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': True}]
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_multi_source_annotations_by_labels_with_remove(self):
-        config = [{
-            'type': 'filter',
-            'annotation_source': ['annotation1', 'annotation2'],
-            'labels': [1], 'remove_filtered': True
-        }]
-        annotation = ContainerAnnotation({
-            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
-            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation1': make_representation('0 0 0 10 10', is_ground_truth=True)[0],
-            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_multi_source_by_labels_with_remove_on_container_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': True}]
-        annotation = ContainerAnnotation({
-            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
-            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-        expected = ContainerAnnotation({
-            'annotation1': make_representation('0 0 0 10 10', is_ground_truth=True)[0],
-            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_predictions_by_labels_with_ignore(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': ['to_be_filtered'], 'remove_filtered': False}]
-        prediction = DetectionPrediction(labels=['some_label', 'to_be_filtered'])
-        expected = DetectionPrediction(labels=['some_label', 'to_be_filtered'], metadata={'difficult_boxes': [1]})
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_predictions_by_labels_with_ignore_on_container(self):
-        config = [{
-            'type': 'filter',
-            'prediction_source': 'detection_out',
-            'labels': [1],
-            'remove_filtered': False
-        }]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected = ContainerPrediction({'detection_out': make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-        )[0]})
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_predictions_by_labels_with_ignore_on_container_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': False}]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected = ContainerPrediction({'detection_out': make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-        )[0]})
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_multi_source_predictions_by_labels_with_ignore(self):
-        config = [{
-            'type': 'filter', 'prediction_source': ['detection_out1', 'detection_out2'], 'labels': [1],
-            'remove_filtered': False
-        }]
-        prediction = ContainerPrediction({
-            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
-            'detection_out2': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected = ContainerPrediction({
-            'detection_out1': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-            )[0],
-            'detection_out2': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-            )[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_multi_source_predictions_by_labels_with_ignore_using_apply_to(self):
-        config = [{
-            'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': False
-        }]
-        prediction = ContainerPrediction({
-            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
-            'detection_out2': make_representation('1 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected = ContainerPrediction({
-            'detection_out1': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-            )[0],
-            'detection_out2': make_representation(
-                '1 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [0, 1]}]
-            )[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_predictions_by_labels_with_remove(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': True}]
-        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)
-        expected = make_representation('0 0 0 10 10', score=1)
-
-        postprocess_data(PostprocessingExecutor(config), [None], prediction)
-
-        assert prediction == expected
-
-    def test_filter_predictions_by_labels_with_remove_on_container(self):
-        config = [{
-            'type': 'filter', 'prediction_source': 'detection_out', 'labels': [0], 'remove_filtered': True
-        }]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected = ContainerPrediction({'detection_out': make_representation('1 0 0 11 11', score=1)[0]})
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_predictions_by_labels_with_remove_on_container_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [0], 'remove_filtered': True}]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected = ContainerPrediction({'detection_out': make_representation('1 0 0 11 11', score=1)[0]})
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_multi_source_predictions_by_labels_with_remove(self):
-        config = [{
-            'type': 'filter',
-            'prediction_source': ['detection_out1', 'detection_out2'],
-            'labels': [1],
-            'remove_filtered': True
-        }]
-        prediction = ContainerPrediction({
-            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
-            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
-        })
-        expected = ContainerPrediction({
-            'detection_out1': make_representation('0 0 0 10 10', score=1)[0],
-            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_multi_source_predictions_by_labels_with_remove_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': True}]
-        prediction = ContainerPrediction({
-            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
-            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
-        })
-        expected = ContainerPrediction({
-            'detection_out1': make_representation('0 0 0 10 10', score=1)[0],
-            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
-
-        assert prediction == expected
-
-    def test_filter_regular_annotations_and_regular_predictions_by_labels_with_ignore_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
-        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        expected_prediction = make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-        )[0]
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected_annotation = make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_regular_annotations_and_regular_predictions_by_labels_with_remove_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
-        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)
-        expected_prediction = make_representation('0 0 0 10 10', score=1)
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)
-        expected_annotation = make_representation('0 0 0 10 10', is_ground_truth=True)
-
-        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_container_annotations_and_regular_predictions_by_labels_with_ignore_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
-        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        expected_prediction = make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-        )[0]
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected_annotation = make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_container_annotations_and_regular_predictions_by_labels_with_remove_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
-        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        expected_prediction = make_representation('0 0 0 10 10', score=1)[0]
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected_annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_regular_annotations_and_container_predictions_by_labels_with_ignore_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected_prediction = ContainerPrediction({
-            'detection_out': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-            )[0]
-        })
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected_annotation = make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_regular_annotations_and_container_predictions_by_labels_with_remove_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected_prediction = ContainerPrediction({'detection_out': make_representation('0 0 0 10 10', score=1)[0]})
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected_annotation = make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_container_annotations_and_container_predictions_by_labels_with_ignore_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
-        prediction = ContainerPrediction({
-            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected_prediction = ContainerPrediction({
-            'detection_out': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
-            )[0]
-        })
-        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        expected_annotation = make_representation(
-            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_container_annotations_and_container_predictions_by_labels_with_remove_using_apply_to(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
-        prediction = ContainerPrediction({
-            'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
-        })
-        expected_prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10', score=1)[0]})
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected_annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_container_annotations_and_container_predictions_by_labels_with_ignore_using_sources(self):
-        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
-        prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]})
-        expected_prediction = ContainerPrediction({
-            'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}])[0]
-        })
-        annotation = ContainerAnnotation({
-            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
-        })
-        expected_annotation = ContainerAnnotation({
-            'annotation': make_representation(
-                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
-            )[0]
-        })
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_container_annotations_and_container_predictions_by_labels_with_remove_using_sources(self):
-        config = [{'type': 'filter', 'annotation_source': 'annotation', 'prediction_source': 'prediction',
-                   'labels': [1], 'remove_filtered': True}]
-        prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]})
-        expected_prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10', score=1)[0]})
-        annotation = ContainerAnnotation(
-            {'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]})
-        expected_annotation = ContainerAnnotation(
-            {'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]})
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_filter_annotations_by_min_confidence_do_nothing(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_confidence': 0.5, 'remove_filtered': True}]
-        annotations = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)
-        expected_annotations = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)
-
-        postprocess_data(PostprocessingExecutor(config), annotations, [None])
-
-        assert np.array_equal(annotations, expected_annotations)
-
-    def test_filter_predictions_by_min_confidence_with_ignore(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_confidence': 0.5, 'remove_filtered': False}]
-        predictions = [
-            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.3, 0.8])[0],
-            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.5, 0.4])[0]
-        ]
-        expected_predictions = [
-            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.3, 0.8], meta=[{'difficult_boxes': [0]}])[0],
-            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.5, 0.4], meta=[{'difficult_boxes': [1]}])[0]
-        ]
-
-        executor = PostprocessingExecutor(config)
-        postprocess_data(executor, [None, None], predictions)
-
-        assert np.array_equal(predictions, expected_predictions)
-
-    def test_filter_predictions_by_min_confidence_with_remove(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_confidence': 0.5, 'remove_filtered': True}]
-        predictions = [
-            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.3, 0.8])[0],
-            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.5, 0.4])[0]
-        ]
-        expected_predictions = [
-            make_representation('1 0 0 11 11', score=0.8)[0],
-            make_representation('0 0 0 10 10', score=0.5)[0]
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
-
-        assert np.array_equal(predictions, expected_predictions)
-
-    def test_filter_annotations_by_height_range_with_ignored(self):
-        config = [{
-            'type': 'filter',
-            'apply_to': 'annotation',
-            'height_range': '(10.0, 20.0)',
-            'remove_filtered': False
-        }]
-        annotations = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True)[0]
-        ]
-        expected = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': [1]}])[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True, meta=[{'difficult_boxes': [0, 1]}])[0]
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), annotations, [None, None])
-
-        assert np.array_equal(annotations, expected)
-
-    def test_filter_annotations_by_height_range_with_remove(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'height_range': '(10.0, 20.0)', 'remove_filtered': True}]
-        annotations = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True)[0]
-        ]
-        expected = [
-            make_representation('0 0 5 0 15', is_ground_truth=True)[0],
-            make_representation('', is_ground_truth=True)[0]
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), annotations, [None, None])
-
-        assert np.array_equal(annotations, expected)
-
-    def test_filter_predictions_by_height_range_with_ignored(self):
-        config = [{
-            'type': 'filter',
-            'apply_to': 'prediction',
-            'height_range': '(10.0, 20.0)',
-            'remove_filtered': False
-        }]
-        predictions = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1)[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1)[0]
-        ]
-        expected = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1, meta=[{'difficult_boxes': [1]}])[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1, meta=[{'difficult_boxes': [0, 1]}])[0]
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
-
-        assert np.array_equal(predictions, expected)
-
-    def test_filter_predictions_by_height_range_with_remove(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'height_range': '(10.0, 20.0)', 'remove_filtered': True}]
-        predictions = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1)[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1)[0]
-        ]
-        expected = [
-            make_representation('0 0 5 0 15', score=1)[0],
-            make_representation('', score=1)[0]
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
-
-        assert np.array_equal(predictions, expected)
-
-    def test_filter_predictions_by_unknown_min_visibility_raises_value_error_exception(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_visibility': 'unknown'}]
-        predictions = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1)[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1)[0]
-        ]
-
-        with pytest.raises(ValueError):
-            postprocess_data(PostprocessingExecutor(config), [None], predictions)
-
-    def test_filter_annotations_by_unknown_min_visibility_raises_value_error_exception(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'unknown'}]
-        annotations = [DetectionAnnotation(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0])]
-
-        with pytest.raises(ValueError):
-            postprocess_data(PostprocessingExecutor(config), annotations, [None])
-
-    def test_filter_predictions_by_visibility_raises_value_error_with_unknown_visibility(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_visibility': 'heavy occluded'}]
-        predictions = [DetectionPrediction(
-            y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'visibilities': ['unknown']}
-        )]
-
-        with pytest.raises(ValueError):
-            postprocess_data(PostprocessingExecutor(config), [None], predictions)
-
-    def test_filter_annotations_by_visibility_raises_value_error_with_unknown_visibility(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}]
-        annotations = [DetectionAnnotation(
-            y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'visibilities': ['unknown']}
-        )]
-
-        with pytest.raises(ValueError):
-            postprocess_data(PostprocessingExecutor(config), annotations, [None])
-
-    def test_filter_by_visibility_does_nothing_with_annotations_without_visibility(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}]
-        annotations = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True)[0]
-        ]
-        expected = [
-            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': []}])[0],
-            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True, meta=[{'difficult_boxes': []}])[0]
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), annotations, [None, None])
-
-        assert np.array_equal(annotations, expected)
-
-    def test_filter_by_visibility_does_nothing_with_predictions_without_visibility(self):
-        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_visibility': 'heavy occluded'}]
-        predictions = [
-            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0]),
-            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[35.0, 50.0])
-        ]
-        expected = [
-            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'difficult_boxes': []}),
-            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[35.0, 50.0], metadata={'difficult_boxes': []})
-        ]
-
-        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
-
-        assert np.array_equal(predictions, expected)
-
-    def test_filter_by_visibility_does_nothing_with_default_visibility_level_and_heavy_occluded(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}]
-        annotation = make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0]
-        expected = make_representation(
-            '0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': []}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_by_visibility_does_nothing_with_default_visibility_level_and_partially_occluded(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'partially occluded'}]
-        annotation = make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0]
-        expected = make_representation(
-            '0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': []}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_filter_by_visibility_filters_partially_occluded_remove_filtered(self):
-        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'partially occluded',
-                   'remove_filtered': True}]
-        annotation = make_representation(
-            '0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True,
-            meta=[{'visibilities': ['heavy occluded', 'partially occluded']}]
-        )[0]
-        expected = make_representation(
-            '1 0 10 0 15', is_ground_truth=True, meta=[{'visibilities': ['heavy occluded', 'partially occluded']}]
-        )[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_nms(self, mocker):
-        mock = mocker.patch('accuracy_checker.postprocessor.nms.NMS.process_all', return_value=([], []))
-        config = [{'type': 'nms', 'overlap': 0.4}]
-        postprocess_data(PostprocessingExecutor(config), [], [])
-        mock.assert_called_once_with([], [])
-
-    def test_resize_prediction_boxes(self):
-        config = [{'type': 'resize_prediction_boxes'}]
-        annotation = DetectionAnnotation(metadata={'image_size': [(100, 100, 3)]})
-        prediction = make_representation('0 0 0 5 5; 1 7 7 8 8', score=1)[0]
-        expected = make_representation('0 0 0 500 500; 1 700 700 800 800', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected
-
-    def test_clip_annotation_denormalized_boxes(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': False}]
-        meta = {'image_size': [(10, 10, 3)]}
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
-        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', is_ground_truth=True, meta=[meta])[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_clip_annotation_normalized_boxes(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': True}]
-        meta = {'image_size': [(10, 10, 3)]}
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
-        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', is_ground_truth=True, meta=[meta])[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_clip_annotation_denormalized_boxes_with_size(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': False, 'size': 10}]
-        meta = {'image_size': [(10, 10, 3)]}
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
-        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', is_ground_truth=True, meta=[meta])[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_clip_annotation_normalized_boxes_with_size_as_normalized(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': True, 'size': 10}]
-        meta = {'image_size': [(10, 10, 3)]}
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
-        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', is_ground_truth=True, meta=[meta])[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
-
-        assert annotation == expected
-
-    def test_clip_prediction_denormalized_boxes(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': False}]
-        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
-        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
-        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected
-
-    def test_clip_prediction_normalized_boxes(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': True}]
-        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
-        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
-        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', score=1)[0]
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected
-
-    def test_clip_predictions_denormalized_boxes_with_size(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': False, 'size': 10}]
-        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
-        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
-        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected
-
-    def test_clip_predictions_normalized_boxes_with_size_as_normalized(self):
-        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': True, 'size': 10}]
-        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
-        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
-        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected
-
-    def test_cast_to_int_default(self):
-        config = [{'type': 'cast_to_int'}]
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
-        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        expected_prediction = make_representation('0 -1 0 6 5; 1 -10 12 11 10', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_cast_to_int_to_nearest(self):
-        config = [{'type': 'cast_to_int', 'round_policy': 'nearest'}]
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
-        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        expected_prediction = make_representation('0 -1 0 6 5; 1 -10 12 11 10', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_cast_to_int_to_nearest_to_zero(self):
-        config = [{'type': 'cast_to_int', 'round_policy': 'nearest_to_zero'}]
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
-        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        expected_prediction = make_representation('0 -1 0 5 5; 1 -9 11 10 10', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_cast_to_int_to_lower(self):
-        config = [{'type': 'cast_to_int', 'round_policy': 'lower'}]
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
-        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        expected_prediction = make_representation('0 -2 0 5 5; 1 -10 11 10 10', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_cast_to_int_to_greater(self):
-        config = [{'type': 'cast_to_int', 'round_policy': 'greater'}]
-        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
-        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
-        expected_prediction = make_representation('0 -1 1 6 6; 1 -9 12 11 11', score=1)[0]
-
-        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
-
-        assert prediction == expected_prediction and annotation == expected_annotation
-
-    def test_cast_to_int_to_unknown_raise_config_error(self):
-        config = [{'type': 'cast_to_int', 'round_policy': 'unknown'}]
-
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_extend_segmentation_mask_with_float_filling_raise_config_error(self):
-        config = [{'type': 'extend_segmentation_mask', 'filling_label': 0.5}]
-
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_extend_segmentation_mask_default(self):
-        config = [{'type': 'extend_segmentation_mask'}]
-        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
-        prediction = make_segmentation_representation(np.zeros((7, 7)), ground_truth=False)
-        expected_annotation_mask = np.zeros((7, 7))
-        expected_annotation_mask[0, :] = 255
-        expected_annotation_mask[:, 0] = 255
-        expected_annotation_mask[-1, :] = 255
-        expected_annotation_mask[:, -1] = 255
-        expected_prediction_mask = np.zeros((7, 7))
-        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
-        assert np.array_equal(prediction[0].mask, expected_prediction_mask)
-        assert np.array_equal(annotation[0].mask, expected_annotation_mask)
-
-    def test_extend_segmentation_mask_do_nothing(self):
-        config = [{'type': 'extend_segmentation_mask'}]
-        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
-        prediction = make_segmentation_representation(np.zeros((5, 5)), ground_truth=False)
-        expected_mask = np.zeros((5, 5))
-        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
-        assert np.array_equal(prediction[0].mask, expected_mask)
-        assert np.array_equal(annotation[0].mask, expected_mask)
-
-    def test_extend_segmentation_mask_asymmetrical(self):
-        config = [{'type': 'extend_segmentation_mask'}]
-        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
-        prediction = make_segmentation_representation(np.zeros((6, 7)), ground_truth=False)
-        expected_annotation_mask = np.zeros((6, 7))
-        expected_annotation_mask[:, 0] = 255
-        expected_annotation_mask[-1, :] = 255
-        expected_annotation_mask[:, -1] = 255
-        expected_prediction_mask = np.zeros((6, 7))
-        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
-        assert np.array_equal(prediction[0].mask, expected_prediction_mask)
-        assert np.array_equal(annotation[0].mask, expected_annotation_mask)
-
-    def test_extend_segmentation_mask_raise_config_error_if_prediction_less_annotation(self):
-        config = [{'type': 'extend_segmentation_mask'}]
-        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
-        prediction = make_segmentation_representation(np.zeros((4, 4)), ground_truth=False)
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), annotation, prediction)
-
-    def test_extend_segmentation_mask_with_filling_label(self):
-        config = [{'type': 'extend_segmentation_mask', 'filling_label': 1}]
-        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
-        prediction = make_segmentation_representation(np.zeros((7, 7)), ground_truth=False)
-        expected_annotation_mask = np.zeros((7, 7))
-        expected_annotation_mask[0, :] = 1
-        expected_annotation_mask[:, 0] = 1
-        expected_annotation_mask[-1, :] = 1
-        expected_annotation_mask[:, -1] = 1
-        expected_prediction_mask = np.zeros((7, 7))
-        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
-        assert np.array_equal(prediction[0].mask, expected_prediction_mask)
-        assert np.array_equal(annotation[0].mask, expected_annotation_mask)
-
-
-class TestPostprocessorExtraArgs:
-    def test_cast_to_int_raise_config_error_on_extra_args(self):
-        config = {'type': 'cast_to_int', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_clip_boxes_raise_config_error_on_extra_args(self):
-        config = {'type': 'clip_boxes', 'size': 1, 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_correct_yolo_v2_boxes_raise_config_error_on_extra_args(self):
-        config = {'type': 'correct_yolo_v2_boxes', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_encode_segmentation_mask_raise_config_error_on_extra_args(self):
-        config = {'type': 'encode_segmentation_mask', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_filter_raise_config_error_on_extra_args(self):
-        config = {'type': 'filter', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_nms_raise_config_error_on_extra_args(self):
-        config = {'type': 'nms', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_normalize_landmarks_points_raise_config_error_on_extra_args(self):
-        config = {'type': 'normalize_landmarks_points', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_resize_prediction_boxes_raise_config_error_on_extra_args(self):
-        config = {'type': 'resize_prediction_boxes', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_resize_segmentation_mask_raise_config_error_on_extra_args(self):
-        config = {'type': 'resize_segmentation_mask', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
-
-    def test_extend_segmentation_mask_raise_config_error_on_extra_args(self):
-        config = {'type': 'extend_segmentation_mask', 'something_extra': 'extra'}
-        with pytest.raises(ConfigError):
-            postprocess_data(PostprocessingExecutor(config), [None], [None])
diff --git a/tools/accuracy_checker/tests/test_preprocessor.py b/tools/accuracy_checker/tests/test_preprocessor.py
deleted file mode 100644
index afc8d70..0000000
+++ /dev/null
@@ -1,611 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import cv2
-import numpy as np
-import pytest
-
-from accuracy_checker.config import ConfigError
-from accuracy_checker.preprocessor import (
-    Crop,
-    Normalize,
-    Preprocessor,
-    Resize,
-    Flip,
-    BgrToRgb,
-    CropRect,
-    ExtendAroundRect,
-    PointAligner
-)
-from accuracy_checker.preprocessor.preprocessing_executor import PreprocessingExecutor
-from accuracy_checker.preprocessor.preprocessors import OPENCV_INTERPOLATION
-from accuracy_checker.data_readers import DataRepresentation
-
-
-class TestResize:
-    def test_default_resize(self, mocker):
-        cv2_resize_mock = mocker.patch('accuracy_checker.preprocessor.preprocessors.cv2.resize')
-        resize = Preprocessor.provide('resize', {'type': 'resize', 'size': 200})
-
-        input_mock = mocker.Mock()
-        resize(DataRepresentation(input_mock))
-
-        assert not resize.use_pil
-        assert resize.dst_width == 200
-        assert resize.dst_height == 200
-        cv2_resize_mock.assert_called_once_with(
-            input_mock, (200, 200), interpolation=OPENCV_INTERPOLATION['LINEAR']
-        )
-
-    def test_custom_resize(self, mocker):
-        cv2_resize_mock = mocker.patch('accuracy_checker.preprocessor.preprocessors.cv2.resize')
-
-        resize = Preprocessor.provide(
-            'resize', {'type': 'resize', 'dst_width': 126, 'dst_height': 128, 'interpolation': 'CUBIC'}
-        )
-
-        input_mock = mocker.Mock()
-        resize(DataRepresentation(input_mock))
-
-        assert not resize.use_pil
-        assert resize.dst_width == 126
-        assert resize.dst_height == 128
-        cv2_resize_mock.assert_called_once_with(
-            input_mock, (126, 128),
-            interpolation=OPENCV_INTERPOLATION['CUBIC']
-        )
-
-    def test_resize_without_save_aspect_ratio(self):
-        name = 'mock_preprocessor'
-        config = {'type': 'resize', 'dst_width': 150, 'dst_height': 150}
-        input_image = np.ones((100, 50, 3))
-        resize = Preprocessor.provide('resize', config, name)
-
-        result = resize(DataRepresentation(input_image)).data
-
-        assert result.shape == (150, 150, 3)
-
-    def test_resize_save_aspect_ratio_unknown_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide(
-                'resize', {'type': 'resize', 'dst_width': 100, 'dst_height': 150, 'aspect_ratio_scale': 'unknown'}
-            )
-
-    def test_resize_save_aspect_ratio_height(self):
-        input_image = np.ones((100, 50, 3))
-        resize = Preprocessor.provide('resize', {
-            'type': 'resize', 'dst_width': 100, 'dst_height': 150,
-            'interpolation': 'CUBIC', 'aspect_ratio_scale': 'height'
-        })
-        result = resize(DataRepresentation(input_image)).data
-
-        assert result.shape == (300, 100, 3)
-
-    def test_resize_save_aspect_ratio_width(self):
-        input_image = np.ones((100, 50, 3))
-        resize = Preprocessor.provide('resize', {
-            'type': 'resize', 'dst_width': 150, 'dst_height': 150, 'aspect_ratio_scale': 'width'
-        })
-        result = resize(DataRepresentation(input_image)).data
-
-        assert result.shape == (150, 75, 3)
-
-    def test_resize_save_aspect_ratio_for_greater_dim(self):
-        input_image = np.ones((100, 50, 3))
-        resize = Preprocessor.provide('resize', {
-            'type': 'resize',
-            'dst_width': 100,
-            'dst_height': 150,
-            'aspect_ratio_scale': 'greater'
-        })
-        result = resize(DataRepresentation(input_image)).data
-
-        assert result.shape == (300, 100, 3)
-
-    def test_resize_to_negative_size_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('resize', {'type': 'resize', 'size': -100})
-
-    def test_resize_to_negative_destination_width_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('resize', {'type': 'resize', 'dst_width': -100, 'dst_height': 100})
-
-    def test_resize_to_negative_destination_height_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('resize', {'type': 'resize', 'dst_width': 100, 'dst_height': -100})
-
-    def test_resize_with_both_provided_size_and_dst_height_dst_width_warn(self):
-        input_image = np.ones((100, 50, 3))
-
-        with pytest.warns(None) as warnings:
-            resize = Preprocessor.provide(
-                'resize', {'type': 'resize', 'dst_width': 100, 'dst_height': 100, 'size': 200}
-            )
-            assert len(warnings) == 1
-            result = resize(DataRepresentation(input_image)).data
-            assert result.shape == (200, 200, 3)
-
-    def test_resize_provided_only_dst_height_raise_config_error(self):
-        with pytest.raises(ValueError):
-            Preprocessor.provide('resize', {'type': 'resize', 'dst_height': 100})
-
-    def test_resize_provided_only_dst_width_raise_config_error(self):
-        with pytest.raises(ValueError):
-            Preprocessor.provide('resize', {'type': 'resize', 'dst_width': 100})
-
-
-class TestNormalization:
-    def test_normalization_without_mean_and_std_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('normalization', {'type': 'normalization'})
-
-    def test_custom_normalization_with_mean(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '(1, 2, 3)'})
-        source = np.full_like((3, 300, 300), 100)
-        input_ref = source.copy() - (1, 2, 3)
-        result = normalization(DataRepresentation(source))
-
-        assert normalization.mean == (1, 2, 3)
-        assert normalization.std is None
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_precomputed_mean(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': 'cifar10'})
-
-        source = np.full_like((3, 300, 300), 100)
-        input_ref = source.copy() - normalization.PRECOMPUTED_MEANS['cifar10']
-        result = normalization(DataRepresentation(source))
-
-        assert normalization.mean == normalization.PRECOMPUTED_MEANS['cifar10']
-        assert normalization.std is None
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_mean_as_scalar(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '1'})
-
-        source = np.full_like((3, 300, 300), 100)
-        input_ref = source.copy() - 1
-        result = normalization(DataRepresentation(source))
-
-        assert normalization.mean == (1.0, )
-        assert normalization.std is None
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_std(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'std': '(1, 2, 3)'})
-
-        source = np.full_like((3, 300, 300), 100)
-        input_ref = source.copy() / (1, 2, 3)
-        result = normalization(DataRepresentation(source))
-
-        assert normalization.mean is None
-        assert normalization.std == (1, 2, 3)
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_precomputed_std(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'std': 'cifar10'})
-
-        source = np.full_like((3, 300, 300), 100)
-        input_ref = source.copy() / normalization.PRECOMPUTED_STDS['cifar10']
-        result = normalization(DataRepresentation(source))
-
-        assert normalization.mean is None
-        assert normalization.std == normalization.PRECOMPUTED_STDS['cifar10']
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_std_as_scalar(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'std': '2'})
-        source = np.full_like((3, 300, 300), 100)
-        input_ref = source.copy() / 2
-        result = normalization(DataRepresentation(source))
-
-        assert normalization.mean is None
-        assert normalization.std == (2.0, )
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_mean_and_std(self):
-        normalization = Preprocessor.provide(
-            'normalization', {'type': 'normalization', 'mean': '(1, 2, 3)', 'std': '(4, 5, 6)'}
-        )
-
-        input_ = np.full_like((3, 300, 300), 100)
-        input_ref = (input_ - (1, 2, 3)) / (4, 5, 6)
-        result = normalization(DataRepresentation(input_))
-
-        assert normalization.mean == (1, 2, 3)
-        assert normalization.std == (4, 5, 6)
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_custom_normalization_with_mean_and_std_as_scalars(self):
-        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '2', 'std': '5'})
-
-        input_ = np.full_like((3, 300, 300), 100)
-        input_ref = (input_ - (2, )) / (5, )
-        result = normalization(DataRepresentation(input_))
-
-        assert normalization.mean == (2, )
-        assert normalization.std == (5, )
-        assert np.all(input_ref == result.data)
-        assert result.metadata == {'image_size': (3,)}
-
-    def test_normalization_with_zero_in_std_values_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'std': '(4, 0, 6)'})
-
-    def test_normalization_with_zero_as_std_value_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'std': '0'})
-
-    def test_normalization_with_not_channel_wise_mean_list_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '3, 2'})
-
-    def test_normalization_with_not_channel_wise_std_list_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'std': '3, 2'})
-
-    def test_normalization_with_unknown_precomputed_mean_raise_config_error(self):
-        with pytest.raises(ValueError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'mean': 'unknown'})
-
-    def test_normalization_with_unknown_precomputed_std_raise_config_error(self):
-        with pytest.raises(ValueError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'std': 'unknown'})
-
-
-class TestPreprocessingEvaluator:
-    def test_preprocessing_evaluator(self):
-        config = [{'type': 'normalization', 'mean': '(1, 2, 3)'}, {'type': 'resize', 'size': 200}]
-        preprocessor = PreprocessingExecutor(config)
-
-        assert 2 == len(preprocessor.processors)
-        assert isinstance(preprocessor.processors[0], Normalize)
-        assert isinstance(preprocessor.processors[1], Resize)
-        assert preprocessor.processors[0].mean == (1, 2, 3)
-        assert preprocessor.processors[1].dst_width == 200
-
-
-class TestCrop:
-    def test_crop_higher(self):
-        crop = Crop({'dst_width': 50, 'dst_height': 33, 'type': 'crop'})
-        image = np.zeros((100, 100, 3))
-        image_rep = crop(DataRepresentation(image))
-
-        assert image_rep.data.shape == (33, 50, 3)
-        assert image_rep.metadata == {'image_size': (100, 100, 3)}
-
-    def test_crop_to_size(self):
-        crop = Crop({'size': 50, 'type': 'crop'})
-        image = np.zeros((100, 100, 3))
-        image_rep = crop(DataRepresentation(image))
-
-        assert image_rep.data.shape == (50, 50, 3)
-        assert image_rep.metadata == {'image_size': (100, 100, 3)}
-
-    def test_crop_higher_non_symmetric(self):
-        crop = Crop({'dst_width': 50, 'dst_height': 12, 'type': 'crop'})
-        image = np.zeros((70, 50, 3))
-        image_rep = crop(DataRepresentation(image))
-
-        assert image_rep.data.shape == (12, 50, 3)
-        assert image_rep.metadata == {'image_size': (70, 50, 3)}
-
-    def test_crop_less(self):
-        crop = Crop({'dst_width': 151, 'dst_height': 42, 'type': 'crop'})
-        image = np.zeros((30, 30, 3))
-        image_rep = crop(DataRepresentation(image))
-
-        assert image_rep.data.shape == (42, 151, 3)
-        assert image_rep.metadata == {'image_size': (30, 30, 3)}
-
-    def test_crop_less_non_symmetric(self):
-        crop = Crop({'dst_width': 42, 'dst_height': 151, 'type': 'crop'})
-        image = np.zeros((30, 40, 3))
-        image_rep = crop(DataRepresentation(image))
-
-        assert image_rep.data.shape == (151, 42, 3)
-        assert image_rep.metadata == {'image_size': (30, 40, 3)}
-
-    def test_crop_to_negative_size_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Crop({'size': -151, 'type': 'crop'})
-
-    def test_crop_to_negative_destination_width_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Crop({'dst_width': -100, 'dst_height': 100, 'type': 'crop'})
-
-    def test_crop_to_negative_destination_height_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            Crop({'dst_width': 100, 'dst_height': -100, 'type': 'crop'})
-
-    def test_crop_with_both_provided_size_and_dst_height_dst_width_warn(self):
-        image = np.zeros((30, 40, 3))
-        with pytest.warns(None) as warnings:
-            crop = Crop({'dst_width': 100, 'dst_height': 100, 'size': 200, 'type': 'crop'})
-            assert len(warnings) == 1
-            result = crop.process(DataRepresentation(image))
-            assert result.data.shape == (200, 200, 3)
-            assert result.metadata == {'image_size': (30, 40, 3)}
-
-
-class TestFlip:
-    def test_horizontal_flip(self):
-        image = np.random.randint(0, 255, (30, 40, 3))
-        expected_image = cv2.flip(image, 0)
-        flip = Flip({'type': 'flip', 'mode': 'horizontal'})
-        assert np.array_equal(expected_image, flip.process(DataRepresentation(image)).data)
-
-    def test_vertical_flip(self):
-        image = np.random.randint(0, 255, (30, 40, 3))
-        expected_image = cv2.flip(image, 1)
-        flip = Flip({'type': 'flip', 'mode': 'vertical'})
-        assert np.array_equal(expected_image, flip.process(DataRepresentation(image)).data)
-
-    def test_flip_raise_config_error_if_mode_not_provided(self):
-        with pytest.raises(ConfigError):
-            Flip({'type': 'flip'})
-
-    def test_flip_raise_config_error_if_mode_unknown(self):
-        with pytest.raises(ConfigError):
-            Flip({'type': 'flip', 'mode': 'unknown'})
-
-
-class TestBGRtoRGB:
-    def test_bgr_to_rgb(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        bgr_to_rgb = BgrToRgb({'type': 'bgr_to_rgb'})
-        assert np.array_equal(expected_image, bgr_to_rgb.process(DataRepresentation(image)).data)
-
-
-class TestCropRect:
-    def test_crop_rect_if_rect_not_provided(self):
-        image = np.zeros((30, 40, 3))
-        crop_rect = CropRect({'type': 'crop_rect'})
-        assert np.array_equal(image, crop_rect(image, {}))
-
-    def test_crop_rect_if_rect_equal_image(self):
-        image = np.zeros((30, 40, 3))
-        crop_rect = CropRect({'type': 'crop_rect'})
-        assert np.array_equal(image, crop_rect(DataRepresentation(image), {'rect': [0, 0, 40, 30]}).data)
-
-    def test_crop_rect(self):
-        image = np.zeros((30, 40, 3))
-        image[:, 20:, :] = 1
-        expected_image = np.ones((30, 20, 3))
-        crop_rect = CropRect({'type': 'crop_rect'})
-        assert np.array_equal(expected_image, crop_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data)
-
-    def test_crop_rect_negative_coordinates_of_rect(self):
-        image = np.zeros((30, 40, 3))
-        image[:, 20:, :] = 1
-        expected_image = image
-        crop_rect = CropRect({'type': 'crop_rect'})
-        assert np.array_equal(expected_image, crop_rect(DataRepresentation(image), {'rect': [-20, 0, 40, 30]}).data)
-
-    def test_crop_rect_more_image_size_coordinates_of_rect(self):
-        image = np.zeros((30, 40, 3))
-        image[:, 20:, :] = 1
-        expected_image = np.ones((30, 20, 3))
-        crop_rect = CropRect({'type': 'crop_rect'})
-        assert np.array_equal(expected_image, crop_rect(DataRepresentation(image), {'rect': [20, 0, 40, 50]}).data)
-
-
-class TestExtendAroundRect:
-    def test_default_extend_around_rect_without_rect(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = image
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect'})
-        assert np.array_equal(expected_image, extend_image_around_rect(DataRepresentation(image), {}).data)
-
-    def test_default_extend_around_rect(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = image
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect'})
-        assert np.array_equal(
-            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data
-        )
-
-    def test_extend_around_rect_with_positive_augmentation(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(0), int(11), cv2.BORDER_REPLICATE)
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
-        assert np.array_equal(
-            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data
-        )
-
-    def test_extend_around_rect_with_negative_augmentation(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = image
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': -0.5})
-        assert np.array_equal(
-            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data
-        )
-
-    def test_extend_around_rect_with_rect_equal_image(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(20.5), int(41), cv2.BORDER_REPLICATE)
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
-        assert np.array_equal(
-            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [0, 0, 40, 30]}).data
-        )
-
-    def test_extend_around_rect_negative_coordinates_of_rect(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(20.5), int(41), cv2.BORDER_REPLICATE)
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
-        assert np.array_equal(
-            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [-20, 0, 40, 30]}).data
-        )
-
-    def test_extend_around_rect_more_image_size_coordinates_of_rect(self):
-        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
-        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(0), int(11), cv2.BORDER_REPLICATE)
-        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
-        assert np.array_equal(
-            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 50]}).data
-        )
-
-
-class TestPointAlignment:
-    def test_point_alignment_width_negative_size_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            PointAligner({'type': 'point_alignment', 'size': -100})
-
-    def test_point_alignment_negative_destination_width_raise_config_error(self):
-        with pytest.raises(ConfigError):
-            PointAligner({'type': 'point_alignment', 'dst_width': -100, 'dst_height': 100})
-
-    def test_point_alignment_to_negative_destination_height_raise_config_error(self):
-        with pytest.raises(ValueError):
-            PointAligner({'type': 'point_alignment', 'dst_width': 100, 'dst_height': -100})
-
-    def test_point_alignment_provided_only_dst_height_raise_config_error(self):
-        with pytest.raises(ValueError):
-            PointAligner({'type': 'point_alignment', 'dst_height': 100})
-
-    def test_point_alignment_provided_only_dst_width_raise_config_error(self):
-        with pytest.raises(ValueError):
-            PointAligner({'type': 'point_alignment', 'dst_width': 100})
-
-    def test_point_alignment_both_provided_size_and_dst_height_dst_width_warn(self):
-        input_image = np.ones((100, 50, 3))
-
-        with pytest.warns(None) as warnings:
-            point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 100, 'dst_height': 100, 'size': 200})
-            assert len(warnings) == 1
-            result = point_aligner(DataRepresentation(input_image), {}).data
-            assert result.shape == (100, 50, 3)
-
-    def test_point_alignment_not_provided_points_im_meta(self):
-        input_image = np.ones((100, 50, 3))
-
-        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 100, 'dst_height': 100})
-        result = point_aligner(DataRepresentation(input_image), {}).data
-        assert result.shape == (100, 50, 3)
-
-    def test_point_alignment_default_use_normalization(self):
-        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
-
-        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40})
-        result = point_aligner(
-            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
-        ).data
-        transformation_matrix = point_aligner.transformation_from_points(
-            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks
-        )
-        expected_result = cv2.warpAffine(image, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
-
-        assert np.array_equal(result, expected_result)
-
-    def test_point_alignment_use_normalization(self):
-        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
-
-        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40, 'normalize': True})
-        result = point_aligner(
-            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
-        ).data
-        transformation_matrix = point_aligner.transformation_from_points(
-            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks
-        )
-        expected_result = cv2.warpAffine(image, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
-
-        assert np.array_equal(result, expected_result)
-
-    def test_point_alignment_without_normalization(self):
-        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
-
-        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40, 'normalize': False})
-        result = point_aligner(
-            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
-        ).data
-        transformation_matrix = point_aligner.transformation_from_points(
-            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks * 40
-        )
-        expected_result = cv2.warpAffine(image, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
-
-        assert np.array_equal(result, expected_result)
-
-    def test_point_alignment_with_drawing_points(self):
-        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
-
-        point_aligner = PointAligner({
-            'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40, 'draw_points': True
-        })
-        result = point_aligner(
-            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
-        ).data
-        transformation_matrix = point_aligner.transformation_from_points(
-            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks
-        )
-        expected_result = image
-        for point in PointAligner.ref_landmarks:
-            cv2.circle(expected_result, (int(point[0]), int(point[1])), 5, (255, 0, 0), -1)
-        expected_result = cv2.warpAffine(expected_result, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
-
-        assert np.array_equal(result, expected_result)
-
-    def test_point_alignment_with_resizing(self):
-        image = np.random.randint(0, 255, (80, 80, 3)).astype(np.uint8)
-
-        point_aligner = PointAligner({'type': 'point_alignment', 'size': 40})
-        result = point_aligner(
-            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
-        ).data
-        transformation_matrix = point_aligner.transformation_from_points(
-            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks * 0.5
-        )
-        expected_result = cv2.resize(image, (40, 40))
-        expected_result = cv2.warpAffine(expected_result, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
-
-        assert np.array_equal(result, expected_result)
-
-
-class TestPreprocessorExtraArgs:
-    def test_resize_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('resize', {'type': 'resize', 'size': 1, 'something_extra': 'extra'})
-
-    def test_normalization_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('normalization', {'type': 'normalization', 'mean': 0, 'something_extra': 'extra'})
-
-    def test_bgr_to_rgb_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('bgr_to_rgb', {'type': 'bgr_to_rgb', 'something_extra': 'extra'})
-
-    def test_flip_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('flip', {'type': 'flip', 'something_extra': 'extra'})
-
-    def test_crop_accuracy_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('crop', {'type': 'crop', 'size': 1, 'something_extra': 'extra'})
-
-    def test_extend_around_rect_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('extend_around_rect', {'type': 'extend_around_rect', 'something_extra': 'extra'})
-
-    def test_point_alignment_raise_config_error_on_extra_args(self):
-        with pytest.raises(ConfigError):
-            Preprocessor.provide('point_alignment', {'type': 'point_alignment', 'something_extra': 'extra'})
diff --git a/tools/accuracy_checker/tests/test_presenter.py b/tools/accuracy_checker/tests/test_presenter.py
deleted file mode 100644 (file)
index 4d2b5d4..0000000
+++ /dev/null
@@ -1,552 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-import pytest
-from unittest.mock import MagicMock, call
-from accuracy_checker.metrics import MetricsExecutor
-from accuracy_checker.presenters import ScalarPrintPresenter, VectorPrintPresenter, EvaluationResult
-from accuracy_checker.representation import ClassificationAnnotation, ClassificationPrediction
-
-
-class TestPresenter:
-    def test_config_default_presenter(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-        config = [{'type': 'accuracy', 'top_k': 1}]
-        dispatcher = MetricsExecutor(config, None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
-            assert isinstance(presenter, ScalarPrintPresenter)
-
-    def test_config_scalar_presenter(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-        config = [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_scalar'}]
-        dispatcher = MetricsExecutor(config, None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
-            assert isinstance(presenter, ScalarPrintPresenter)
-
-    def test_config_vector_presenter(self):
-        annotations = [ClassificationAnnotation('identifier', 3)]
-        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
-        config = [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_vector'}]
-        dispatcher = MetricsExecutor(config, None)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
-            assert isinstance(presenter, VectorPrintPresenter)
-
-    def test_config_unknown_presenter(self):
-        config = [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_somehow'}]
-        with pytest.raises(ValueError):
-            MetricsExecutor(config, None)
-
-    def test_scalar_presenter_with_scalar_data(self, mocker):
-        mock_write_scalar_result = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=0.1,
-            reference_value=None,
-            threshold=None,
-            meta={},
-        )
-        presenter = ScalarPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_result.assert_called_once_with(
-            result.evaluated_value,
-            result.name,
-            result.threshold,
-            None,
-            postfix='%',
-            scale=100,
-            result_format='{:.2f}'
-        )
-
-    def test_scalar_presenter_with_vector_data(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={},
-        )
-        presenter = ScalarPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_res.assert_called_once_with(
-            np.mean(result.evaluated_value),
-            result.name,
-            result.threshold,
-            None,
-            postfix='%',
-            scale=100,
-            result_format='{:.2f}'
-        )
-
-    def test_default_format_for_scalar_presenter_with_ignore_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.456],
-            reference_value=None,
-            threshold=None,
-            meta={},
-        )
-        presenter = ScalarPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        mock_write_scalar_res.assert_called_once_with(
-            np.mean(result.evaluated_value),
-            result.name,
-            result.threshold,
-            None,
-            postfix=' ',
-            scale=1,
-            result_format='{}'
-        )
-
-    def test_reference_value_for_scalar_presenter(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.456],
-            reference_value=45.6,
-            threshold=None,
-            meta={},
-        )
-        presenter = ScalarPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_res.assert_called_once_with(
-            np.mean(result.evaluated_value),
-            result.name,
-            result.threshold,
-            0.0,
-            postfix='%',
-            scale=100,
-            result_format='{:.2f}'
-        )
-
-    def test_reference_value_for_scalar_presenter_with_ignore_results_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.456],
-            reference_value=45.6,
-            threshold=None,
-            meta={},
-        )
-        presenter = ScalarPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        mock_write_scalar_res.assert_called_once_with(
-            np.mean(result.evaluated_value),
-            result.name,
-            result.threshold,
-            0.0,
-            postfix=' ',
-            scale=1,
-            result_format='{}'
-        )
-
-    def test_specific_format_for_scalar_presenter_with_ignore_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.456],
-            reference_value=None,
-            threshold=None,
-            meta={'scale': 0.5, 'postfix': 'km/h', 'data_format': '{:.4f}'},
-        )
-        presenter = ScalarPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        mock_write_scalar_res.assert_called_once_with(
-            np.mean(result.evaluated_value),
-            result.name,
-            result.reference_value,
-            result.threshold,
-            postfix=' ',
-            scale=1,
-            result_format='{}'
-        )
-
-    def test_vector_presenter_with_scaler_data(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=0.4,
-            reference_value=None,
-            threshold=None,
-            meta={},
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_res.assert_called_once_with(
-            result.evaluated_value,
-            result.name,
-            None,
-            result.threshold,
-            postfix='%',
-            scale=100,
-            value_name=None,
-            result_format='{:.2f}'
-        )
-
-    def test_vector_presenter_with_scaler_data_compare_with_reference(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=0.4,
-            reference_value=42,
-            threshold=None,
-            meta={},
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_res.assert_called_once_with(
-            result.evaluated_value,
-            result.name,
-            result.threshold,
-            2,
-            postfix='%',
-            scale=100,
-            value_name=None,
-            result_format='{:.2f}'
-        )
-
-    def test_vector_presenter_with_scaler_data_compare_with_reference_ignore_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=0.4,
-            reference_value=42,
-            threshold=None,
-            meta={},
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        mock_write_scalar_res.assert_called_once_with(
-            result.evaluated_value,
-            result.name,
-            result.threshold,
-            2,
-            postfix=' ',
-            scale=1,
-            value_name=None,
-            result_format='{}'
-        )
-
-    def test_vector_presenter_with_vector_data_contain_one_element(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=[0.4],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['prediction']}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_res.assert_called_once_with(
-            result.evaluated_value[0],
-            result.name,
-            None,
-            result.threshold,
-            postfix='%',
-            scale=100,
-            value_name=result.meta['names'][0],
-            result_format='{:.2f}'
-        )
-
-    def test_vector_presenter_with_vector_data_contain_one_element_compare_with_reference(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=[0.4],
-            reference_value=42,
-            threshold=None,
-            meta={},
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        mock_write_scalar_res.assert_called_once_with(
-            result.evaluated_value[0],
-            result.name,
-            result.threshold,
-            2,
-            postfix='%',
-            scale=100,
-            value_name=None,
-            result_format='{:.2f}'
-        )
-
-    def test_vector_presenter__with_vector_data_contain_one_element_compare_with_reference_ignore_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.4],
-            reference_value=42,
-            threshold=None,
-            meta={},
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        mock_write_scalar_res.assert_called_once_with(
-            result.evaluated_value[0],
-            result.name,
-            result.threshold,
-            2,
-            postfix=' ',
-            scale=1,
-            value_name=None,
-            result_format='{}'
-        )
-
-    def test_vector_presenter_with_vector_data_with_default_postfix_and_scale(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['class1', 'class2']}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix='%', scale=100, value_name=result.meta['names'][0], result_format='{:.2f}'
-            ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix='%', scale=100, value_name=result.meta['names'][1],  result_format='{:.2f}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, 100)), result.name, result.threshold,
-                None, value_name='mean', postfix='%', scale=1, result_format='{:.2f}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_has_default_format_with_ignore_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['class1', 'class2']}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix=' ', scale=1, value_name=result.meta['names'][0], result_format='{}'
-            ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix=' ', scale=1, value_name=result.meta['names'][1], result_format='{}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, 1)), result.name, result.threshold, None,
-                value_name='mean', postfix=' ', scale=1, result_format='{}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_with_default_formating_compare_with_ref(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=49,
-            threshold=None,
-            meta={'names': ['class1', 'class2']}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix='%', scale=100, value_name=result.meta['names'][0], result_format='{:.2f}'
-            ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix='%', scale=100, value_name=result.meta['names'][1],  result_format='{:.2f}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, 100)), result.name, result.threshold,
-                1, value_name='mean', postfix='%', scale=1, result_format='{:.2f}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_has_default_format_with_ignore_formatting_compare_with_ref(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='vector_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=49,
-            threshold=None,
-            meta={'names': ['class1', 'class2']}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix=' ', scale=1, value_name=result.meta['names'][0], result_format='{}'
-            ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix=' ', scale=1, value_name=result.meta['names'][1], result_format='{}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, 1)), result.name,  result.threshold, 1,
-                value_name='mean', postfix=' ', scale=1, result_format='{}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_has_specific_format_with_ignore_formatting(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['class1', 'class2'], 'scale': 0.5, 'postfix': 'km/h', 'data_format': '{:.4f}'}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result, ignore_results_formatting=True)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix=' ', scale=1, value_name=result.meta['names'][0], result_format='{}'
-            ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix=' ', scale=1, value_name=result.meta['names'][1], result_format='{}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, 1)), result.name, result.reference_value, result.threshold,
-                value_name='mean', postfix=' ', scale=1, result_format='{}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_with_scalar_postfix(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['class1', 'class2'], 'postfix': '_'}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        calls = [
-            call(result.evaluated_value[0], result.name,
-                 postfix=result.meta['postfix'], scale=100, value_name=result.meta['names'][0], result_format='{:.2f}'
-                 ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix=result.meta['postfix'], scale=100, value_name=result.meta['names'][1], result_format='{:.2f}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, 100)), result.name,
-                result.threshold, None, value_name='mean', postfix=result.meta['postfix'], scale=1,  result_format='{:.2f}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_with_scalar_scale(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['class1', 'class2'], 'scale': 10}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix='%', scale=result.meta['scale'], value_name=result.meta['names'][0], result_format='{:.2f}'
-            ),
-            call(
-                result.evaluated_value[1], result.name,
-                postfix='%', scale=result.meta['scale'], value_name=result.meta['names'][1], result_format='{:.2f}'
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, result.meta['scale'])), result.name, None, result.threshold,
-                value_name='mean', postfix='%', scale=1, result_format='{:.2f}'
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
-
-    def test_vector_presenter_with_vector_data_with_vector_scale(self, mocker):
-        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
-        result = EvaluationResult(
-            name='scalar_metric',
-            metric_type='metric',
-            evaluated_value=[0.4, 0.6],
-            reference_value=None,
-            threshold=None,
-            meta={'names': ['class1', 'class2'], 'scale': [1, 2]}
-        )
-        presenter = VectorPrintPresenter()
-        presenter.write_result(result)
-        calls = [
-            call(
-                result.evaluated_value[0], result.name,
-                postfix='%', scale=result.meta['scale'][0], result_format='{:.2f}', value_name=result.meta['names'][0]
-            ),
-            call(
-                result.evaluated_value[1], result.name, postfix='%',
-                scale=result.meta['scale'][1], result_format='{:.2f}', value_name=result.meta['names'][1]
-            ),
-            call(
-                np.mean(np.multiply(result.evaluated_value, result.meta['scale'])), result.name, result.threshold,
-                None, result_format='{:.2f}', value_name='mean', postfix='%', scale=1
-            )
-        ]
-        mock_write_scalar_res.assert_has_calls(calls)
diff --git a/tools/accuracy_checker/tests/test_regression_metrics.py b/tools/accuracy_checker/tests/test_regression_metrics.py
deleted file mode 100644 (file)
index 5e47804..0000000
+++ /dev/null
@@ -1,342 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import pytest
-from accuracy_checker.metrics import MetricsExecutor
-from accuracy_checker.representation import RegressionPrediction, RegressionAnnotation
-from accuracy_checker.presenters import EvaluationResult
-
-
-class TestRegressionMetric:
-    def setup_method(self):
-        self.module = 'accuracy_checker.metrics.metric_evaluator'
-
-    def test_mae_with_zero_diff_between_annotation_and_prediction(self):
-        annotations = [RegressionAnnotation('identifier', 3)]
-        predictions = [RegressionPrediction('identifier', 3)]
-        config = [{'type': 'mae'}]
-        expected = EvaluationResult(
-            pytest.approx([0.0, 0.0]),
-            None,
-            'mae',
-            'mae',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_with_negative_diff_between_annotation_and_prediction(self):
-        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
-        predictions = [RegressionPrediction('identifier', 5), RegressionPrediction('identifier2', 5)]
-        config = [{'type': 'mae'}]
-        expected = EvaluationResult(
-            pytest.approx([3.0, 1.0]),
-            None,
-            'mae',
-            'mae',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_with_positive_diff_between_annotation_and_prediction(self):
-        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
-        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier2', -3)]
-        config = [{'type': 'mae'}]
-        expected = EvaluationResult(
-            pytest.approx([3.0, 1.0]),
-            None,
-            'mae',
-            'mae',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mse_with_zero_diff_between_annotation_and_prediction(self):
-        annotations = [RegressionAnnotation('identifier', 3)]
-        predictions = [RegressionPrediction('identifier', 3)]
-        config = [{'type': 'mse'}]
-        expected = EvaluationResult(
-            pytest.approx([0.0, 0.0]),
-            None,
-            'mse',
-            'mse',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mse_with_negative_diff_between_annotation_and_prediction(self):
-        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
-        predictions = [RegressionPrediction('identifier', 5), RegressionPrediction('identifier2', 5)]
-        config = [{'type': 'mse'}]
-        expected = EvaluationResult(
-            pytest.approx([10.0, 6.0]),
-            None,
-            'mse',
-            'mse',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mse_with_positive_diff_between_annotation_and_prediction(self):
-        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
-        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier2', -3)]
-        config = [{'type': 'mse'}]
-        expected = EvaluationResult(
-            pytest.approx([10.0, 6.0]),
-            None,
-            'mse',
-            'mse',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_missed_interval(self):
-        config = [{'type': 'mae_on_interval'}]
-        with pytest.raises(ValueError):
-            MetricsExecutor(config, None)
-
-    def test_mae_on_interval_default_all_missed(self):
-        annotations = [RegressionAnnotation('identifier', -2)]
-        predictions = [RegressionPrediction('identifier', 1)]
-        config = [{'type': 'mae_on_interval', 'end': 1}]
-        expected = EvaluationResult(
-            pytest.approx([0.0]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': [], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        with pytest.warns(UserWarning) as warnings:
-            for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-                assert len(warnings) == 1
-                assert evaluation_result == expected
-
-    def test_mae_on_interval_default_all_not_in_range_not_ignore_out_of_range(self):
-        annotations = [RegressionAnnotation('identifier', -1), RegressionAnnotation('identifier', 2)]
-        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier', 2)]
-        expected = EvaluationResult(
-            pytest.approx([2.0, 0.0, 0.0, 0.0]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {
-                'postfix': ' ',
-                'scale': 1,
-                'names': ['mean: < 0.0', 'std: < 0.0', 'mean: > 1.0', 'std: > 1.0'],
-                'calculate_mean': False
-            }
-        )
-        config = [{'type': 'mae_on_interval', 'end': 1, 'ignore_values_not_in_interval': False}]
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_on_interval_values_in_range(self):
-        annotations = [RegressionAnnotation('identifier', 0.5), RegressionAnnotation('identifier', 0.5)]
-        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier', 0.25)]
-        config = [{'type': 'mae_on_interval', 'end': 1}]
-        expected = EvaluationResult(
-            pytest.approx([0.375, 0.125]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {'postfix': ' ', 'scale': 1, 'names': ['mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0'], 'calculate_mean': False}
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_on_interval_default_not_ignore_out_of_range(self):
-        annotations = [
-            RegressionAnnotation('identifier', -1),
-            RegressionAnnotation('identifier',  2),
-            RegressionAnnotation('identifier', 0.5)
-        ]
-        predictions = [
-            RegressionPrediction('identifier', 1),
-            RegressionPrediction('identifier', 2),
-            RegressionPrediction('identifier', 1)
-        ]
-        config = [{'type': 'mae_on_interval', 'end': 1, 'ignore_values_not_in_interval': False}]
-        expected = EvaluationResult(
-            pytest.approx([2.0, 0.0, 0.5, 0.0,  0.0, 0.0]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {
-                'postfix': ' ',
-                'scale': 1,
-                'names': [
-                    'mean: < 0.0',
-                    'std: < 0.0',
-                    'mean: <= 0.0 < 1.0',
-                    'std: <= 0.0 < 1.0',
-                    'mean: > 1.0',
-                    'std: > 1.0'
-                ],
-                'calculate_mean': False
-            }
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_on_interval_with_given_interval(self):
-        annotations = [
-            RegressionAnnotation('identifier', -1),
-            RegressionAnnotation('identifier',  2),
-            RegressionAnnotation('identifier',  1)
-        ]
-        predictions = [
-            RegressionPrediction('identifier', 1),
-            RegressionPrediction('identifier', 3),
-            RegressionPrediction('identifier', 1)
-        ]
-        config = [{'type': 'mae_on_interval', 'intervals': [0.0, 2.0, 4.0]}]
-        expected = EvaluationResult(
-            pytest.approx([0.0, 0.0, 1.0, 0.0]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {
-                'postfix': ' ',
-                'scale': 1,
-                'names': ['mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0', 'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'],
-                'calculate_mean': False
-            }
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_on_interval_with_repeated_values(self):
-        annotations = [
-            RegressionAnnotation('identifier', -1),
-            RegressionAnnotation('identifier',  2),
-            RegressionAnnotation('identifier', 1)
-        ]
-        predictions = [
-            RegressionPrediction('identifier', 1),
-            RegressionPrediction('identifier', 3),
-            RegressionPrediction('identifier', 1)
-        ]
-        config = [{'type': 'mae_on_interval', 'intervals': [0.0, 2.0, 2.0, 4.0]}]
-        expected = EvaluationResult(
-            pytest.approx([0.0, 0.0, 1.0, 0.0]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {
-                'postfix': ' ',
-                'scale': 1,
-                'names': ['mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0', 'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'],
-                'calculate_mean': False
-            }
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_mae_on_interval_with_unsorted_values(self):
-        annotations = [
-            RegressionAnnotation('identifier', -1),
-            RegressionAnnotation('identifier',  2),
-            RegressionAnnotation('identifier',  1)
-        ]
-        predictions = [
-            RegressionPrediction('identifier', 1),
-            RegressionPrediction('identifier', 3),
-            RegressionPrediction('identifier', 1)
-        ]
-        config = [{'type': 'mae_on_interval', 'intervals': [2.0,  0.0, 4.0]}]
-        expected = EvaluationResult(
-            pytest.approx([0.0, 0.0, 1.0, 0.0]),
-            None,
-            'mae_on_interval',
-            'mae_on_interval',
-            None,
-            {
-                'postfix': ' ', 'scale': 1,
-                'names': ['mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0', 'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'],
-                'calculate_mean': False
-            }
-        )
-        dispatcher = MetricsExecutor(config, None)
-
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
diff --git a/tools/accuracy_checker/tests/test_reid_metrics.py b/tools/accuracy_checker/tests/test_reid_metrics.py
deleted file mode 100644 (file)
index b73008a..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import numpy as np
-from accuracy_checker.metrics.reid import eval_cmc
-
-
-class TestCMC:
-    def test_only_distance_matrix(self):
-        distance_matrix = np.array([
-            [0, 1, 2, 3, 4],
-            [1, 0, 2, 3, 4],
-            [0, 1, 2, 3, 4],
-            [0, 1, 2, 3, 4],
-            [1, 2, 3, 4, 0]
-        ])
-        m, n = distance_matrix.shape
-
-        result = eval_cmc(
-            distance_matrix,
-            query_ids=np.arange(m),
-            gallery_ids=np.arange(n),
-            query_cams=np.zeros(m).astype(np.int32),
-            gallery_cams=np.ones(n).astype(np.int32)
-        )
-
-        assert np.all(result[:5] == [0.6, 0.6, 0.8, 1.0, 1.0])
-
-    def test_duplicate_ids(self):
-        distance_matrix = np.array([
-            [0, 1, 2, 3],
-            [0, 1, 2, 3],
-            [0, 1, 2, 3],
-            [0, 1, 2, 3]
-        ])
-
-        result = eval_cmc(
-            distance_matrix,
-            query_ids=np.array([0, 0, 1, 1]),
-            gallery_ids=np.array([0, 0, 1, 1]),
-            top_k=4,
-            gallery_cams=np.ones(distance_matrix.shape[1]).astype(np.int32),
-            query_cams=np.zeros(distance_matrix.shape[0]).astype(np.int32),
-            separate_camera_set=False,
-            single_gallery_shot=False
-        )
-
-        assert np.all(result == [0.5, 0.5, 1, 1])
-
-    def test_duplicate_cams(self):
-        distance_matrix = np.tile(np.arange(5), (5, 1))
-
-        result = eval_cmc(
-            distance_matrix,
-            query_ids=np.array([0, 0, 0, 1, 1]),
-            gallery_ids=np.array([0, 0, 0, 1, 1]),
-            query_cams=np.array([0, 0, 0, 0, 0]),
-            gallery_cams=np.array([0, 1, 1, 1, 1]),
-            top_k=5,
-            separate_camera_set=False,
-            single_gallery_shot=False
-        )
-
-        assert np.all(result == [0.6, 0.6, 0.6, 1, 1])
diff --git a/tools/accuracy_checker/tests/test_segmentation_metrics.py b/tools/accuracy_checker/tests/test_segmentation_metrics.py
deleted file mode 100644 (file)
index 56e13b6..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import pytest
-import numpy as np
-from accuracy_checker.metrics import MetricsExecutor
-from accuracy_checker.presenters import EvaluationResult
-from .common import single_class_dataset, multi_class_dataset, make_segmentation_representation
-
-
-def create_config(metric_name, use_argmax=False):
-    return [{'type': metric_name, 'use_argmax': use_argmax}]
-
-
-def generate_expected_result(values, metric_name, labels=None):
-    meta = {'names': list(labels.values())} if labels else {}
-
-    return EvaluationResult(pytest.approx(values), None, metric_name, metric_name, None, meta)
-
-
-class TestPixelAccuracy:
-    name = 'segmentation_accuracy'
-
-    def test_one_class(self):
-        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
-        dispatcher = MetricsExecutor(create_config(self.name), single_class_dataset())
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result(1.0, self.name)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class_not_matched(self):
-        annotations = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), False)
-        dispatcher = MetricsExecutor(create_config(self.name), multi_class_dataset())
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result(0.0, self.name)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class(self):
-        annotations = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), False)
-        dispatcher = MetricsExecutor(create_config(self.name), multi_class_dataset())
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result((5.0+1.0+1.0)/(8.0+1.0+1.0), self.name)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-
-class TestMeanAccuracy:
-    name = 'mean_accuracy'
-
-    def test_one_class(self):
-        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
-        dataset = single_class_dataset()
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result([1.0, 0.0], self.name, dataset.labels)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class_not_matched(self):
-        annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
-        dataset = multi_class_dataset()
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result([0.0, 0.0, 0.0, 0.0], self.name, dataset.labels)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class(self):
-        dataset = multi_class_dataset()
-        annotations = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), False)
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result([1.0, 1.0, 0.0, 0.5], self.name, dataset.labels)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-
-class TestMeanIOU:
-    name = 'mean_iou'
-
-    def test_one_class(self):
-        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
-        dataset = single_class_dataset()
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result([1.0, 0.0], self.name, dataset.labels)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class_not_matched(self):
-        annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
-        dataset = multi_class_dataset()
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result([0.0, 0.0, 0.0, 0.0], self.name, dataset.labels)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class(self):
-        dataset = multi_class_dataset()
-        annotations = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), False)
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result([0.625, 1.0, 0.0, 0.5], self.name, dataset.labels)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-
-class TestSegmentationFWAcc:
-    name = 'frequency_weighted_accuracy'
-
-    def test_one_class(self):
-        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
-        dataset = single_class_dataset()
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result(1.0, self.name)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class_not_matched(self):
-        annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
-        predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
-        dataset = multi_class_dataset()
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result(0.0, self.name)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
-
-    def test_multi_class(self):
-        dataset = multi_class_dataset()
-        annotations = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), True)
-        predictions = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), False)
-        dispatcher = MetricsExecutor(create_config(self.name), dataset)
-        dispatcher.update_metrics_on_batch(annotations, predictions)
-        expected = generate_expected_result(0.5125, self.name)
-        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
-            assert evaluation_result == expected
diff --git a/tools/accuracy_checker/tests/test_utils.py b/tools/accuracy_checker/tests/test_utils.py
deleted file mode 100644 (file)
index 4ac9cdf..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-"""
-Copyright (c) 2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from accuracy_checker.utils import concat_lists, contains_all, contains_any, overrides, zipped_transform
-
-
-def test_concat_lists():
-    assert ['a', 'b'] == concat_lists(['a'], ['b'])
-    assert ['a', 'b', 'c'] == concat_lists(['a'], ['b'], ['c'])
-    assert ['a', 'b', 'c'] == concat_lists(['a', 'b'], ['c'])
-    assert ['a'] == concat_lists(['a'], [])
-    assert [] == concat_lists([], [], [])
-    assert [] == concat_lists([])
-
-
-def test_contains_all():
-    assert contains_all([1, 2, 3], [1, 2])
-    assert contains_all([1, 2, 3], [1, 2], [3])
-    assert not contains_all([1, 2, 3], [1, 5])
-
-
-def test_contains_any():
-    assert contains_any([1, 2, 3], [1])
-    assert contains_any([1, 2, 3], [4, 5, 2])
-    assert not contains_any([1, 2, 3], [4, 5])
-
-
-class TestZippedTransform:
-    def test_two_iterables(self):
-        as_ = [2, 3, 5]
-        bs = [2, 3, 6]
-
-        ras, rbs = zipped_transform(lambda a, b: (a + b, a - b), as_, bs)
-
-        assert ras == [4, 6, 11]
-        assert rbs == [0, 0, -1]
-        assert as_ == [2, 3, 5]
-        assert bs == [2, 3, 6]
-
-    def test_inplace(self):
-        as_ = [2, 3, 5]
-        bs = [2, 3, 6]
-
-        zipped_transform(lambda a, b: (a + b, a - b), as_, bs, inplace=True)
-
-        assert as_ == [4, 6, 11]
-        assert bs == [0, 0, -1]
-
-    def test_three_iterables(self):
-        as_ = [1, 1, 1]
-        bs = [2, 2, 2]
-        cs = [3, 3, 3]
-
-        ras, rbs, rcs = zipped_transform(lambda a, b, c: (a + 1, b + 2, c + 3), as_, bs, cs)
-
-        assert ras == [2, 2, 2]
-        assert rbs == [4, 4, 4]
-        assert rcs == [6, 6, 6]
-
-    def test_none_function(self):
-        xs = [1, 1, 1]
-        ys = [1, 1, 1]
-        zipped_transform(lambda a, b: None, xs, ys)
-
-
-class TestOverrides:
-    def test_negative(self):
-        class A:
-            def foo(self):
-                pass
-
-        class B(A):
-            pass
-
-        assert not overrides(B, 'foo')
-        assert not overrides(B(), 'foo')
-
-    def test_positive(self):
-        class A:
-            def foo(self):
-                pass
-
-        class B(A):
-            def foo(self):
-                pass
-
-        assert overrides(B, 'foo')
-        assert overrides(B(), 'foo')
-
-    def test_three_class(self):
-        class A:
-            def foo(self): pass
-
-        class B(A):
-            pass
-
-        class C(B):
-            def foo(self): pass
-
-        assert overrides(C, 'foo')
-        assert not overrides(B, 'foo')
-
-    def test_custom_base(self):
-        class A:
-            def foo(self): pass
-
-        class B:
-            def foo(self): pass
-
-        class C:
-            pass
-
-        assert overrides(B, 'foo', A)
-        assert not overrides(C, 'foo', A)
index 16dcdc0..fb42742 100644 (file)
-# OpenVINO™ Benchmark Python* package
-Inference Engine `openvino.tools.benchmark` Python\* package consists types to measure synchronous mode latency.  
-The package depends on `openvino.tools.accuracy_checker` the package.
+# Benchmark Python* Application
 
-Please, refer to https://docs.openvinotoolkit.org for details.
+This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks.
 
-## Usage
-You can use the `openvino.tools.calibration` package in a simple way:
-```Python
-import openvino.tools.benchmark as benchmark
+## How It Works
+
+Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
+plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
+on the mode defined with the `-api` command-line parameter.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+### Synchronous API
+
+For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. The number of executions is defined by one of the following:
+* Number of iterations defined with the `-niter` command-line argument
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution will continue until both conditions are met)
+* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device.
+
+During the execution, the application collects two types of metrics:
+* Latency for each infer request executed with `Infer` method
+* Duration of all executions
+
+The reported latency value is calculated as the mean of all collected latencies. The reported throughput value is derived from the reported latency and additionally depends on the batch size.
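+
+As an illustration only, the following minimal sketch measures synchronous latency in a similar way. It is not the benchmark_app implementation; it assumes the 2019-era `openvino.inference_engine` Python API, and the model path, device, and iteration count are placeholders.
+
+```python
+# Hedged sketch: synchronous latency measurement, not the actual benchmark_app code.
+from datetime import datetime
+
+import numpy as np
+from openvino.inference_engine import IECore, IENetwork
+
+ie = IECore()
+net = IENetwork(model='model.xml', weights='model.bin')      # placeholder IR files
+input_blob = next(iter(net.inputs))
+exec_net = ie.load_network(network=net, device_name='CPU')   # one infer request by default
+
+image = np.zeros(net.inputs[input_blob].shape, dtype=np.float32)  # dummy input
+
+latencies = []
+for _ in range(100):                                          # plays the role of -niter
+    start = datetime.now()
+    exec_net.infer(inputs={input_blob: image})                # synchronous Infer
+    latencies.append((datetime.now() - start).total_seconds() * 1000)
+
+mean_latency_ms = sum(latencies) / len(latencies)
+throughput_fps = net.batch_size * 1000 / mean_latency_ms      # derived from latency and batch size
+print('Latency: {:.2f} ms, throughput: {:.2f} FPS'.format(mean_latency_ms, throughput_fps))
+```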
+
+### Asynchronous API
+For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. The number of executions is defined by one of the following:
+* Number of iterations defined with the `-niter` command-line argument
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution will continue until both conditions are met)
+* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device.
+
+The infer requests are executed asynchronously. A callback is used to wait for the previous execution to complete. The application measures all infer request executions and reports the throughput metric based on the batch size and the total execution duration.
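+
+A minimal sketch of the asynchronous flow is shown below. Again, this is not the benchmark_app implementation; it assumes the 2019-era `openvino.inference_engine` Python API, and the request count, iteration count, and model path are placeholders.
+
+```python
+# Hedged sketch: asynchronous throughput measurement, not the actual benchmark_app code.
+from datetime import datetime
+
+import numpy as np
+from openvino.inference_engine import IECore, IENetwork
+
+ie = IECore()
+net = IENetwork(model='model.xml', weights='model.bin')            # placeholder IR files
+input_blob = next(iter(net.inputs))
+num_requests = 4                                                    # placeholder request pool size
+exec_net = ie.load_network(network=net, device_name='CPU', num_requests=num_requests)
+
+image = np.zeros(net.inputs[input_blob].shape, dtype=np.float32)   # dummy input
+iterations = 100                                                    # plays the role of -niter
+
+start = datetime.now()
+for i in range(iterations):
+    request = exec_net.requests[i % num_requests]
+    if i >= num_requests:
+        request.wait(-1)                 # wait for the previous run of this request slot
+    request.async_infer(inputs={input_blob: image})                 # StartAsync counterpart
+for request in exec_net.requests:
+    request.wait(-1)                     # drain all in-flight requests
+duration_s = (datetime.now() - start).total_seconds()
+
+print('Throughput: {:.2f} FPS'.format(iterations * net.batch_size / duration_s))
+```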
+
+## Running
+Note that the benchmark_app usually produces optimal performance for any device out of the box.
+
+**So in most cases you don't need to tune the application options explicitly, and the plain device name is enough**, e.g.:
+```
+$ benchmark_app -m <model> -i <input> -d CPU
+```
+
+However, performance may still be non-optimal in some cases, especially for very small networks. For more details, read [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md).
+
+Running the application with the `-h` or `--help` option yields the following usage message:
+
+```
+usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL
+                        [-d TARGET_DEVICE]
+                        [-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG]
+                        [-api {sync,async}] [-niter NUMBER_ITERATIONS]
+                        [-b BATCH_SIZE]
+                        [-stream_output [STREAM_OUTPUT]] [-t TIME]
+                        [-progress [PROGRESS]] [-nstreams NUMBER_STREAMS]
+                        [-nthreads NUMBER_THREADS] [-pin {YES,NO}]
+                        [--exec_graph_path EXEC_GRAPH_PATH]
+                        [-pc [PERF_COUNTS]]
+
+Options:
+  -h, --help            Show this help message and exit.
+  -i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT
+                        Optional. Path to a folder with images and/or binaries
+                        or to specific image or binary file.
+  -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL
+                        Required. Path to an .xml file with a trained model.
+  -d TARGET_DEVICE, --target_device TARGET_DEVICE
+                        Optional. Specify a target device to infer on: CPU,
+                        GPU, FPGA, HDDL or MYRIAD.
+                        Use "-d HETERO:<comma separated devices list>" format to specify HETERO plugin.
+                        Use "-d MULTI:<comma separated devices list>" format to specify MULTI plugin.
+                        The application looks for a suitable plugin for the specified device.
+  -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION
+                        Optional. Required for CPU custom layers. Absolute
+                        path to a shared library with the kernels
+                        implementations.
+  -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG
+                        Optional. Required for GPU custom kernels. Absolute
+                        path to an .xml file with the kernels description.
+  -api {sync,async}, --api_type {sync,async}
+                        Optional. Enable using sync/async API. Default value
+                        is async.
+  -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS
+                        Optional. Number of iterations. If not specified, the
+                        number of iterations is calculated depending on a
+                        device.
+  -b BATCH_SIZE, --batch_size BATCH_SIZE
+                        Optional. Batch size value. If not specified, the
+                        batch size value is determined from IR
+  -stream_output [STREAM_OUTPUT]
+                        Optional. Print progress as a plain text. When
+                        specified, an interactive progress bar is replaced
+                        with a multiline output.
+  -t TIME, --time TIME  Optional. Time in seconds to execute topology.
+  -progress [PROGRESS]  Optional. Show progress bar (can affect performance
+                        measurement). Default value is "False".
+  -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
+                        Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
+                        (for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
+                        Default value is determined automatically for a device.
+                        Please note that although the automatic selection usually provides a reasonable performance,
+                        it still may be non-optimal for some cases, especially for very small networks.
+  -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
+                        Number of threads to use for inference on the CPU
+                        (including HETERO and MULTI cases).
+  -pin {YES,NO}, --infer_threads_pinning {YES,NO}
+                        Optional. Enable ("YES" is default value) or disable
+                        ("NO") CPU threads pinning for CPU-involved inference.
+  --exec_graph_path EXEC_GRAPH_PATH
+                        Optional. Path to a file where to store serialized
+                        executable graph information.
+  -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
+                        Optional. Report performance counters.
 
-config = benchmark.CommandLineReader.read()
-result = benchmark.Benchmark(config).run()
-print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
 ```
-### Explanation
-1. Import `openvino.tools.benchmark` types:
-```Python
-import openvino.tools.benchmark as benchmark
+
+Running the application with an empty list of options yields the usage message given above and an error message.
+
+The application supports topologies with one or more inputs. If a topology is not data-sensitive, you can skip the input parameter. In this case, inputs are filled with random values.
+If a model has only image input(s), please provide a folder with images or a path to an image as input.
+If a model has some specific input(s) (not images), please prepare binary file(s) filled with data of the appropriate precision and provide a path to them as input.
+If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one.
+
+To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+For example, to do inference of an image using a trained network with multiple outputs on CPU, run the following command:
+
 ```
+python3 benchmark_app.py -i <path_to_image>/inputImage.bmp -m <path_to_model>/multiple-output.xml -d CPU
+```
+
+## Demo Output
+
+The application outputs the number of executed iterations, the total execution duration, the latency, and the throughput.
+Additionally, if you set the `-pc` parameter, the application outputs performance counters.
+If you set `--exec_graph_path`, the application stores serialized executable graph information to the specified file.
+
+```
+[Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
+Progress: |................................| 100.00%
+
+[Step 9/9] Dumping statistics report
+Progress: |................................| 100.00%
+
+Count:      4408 iterations
+Duration:   60153.52 ms
+Latency:    51.8244 ms
+Throughput: 73.28 FPS
 
-2. Read configuration and execute the benchmark:
-```Python
-config = benchmark.CommandLineReader.read()
-result = benchmark.Benchmark(config).run()
 ```
 
-3. Print results:
-```Python
-print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
-```
\ No newline at end of file
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
index d5f2cf5..e69de29 100644 (file)
@@ -1,26 +0,0 @@
-"""
-Copyright (C) 2018-2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from .benchmark import Benchmark
-from .command_line_reader import CommandLineReader
-from .configuration import Configuration
-
-__version__ = "0.0.1"
-__all__ = [
-    'Benchmark',
-    'CommandLineReader',
-    'Configuration'
-]
diff --git a/tools/benchmark/__main__.py b/tools/benchmark/__main__.py
deleted file mode 100644 (file)
index 5beda67..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-"""
-Copyright (C) 2018-2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import openvino.tools.benchmark as benchmark
-
-
-def benchmark():
-
-    config = benchmark.CommandLineReader.read()
-    result = benchmark.Benchmark(config).run()
-    print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
-
-
-if __name__ == '__main__':
-    benchmark()
index 52a0b39..dc6d5f8 100644 (file)
 """
-Copyright (C) 2018-2019 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
 
       http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
 """
+from datetime import datetime
+from statistics import median
+from openvino.inference_engine import IENetwork, IECore, get_version
 
-import numpy
-import datetime
+from .utils.constants import CPU_DEVICE_NAME, MULTI_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME
+from .utils.logging import logger
+from .utils.utils import get_duration_seconds, parse_value_per_device, parse_devices
 
-import openvino.inference_engine as ie
 
-from ..accuracy_checker.accuracy_checker.config import ConfigReader
-from ..accuracy_checker.accuracy_checker.evaluators.model_evaluator import ModelEvaluator
-from ..accuracy_checker.accuracy_checker.progress_reporters import PrintProgressReporter, TQDMReporter
 
-from ..network import Network
-
-from .configuration import Configuration
-from .logging import info
-
-
-class BenchmarkCallback:
-    def __init__(self, configuration: Configuration, network: Network=None, iterations_count:int=1000):
-        self._latency = None
-        self._configuration = configuration
-        self._network = network
-        self._iterations_count = iterations_count if iterations_count else 1000
-
-    def output_callback(self, value, latency = None):
-        pass
-
-
-    def benchmark_callback(self, network_inputs_data):
-        latencies = list()
-
-        if self._network:
-            ie_network = self._network.ie_network
-        else:
-            ie_network = ie.IENetwork(self._configuration.model, self._configuration.weights)
-
-        do_reshape = False
-        for name in ie_network.inputs.keys():
-            if name in network_inputs_data and \
-                    tuple(ie_network.inputs[name].shape) != network_inputs_data[name].shape:
-                do_reshape = True
-                break
-
-        if do_reshape:
-            new_shapes = {layer_name: data.shape for layer_name, data in network_inputs_data.items()}
+class Benchmark:
+    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
+        self.device = device.upper()
+        self.ie = IECore()
+        self.nireq = number_infer_requests
+        self.niter = number_iterations
+        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
+        self.api_type = api_type
+        self.device_number_streams = {}
+
+    def __del__(self):
+        del self.ie
+
+    def add_extension(self, path_to_extension: str=None, path_to_cldnn_config: str=None):
+        if GPU_DEVICE_NAME in self.device:
+            if path_to_cldnn_config:
+                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
+                logger.info('GPU extension is loaded {}'.format(path_to_cldnn_config))
+        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
+            if path_to_extension:
+                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
+                logger.info('CPU extension is loaded {}'.format(path_to_extension))
+
+    def get_version_info(self) -> str:
+        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
+        version_string = 'Device info\n'
+        for device, version in self.ie.get_versions(self.device).items():
+            version_string += '{: <9}{}\n'.format('', device)
+            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major,
+                                                               version.minor)
+            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
+        return version_string
+
+    @staticmethod
+    def reshape(ie_network: IENetwork, batch_size: int):
+        new_shapes = {}
+        for input_layer_name, input_layer in ie_network.inputs.items():
+            shape = input_layer.shape
+            layout = input_layer.layout
+
+            try:
+                batch_index = layout.index('N')
+            except ValueError:
+                batch_index = 1 if layout == 'C' else -1
+
+            if batch_index != -1 and shape[batch_index] != batch_size:
+                shape[batch_index] = batch_size
+                new_shapes[input_layer_name] = shape
+
+        if new_shapes:
+            logger.info('Resizing network to batch = {}'.format(batch_size))
             ie_network.reshape(new_shapes)
 
-        plugin = ie.IEPlugin(self._configuration.device)
-        if self._configuration.cpu_extension:
-            plugin.add_cpu_extension(self._configuration.cpu_extension)
-        exec_network = plugin.load(ie_network)
-
-        # warming up
-        exec_network.infer(network_inputs_data)
-
-        for i in range(self._iterations_count):
-            start = datetime.datetime.now()
-            exec_network.infer(network_inputs_data)
-            latencies.append((datetime.datetime.now() - start).microseconds)
-        self._latency = numpy.mean(latencies) / 1000000.0
-
-        del ie_network
-        del exec_network
-        del plugin
-
-
-    @property
-    def latency(self) -> float:
-        return self._latency
-
-
-class BenchmarkResult:
-    def __init__(self, latency):
-        self._latency = latency
-
-    @property
-    def latency(self) -> float:
-        return self._latency
-
-
-class InferOptions:
-    def __init__(self, iterations_count=1000):
-        self._iterations_count = iterations_count
-
-    @property
-    def iterations_count(self) -> int:
-        return self._iterations_count
-
-
-class Benchmark:
-    def __init__(self, configuration: Configuration):
-        if configuration is None:
-            raise ValueError("configuration is None")
-
-        self._configuration = configuration
-        pass
-
-    def run(
-        self,
-        network: Network = None,
-        statistics=None,
-        quantization_levels=None,
-        iterations_count:int = 1000) -> BenchmarkResult:
-
-        model = self._configuration.config['models'][0]
-        launcher_config = model['launchers'][0]
-        dataset_config = model['datasets'][0]
-
-        model_evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
-        try:
-            if network:
-                del model_evaluator.launcher.network
-                del model_evaluator.launcher.exec_network
-                model_evaluator.launcher.network = network.ie_network
-                model_evaluator.launcher.exec_network = model_evaluator.launcher.plugin.load(network.ie_network)
-
-            ie_network = model_evaluator.launcher.network
-
-            if statistics:
-                network_stats = {}
-                for layer_name, node_statistic in statistics.items():
-                    network_stats[layer_name] = ie.LayerStats(
-                        min=tuple(node_statistic.min_outputs),
-                        max=tuple(node_statistic.max_outputs))
-                ie_network.stats.update(network_stats)
-
-            if quantization_levels:
-                for layer_name, value in quantization_levels.items():
-                    params = ie_network.layers[layer_name].params
-                    params["quantization_level"] = value
-                    ie_network.layers[layer_name].params = params
-
-            if model_evaluator.dataset.size != 1:
-                info("only one first image is used from dataset annotation to perform benchmark")
-                model_evaluator.dataset.size = 1
-
-            process_dataset_callback = BenchmarkCallback(
-                configuration=self._configuration,
-                network=network,
-                iterations_count=iterations_count)
-
-            model_evaluator.process_dataset(
-                None,
-                progress_reporter=None,
-                output_callback=process_dataset_callback.output_callback,
-                benchmark=process_dataset_callback.benchmark_callback)
-
-            if len(model_evaluator.launcher.exec_network.requests) != 1:
-                raise ValueError("unexpected network requests count")
-
-            latency = process_dataset_callback.latency
-        finally:
-            model_evaluator.release()
-
-        return BenchmarkResult(latency)
+    def set_config(self, number_streams: int, api_type: str = 'async',
+                   number_threads: int = None, infer_threads_pinning: int = None):
+        devices = parse_devices(self.device)
+        self.device_number_streams = parse_value_per_device(devices, number_streams)
+        for device in devices:
+            if device == CPU_DEVICE_NAME:  # CPU supports few special performance-oriented keys
+                # limit threading for CPU portion of inference
+                if number_threads:
+                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)
+
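+                # when the CPU works alongside the GPU under MULTI, leave CPU threads unpinned so they
+                # do not compete with the GPU driver's polling thread (see the GPU throttling hint below)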
+                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
+                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
+                else:
+                    # pin threads for CPU portion of inference
+                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)
+
+                # for CPU execution, more throughput-oriented execution via streams
+                if api_type == 'async':
+                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
+                    if device in self.device_number_streams.keys():
+                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
+                    self.ie.set_config(cpu_throughput, device)
+                    self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')
+
+            elif device == GPU_DEVICE_NAME:
+                if api_type == 'async':
+                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
+                    if device in self.device_number_streams.keys():
+                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
+                    self.ie.set_config(gpu_throughput, device)
+                    self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')
+
+                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
+                    # multi-device execution with the CPU+GPU performs best with GPU throttling hint,
+                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
+                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)
+
+            elif device == MYRIAD_DEVICE_NAME:
+                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO',
+                                    'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)
+
+    def load_network(self, ie_network: IENetwork, perf_counts: bool, number_infer_requests: int = None):
+        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}
+
+        exe_network = self.ie.load_network(ie_network,
+                                           self.device,
+                                           config=config,
+                                           num_requests=number_infer_requests or 0)
+
+        return exe_network
+
+    def infer(self, request_queue, requests_input_data, batch_size, progress_bar):
+        progress_count = 0
+        # warming up - out of scope
+        infer_request = request_queue.get_idle_request()
+        if not infer_request:
+            raise Exception('No idle Infer Requests!')
+
+        if self.api_type == 'sync':
+            infer_request.infer(requests_input_data[infer_request.req_id])
+        else:
+            infer_request.start_async(requests_input_data[infer_request.req_id])
+
+        request_queue.wait_all()
+        request_queue.reset_times()
+
+        start_time = datetime.now()
+        exec_time = (datetime.now() - start_time).total_seconds()
+        iteration = 0
+
+        # Start inference & calculate performance
+        # align the number of iterations to guarantee that the last infer requests are executed under the same conditions
+        while (self.niter and iteration < self.niter) or \
+              (self.duration_seconds and exec_time < self.duration_seconds) or \
+              (self.api_type == 'async' and iteration % self.nireq):
+            infer_request = request_queue.get_idle_request()
+            if not infer_request:
+                raise Exception('No idle Infer Requests!')
+
+            if self.api_type == 'sync':
+                infer_request.infer(requests_input_data[infer_request.req_id])
+            else:
+                infer_request.start_async(requests_input_data[infer_request.req_id])
+            iteration += 1
+
+            exec_time = (datetime.now() - start_time).total_seconds()
+
+            if self.duration_seconds:
+                # calculate how many progress intervals are covered by current iteration.
+                # depends on the current iteration time and time of each progress interval.
+                # Previously covered progress intervals must be skipped.
+                progress_interval_time = self.duration_seconds / progress_bar.total_num
+                new_progress = int(exec_time / progress_interval_time - progress_count)
+                progress_bar.add_progress(new_progress)
+                progress_count += new_progress
+            elif self.niter:
+                progress_bar.add_progress(1)
+
+        # wait the latest inference executions
+        request_queue.wait_all()
+
+        total_duration_sec = request_queue.get_duration_in_seconds()
+        times = request_queue.times
+        times.sort()
+        latency_ms = median(times)
+        fps = batch_size * 1000 / latency_ms
+        if self.api_type == 'async':
+            fps = batch_size * iteration / total_duration_sec
+        progress_bar.finish()
+        return fps, latency_ms, total_duration_sec, iteration
diff --git a/tools/benchmark/command_line_reader.py b/tools/benchmark/command_line_reader.py
deleted file mode 100644 (file)
index 4599b28..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-"""
-Copyright (C) 2018-2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import os
-import collections
-import errno
-import pathlib
-from functools import partial
-from argparse import ArgumentParser
-from typing import Union
-
-from ..accuracy_checker.accuracy_checker.config import ConfigReader
-from ..accuracy_checker.accuracy_checker.utils import get_path
-from ..network import Network
-
-from .configuration import Configuration
-from .logging import info
-
-
-class CommandLineReader:
-    """
-    Class for parsing input config
-    """
-    @staticmethod
-    def read():
-        args, unknown_args = CommandLineReader.__build_arguments_parser().parse_known_args()
-        if unknown_args:
-            info("unknown command line arguments: {0}".format(unknown_args))
-
-        args.target_framework = "dlsdk"
-        args.aocl = None
-
-        merged_config = ConfigReader.merge(args)
-        launcher = merged_config['models'][0]['launchers'][0]
-
-        batch_size = args.batch_size if args.batch_size else (launcher['batch'] if 'batch' in launcher else None)
-        if not batch_size:
-            with Network(str(launcher['model']), str(launcher['weights'])) as network:
-                batch_size = network.ie_network.batch_size
-
-        return Configuration(
-            config = merged_config,
-            model = str(launcher['model']),
-            weights = str(launcher['weights']),
-            cpu_extension = (str(launcher['cpu_extensions']) if 'cpu_extensions' in launcher else None),
-            gpu_extension = (str(launcher['gpu_extensions']) if 'gpu_extensions' in launcher else None),
-            device = launcher['device'],
-            benchmark_iterations_count = args.benchmark_iterations_count)
-
-    @staticmethod
-    def __build_arguments_parser():
-        parser = ArgumentParser(description='openvino.tools.benchmark')
-
-        parser.add_argument(
-            '-d', '--definitions',
-            help='Optional. Path to the YML file with definitions',
-            type=str,
-            required=False)
-
-        parser.add_argument(
-            '-c',
-            '--config',
-            help='Required. Path to the YML file with local configuration',
-            type=get_path,
-            required=True)
-
-        parser.add_argument(
-            '-m', '--models',
-            help='Optional. Prefix path to the models and weights',
-            type=partial(get_path, is_directory=True),
-            default=pathlib.Path.cwd(),
-            required=False)
-
-        parser.add_argument(
-            '-s', '--source',
-            help='Optional. prefix path to the data source',
-            type=partial(get_path, is_directory=True),
-            default=pathlib.Path.cwd(),
-            required=False)
-
-        parser.add_argument(
-            '-a', '--annotations',
-            help='Optional. prefix path to the converted annotations and datasets meta data',
-            type=partial(get_path, is_directory=True),
-            default=pathlib.Path.cwd(),
-            required=False)
-
-        parser.add_argument(
-            '-e', '--extensions',
-            help='Optional. Prefix path to extensions folder',
-            type=partial(get_path, is_directory=True),
-            default=pathlib.Path.cwd(),
-            required=False)
-
-        parser.add_argument(
-            '--cpu_extensions_mode',
-            help='Optional. specified preferable set of processor instruction for automatic searching cpu extension lib',
-            required=False,
-            choices=['avx2', 'sse4'])
-
-        parser.add_argument(
-            '-b', '--bitstreams',
-            help='Optional. prefix path to bitstreams folder',
-            type=partial(get_path, is_directory=True),
-            default=pathlib.Path.cwd(),
-            required=False)
-
-        parser.add_argument(
-            '-C', '--converted_models', '--converted-models',
-            help='Optional. directory to store Model Optimizer converted models. Used for DLSDK launcher only',
-            type=partial(get_path, is_directory=True),
-            default=pathlib.Path.cwd(),
-            required=False)
-
-        parser.add_argument(
-            '-td', '--target_devices', '--target-devices',
-            help='Optional. Space-separated list of devices for infer',
-            required=False,
-            nargs='+',
-            default=["CPU"])
-
-        parser.add_argument(
-            '-tt', '--target_tags', '--target-tags',
-            help='Optional. Space-separated list of launcher tags for infer',
-            required=False,
-            nargs='+')
-
-        parser.add_argument(
-            '--batch-size',
-            help='Optional. Batch size value. If not specified, the batch size value is determined from IR',
-            type=int,
-            required=False)
-
-        parser.add_argument(
-            '-ic',
-            '--benchmark_iterations_count',
-            help='Optional. Benchmark itertations count. (1000 is default)',
-            type=float,
-            required=False,
-            default=1000)
-
-        return parser
\ No newline at end of file
diff --git a/tools/benchmark/configuration.py b/tools/benchmark/configuration.py
deleted file mode 100644 (file)
index af3d6dc..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Copyright (C) 2018-2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-
-class Configuration:
-    def __init__(
-        self,
-        config: str,
-        model: str,
-        weights: str,
-        device: str,
-        cpu_extension: str,
-        gpu_extension: str,
-        benchmark_iterations_count: int
-    ):
-
-        self._config = config
-        self._model = model
-        self._weights = weights
-        self._device = device
-        self._cpu_extension = cpu_extension
-        self._gpu_extension = gpu_extension
-        self._benchmark_iterations_count = benchmark_iterations_count
-
-    @property
-    def config(self) -> str:
-        return self._config
-
-    @property
-    def model(self) -> str:
-        return self._model
-
-    @property
-    def weights(self) -> str:
-        return self._weights
-
-    @property
-    def device(self) -> str:
-        return self._device
-
-    @property
-    def cpu_extension(self) -> str:
-        return self._cpu_extension
-
-    @property
-    def gpu_extension(self) -> str:
-        return self._gpu_extension
-
-    @property
-    def benchmark_iterations_count(self):
-        return self._benchmark_iterations_count
\ No newline at end of file
diff --git a/tools/benchmark/logging.py b/tools/benchmark/logging.py
deleted file mode 100644 (file)
index f3fec90..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-"""
-Copyright (C) 2018-2019 Intel Corporation
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import logging
-import logging.config
-import sys
-import warnings
-
-# TODO: move to utils
-_DEFAULT_LOGGER_NAME = 'openvino.tools.benchmark'
-_DEFAULT_LOG_FILE = 'openvino.tools.benchmark.log'
-
-PRINT_INFO = logging.INFO + 5
-logging.addLevelName(PRINT_INFO, "PRINT_INFO")
-
-_LOG_LEVEL_ENVIRON = "CALIBRATION_TOOL_LOG_LEVEL"
-# _LOGGING_LEVEL = logging.getLevelName(os.environ.get(_LOG_LEVEL_ENVIRON, PRINT_INFO))
-# TODO: refactoring: remove, use original line
-_LOGGING_LEVEL = "DEBUG"
-
-
-class LoggingFormatter(logging.Formatter):
-    def format(self, record: logging.LogRecord):
-        if record.levelno == PRINT_INFO:
-            return record.msg
-        return super().format(record)
-
-
-class ConsoleHandler(logging.StreamHandler):
-    def __init__(self, default_stream=sys.stdout):
-        super().__init__(default_stream)
-        self.default_stream = default_stream
-        self.err_stream = sys.stderr
-
-    def emit(self, record):
-        if record.levelno >= logging.WARNING:
-            self.stream = self.err_stream
-        else:
-            self.stream = self.default_stream
-        super().emit(record)
-
-
-_LOGGING_CONFIGURATION = {
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'default': {
-            '()': LoggingFormatter,
-            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s',
-            'datefmt': '%H:%M:%S'
-        },
-        'detailed': {
-            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s'
-        }
-    },
-    'handlers': {
-        'console': {
-            'level': 'DEBUG',
-            '()': ConsoleHandler,
-            'formatter': 'default',
-        }
-    },
-
-    'loggers': {
-        _DEFAULT_LOGGER_NAME: {
-            'handlers': ['console'],
-            'level': _LOGGING_LEVEL,
-            'propagate': False
-        }
-    }
-}
-
-logging.config.dictConfig(_LOGGING_CONFIGURATION)
-
-_default_logger = logging.getLogger(_DEFAULT_LOGGER_NAME)
-
-
-def _warning_handler(message, category, filename, lineno):
-    s = warnings.formatwarning(message, category, filename, lineno)
-    _default_logger.warning(s)
-
-
-warnings.showwarning = _warning_handler
-
-
-def get_logger(logger_name: str):
-    if logger_name.startswith(_DEFAULT_LOGGER_NAME):
-        return _default_logger.getChild(logger_name)
-    return logging.getLogger(logger_name)
-
-
-def error(msg, *args, **kwargs):
-    _default_logger.error(msg, *args, **kwargs)
-
-
-def warning(msg, *args, raise_warning=True, **kwargs):
-    if raise_warning:
-        warnings.warn(msg)
-    else:
-        _default_logger.warning(msg, *args, **kwargs)
-
-
-def info(msg, *args, **kwargs):
-    _default_logger.info(msg, *args, **kwargs)
-
-
-def debug(msg, *args, **kwargs):
-    _default_logger.debug(msg, *args, **kwargs)
-
-
-def print_info(msg, *args, **kwargs):
-    _default_logger.log(PRINT_INFO, msg, *args, **kwargs)
index 5e3e8ee..7042cb2 100644 (file)
@@ -1,8 +1,4 @@
 py-cpuinfo
 numpy
 progress
-pyyaml
-opencv-python
-shapely
-sklearn
-xmltodict
+opencv-python
\ No newline at end of file
diff --git a/tools/benchmark/utils/constants.py b/tools/benchmark/utils/constants.py
new file mode 100644 (file)
index 0000000..8ad915b
--- /dev/null
@@ -0,0 +1,53 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the 'License');
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an 'AS IS' BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+VPU_DEVICE_NAME = 'VPU'
+MYRIAD_DEVICE_NAME = 'MYRIAD'
+HDDL_DEVICE_NAME = 'HDDL'
+FPGA_DEVICE_NAME = 'FPGA'
+CPU_DEVICE_NAME = 'CPU'
+GPU_DEVICE_NAME = 'GPU'
+HETERO_DEVICE_NAME = 'HETERO'
+MULTI_DEVICE_NAME = 'MULTI'
+UNKNOWN_DEVICE_TYPE = 'UNKNOWN'
+
+XML_EXTENSION = '.xml'
+BIN_EXTENSION = '.bin'
+
+XML_EXTENSION_PATTERN = '*' + XML_EXTENSION
+
+IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP']
+BINARY_EXTENSIONS = ['BIN']
+
+DEVICE_DURATION_IN_SECS = {
+    CPU_DEVICE_NAME: 60,
+    GPU_DEVICE_NAME: 60,
+    VPU_DEVICE_NAME: 60,
+    MYRIAD_DEVICE_NAME: 60,
+    HDDL_DEVICE_NAME: 60,
+    FPGA_DEVICE_NAME: 120,
+    UNKNOWN_DEVICE_TYPE: 120
+}
+
+DEVICE_NIREQ_ASYNC = {
+    CPU_DEVICE_NAME: 2,
+    GPU_DEVICE_NAME: 2,
+    VPU_DEVICE_NAME: 4,
+    MYRIAD_DEVICE_NAME: 4,
+    HDDL_DEVICE_NAME: 100,
+    FPGA_DEVICE_NAME: 3,
+    UNKNOWN_DEVICE_TYPE: 1
+}
diff --git a/tools/benchmark/utils/infer_request_wrap.py b/tools/benchmark/utils/infer_request_wrap.py
new file mode 100644 (file)
index 0000000..37a757d
--- /dev/null
@@ -0,0 +1,82 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from datetime import datetime
+import threading
+
+
+class InferReqWrap:
+    def __init__(self, request, req_id, callback_queue):
+        self.req_id = req_id
+        self.request = request
+        self.request.set_completion_callback(self.callback, self.req_id)
+        self.callbackQueue = callback_queue
+
+    def callback(self, status_code, user_data):
+        if user_data != self.req_id:
+            print('Request ID {} does not correspond to user data {}'.format(self.req_id, user_data))
+        elif status_code:
+            print('Request {} failed with status code {}'.format(self.req_id, status_code))
+        self.callbackQueue(self.req_id, self.request.latency)
+
+    def start_async(self, input_data):
+        self.request.async_infer(input_data)
+
+    def infer(self, input_data):
+        self.request.infer(input_data)
+        self.callbackQueue(self.req_id, self.request.latency)
+
+
+class InferRequestsQueue:
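+    # Tracks idle request ids and collected per-request latencies; the condition variable lets callers
+    # block until a request becomes idle (get_idle_request) or until all requests are idle (wait_all)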
+    def __init__(self, requests):
+        self.idleIds = []
+        self.requests = []
+        self.times = []
+        for req_id in range(len(requests)):
+            self.requests.append(InferReqWrap(requests[req_id], req_id, self.put_idle_request))
+            self.idleIds.append(req_id)
+        self.startTime = datetime.max
+        self.endTime = datetime.min
+        self.cv = threading.Condition()
+
+    def reset_times(self):
+        self.times.clear()
+
+    def get_duration_in_seconds(self):
+        return (self.endTime - self.startTime).total_seconds()
+
+    def put_idle_request(self, req_id, latency):
+        self.cv.acquire()
+        self.times.append(latency)
+        self.idleIds.append(req_id)
+        self.endTime = max(self.endTime, datetime.now())
+        self.cv.notify()
+        self.cv.release()
+
+    def get_idle_request(self):
+        self.cv.acquire()
+        while len(self.idleIds) == 0:
+            self.cv.wait()
+        req_id = self.idleIds.pop()
+        self.startTime = min(datetime.now(), self.startTime)
+        self.cv.release()
+        return self.requests[req_id]
+
+    def wait_all(self):
+        self.cv.acquire()
+        while len(self.idleIds) != len(self.requests):
+            self.cv.wait()
+        self.cv.release()
diff --git a/tools/benchmark/utils/inputs_filling.py b/tools/benchmark/utils/inputs_filling.py
new file mode 100644 (file)
index 0000000..8dcbee3
--- /dev/null
@@ -0,0 +1,189 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import os
+import cv2
+import numpy as np
+
+from glob import glob
+
+from .constants import IMAGE_EXTENSIONS, BINARY_EXTENSIONS
+from .logging import logger
+
+
+def is_image(blob):
+    if blob.layout != "NCHW":
+        return False
+    channels = blob.shape[1]
+    return channels == 3
+
+
+def is_image_info(blob):
+    if blob.layout != "NC":
+        return False
+    channels = blob.shape[1]
+    return channels >= 2
+
+
+def get_inputs(path_to_input, batch_size, input_info, requests):
+    input_image_sizes = {}
+    for key in input_info.keys():
+        if is_image(input_info[key]):
+            input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3])
+        logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key,
+                                                                                  input_info[key].precision,
+                                                                                  input_info[key].layout,
+                                                                                  " ".join(str(x) for x in
+                                                                                           input_info[key].shape)))
+
+    images_count = len(input_image_sizes.keys())
+    binaries_count = len(input_info) - images_count
+
+    image_files = list()
+    binary_files = list()
+
+    if path_to_input:
+        image_files = get_files_by_extensions(path_to_input, IMAGE_EXTENSIONS)
+        image_files.sort()
+        binary_files = get_files_by_extensions(path_to_input, BINARY_EXTENSIONS)
+        binary_files.sort()
+
+    if (len(image_files) == 0) and (len(binary_files) == 0):
+        logger.warn("No input files were given: all inputs will be filled with random values!")
+    else:
+        binary_to_be_used = binaries_count * batch_size * len(requests)
+        if binary_to_be_used > 0 and len(binary_files) == 0:
+            logger.warn("No supported binary inputs found! Please check your file extensions: {}".format(
+                ",".join(BINARY_EXTENSIONS)))
+        elif binary_to_be_used > len(binary_files):
+            logger.warn(
+                "Some binary input files will be duplicated: {} files are required, but only {} were provided".format(
+                    binary_to_be_used, len(binary_files)))
+        elif binary_to_be_used < len(binary_files):
+            logger.warn(
+                "Some binary input files will be ignored: only {} files are required from {}".format(binary_to_be_used,
+                                                                                                     len(binary_files)))
+
+        images_to_be_used = images_count * batch_size * len(requests)
+        if images_to_be_used > 0 and len(image_files) == 0:
+            logger.warn("No supported image inputs found! Please check your file extensions: {}".format(
+                ",".join(IMAGE_EXTENSIONS)))
+        elif images_to_be_used > len(image_files):
+            logger.warn(
+                "Some image input files will be duplicated: {} files are required, but only {} were provided".format(
+                    images_to_be_used, len(image_files)))
+        elif images_to_be_used < len(image_files):
+            logger.warn(
+                "Some image input files will be ignored: only {} files are required from {}".format(images_to_be_used,
+                                                                                                    len(image_files)))
+
+    requests_input_data = []
+    for request_id in range(0, len(requests)):
+        logger.info("Infer Request {} filling".format(request_id))
+        input_data = {}
+        keys = list(input_info.keys())
+        for key in keys:
+            if is_image(input_info[key]):
+                # input is image
+                if (len(image_files) > 0):
+                    input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key),
+                                                           len(keys), input_info[key].shape)
+                    continue
+
+            # input is binary
+            if (len(binary_files) > 0):
+                input_data[key] = fill_blob_with_binary(binary_files, input_info[key].shape)
+                continue
+
+            # most likely input is image info
+            if is_image_info(input_info[key]) and len(input_image_sizes) == 1:
+                image_size = input_image_sizes[list(input_image_sizes.keys()).pop()]
+                logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" +
+                            str(image_size[1]))
+                input_data[key] = fill_blob_with_image_info(image_size, input_info[key].shape)
+                continue
+
+            # fill with random data
+            logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if is_image(
+                input_info[key]) else "some binary data"))
+            input_data[key] = fill_blob_with_random(input_info[key].precision, input_info[key].shape)
+
+        requests_input_data.append(input_data)
+
+    return requests_input_data
+
+
+def get_files_by_extensions(path_to_input, extensions):
+    input_files = list()
+    if os.path.isfile(path_to_input):
+        input_files.append(path_to_input)
+    else:
+        path = os.path.join(path_to_input, '*')
+        files = glob(path, recursive=True)
+        for file in files:
+            file_extension = file.rsplit('.').pop().upper()
+            if file_extension in extensions:
+                input_files.append(file)
+    return input_files
+
+
+def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, shape):
+    images = np.ndarray(shape)
+    image_index = request_id * batch_size * input_size + input_id
+    for b in range(batch_size):
+        image_index %= len(image_paths)
+        image_filename = image_paths[image_index]
+        logger.info('Prepare image {}'.format(image_filename))
+        image = cv2.imread(image_filename)
+
+        new_im_size = tuple(shape[2:])
+        if image.shape[:-1] != new_im_size:
+            logger.warn("Image is resized from ({}) to ({})".format(image.shape[:-1], new_im_size))
+            image = cv2.resize(image, new_im_size)
+
+        image = image.transpose((2, 1, 0))
+        images[b] = image
+
+        image_index += input_size
+    return images
+
+
+def fill_blob_with_image_info(image_size, shape):
+    im_info = np.ndarray(shape)
+    for b in range(shape[0]):
+        for i in range(shape[1]):
+            im_info[b][i] = image_size[i] if i in [0, 1] else 1
+
+    return im_info
+
+
+def fill_blob_with_random(precision, shape):
+    if precision == "FP32":
+        return np.random.rand(*shape).astype(np.float32)
+    elif precision == "FP16":
+        return np.random.rand(*shape).astype(np.float16)
+    elif precision == "I32":
+        return np.random.rand(*shape).astype(np.int32)
+    elif precision == "U8":
+        return np.random.rand(*shape).astype(np.uint8)
+    elif precision == "I8":
+        return np.random.rand(*shape).astype(np.int8)
+    elif precision == "U16":
+        return np.random.rand(*shape).astype(np.uint16)
+    elif precision == "I16":
+        return np.random.rand(*shape).astype(np.int16)
+    else:
+        raise Exception("Input precision is not supported: " + precision)
 
 from progress.bar import Bar
 
+
 class ProgressBar:
     def __init__(self, total_num, stream_output=False, progress_enabled=False):
         self.stream_output = stream_output
         self.is_finished = True
         self.progress_enabled = progress_enabled
+        self.percent_to_update = 1
+        self.cur_progress = 0
+        self.total_num = total_num
         self.reset(total_num)
 
     def add_progress(self, num):
         self.is_finished = False
         if self.progress_enabled:
-           for i in range(num):
-              self.bar.next()
-              if self.stream_output:
-                  print()
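+            # accumulate progress and redraw the bar only when it has advanced by at least
+            # percent_to_update percent (or at the very start/end), to limit console output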
+            self.cur_progress += num
+            total_progress = self.bar.max
+            if self.cur_progress > total_progress:
+                self.cur_progress = total_progress
+
+            prev_progress = self.bar.index
+            prev_percent = 100 * prev_progress / total_progress
+            cur_percent = 100 * self.cur_progress / total_progress
+            if prev_progress == 0 or \
+               self.cur_progress == total_progress or \
+               prev_percent + self.percent_to_update <= cur_percent:
+                self.bar.next(self.cur_progress - self.bar.index)
+                if self.stream_output:
+                    print()
 
-    def finish(self, num = 0):
-        if (num > 0):
+    def finish(self, num=0):
+        if num:
             self.add_progress(num)
 
         self.is_finished = True
@@ -42,10 +56,10 @@ class ProgressBar:
 
     def reset(self, total_num):
         if self.progress_enabled:
-            self.bar = Bar('Progress:', max = total_num, fill = '.', suffix='%(percent).2f%%')
+            self.bar = Bar('Progress:', max=total_num, fill='.', suffix='%(percent).d%%')
 
     def new_bar(self, total_num):
         if self.is_finished:
             self.reset(total_num)
         else:
-           raise Exception("Cannot create a new bar. Current bar is still in progress")
+            raise Exception('Cannot create a new bar. Current bar is still in progress')
diff --git a/tools/benchmark/utils/statistics_report.py b/tools/benchmark/utils/statistics_report.py
new file mode 100644 (file)
index 0000000..daa0490
--- /dev/null
@@ -0,0 +1,119 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import os
+import sys
+from enum import Enum
+
+from .logging import logger
+
+## statistics reports types
+noCntReport = 'no_counters'
+averageCntReport = 'average_counters'
+detailedCntReport = 'detailed_counters'
+
+## Responsible for collecting of statistics and dumping to .csv file
+class StatisticsReport:
+    class Config():
+        def __init__(self, report_type, report_folder):
+            self.report_type = report_type
+            self.report_folder = report_folder
+
+    class Category(Enum):
+        COMMAND_LINE_PARAMETERS = 0,
+        RUNTIME_CONFIG = 1,
+        EXECUTION_RESULTS = 2
+
+    def __init__(self, config):
+        self.config = config
+        self.parameters = {}
+        self.csv_separator = ';'
+
+    def add_parameters(self, category, parameters):
+        if category not in self.parameters.keys():
+            self.parameters[category] = parameters
+        else:
+            self.parameters[category].extend(parameters)
+
+    def dump(self):
+        def dump_parameters(f, parameters):
+            for k, v in parameters:
+                f.write('{}{}{}\n'.format(k, self.csv_separator, v))
+
+        with open(os.path.join(self.config.report_folder, 'benchmark_report.csv'), 'w') as f:
+            if self.Category.COMMAND_LINE_PARAMETERS in self.parameters.keys():
+                f.write('Command line parameters\n')
+                dump_parameters(f, self.parameters[self.Category.COMMAND_LINE_PARAMETERS])
+                f.write('\n')
+
+            if self.Category.RUNTIME_CONFIG in self.parameters.keys():
+                f.write('Configuration setup\n')
+                dump_parameters(f, self.parameters[self.Category.RUNTIME_CONFIG])
+                f.write('\n')
+
+            if self.Category.EXECUTION_RESULTS in self.parameters.keys():
+                f.write('Execution results\n')
+                dump_parameters(f, self.parameters[self.Category.EXECUTION_RESULTS])
+                f.write('\n')
+
+            logger.info("Statistics report is stored to {}".format(f.name))
+
+    def dump_performance_counters_request(self, f, perf_counts):
+        total = 0
+        total_cpu = 0
+        f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
+        for k, v in sorted(perf_counts.items(), key=lambda x: x[1]['execution_index']):
+            f.write(self.csv_separator.join([k, v['status'], v['layer_type'], v['exec_type'], str(v['real_time']/1000.0), str(v['cpu_time']/1000.0)]))
+            f.write('\n')
+            total += v['real_time']
+            total_cpu += v['cpu_time']
+        f.write(self.csv_separator.join(['Total','','','',str(total/1000.0),str(total_cpu/1000.0)]))
+        f.write('\n\n')
+
+    def dump_performance_counters(self, perf_counts):
+        if self.config.report_type == '' or self.config.report_type == noCntReport:
+            logger.info("Statistics collecting for performance counters was not requested. No reports are dumped.")
+            return
+
+        if not perf_counts:
+            logger.info('Performance counters are empty. No reports are dumped.')
+            return
+
+        filename = os.path.join(self.config.report_folder, 'benchmark_{}_report.csv'.format(self.config.report_type))
+        with open(filename, 'w') as f:
+            if self.config.report_type == detailedCntReport:
+                for pc in perf_counts:
+                    self.dump_performance_counters_request(f, pc)
+            elif self.config.report_type == averageCntReport:
+                def get_average_performance_counters(perf_counts):
+                    performance_counters_avg = {}
+                    ## iterate over each processed infer request and handle its PM data
+                    for i in range(0, len(perf_counts)):
+                        ## iterate over each layer from sorted vector and add required PM data to the per-layer maps
+                        for k in perf_counts[0].keys():
+                            if k not in performance_counters_avg.keys():
+                                performance_counters_avg[k] = perf_counts[i][k]
+                            else:
+                                performance_counters_avg[k]['real_time'] += perf_counts[i][k]['real_time']
+                                performance_counters_avg[k]['cpu_time'] += perf_counts[i][k]['cpu_time']
+                    for _, v in performance_counters_avg.items():
+                        v['real_time'] /= len(perf_counts)
+                        v['cpu_time'] /= len(perf_counts)
+                    return performance_counters_avg
+                self.dump_performance_counters_request(f, get_average_performance_counters(perf_counts))
+            else:
+                raise Exception('PM data can only be collected for average or detailed report types')
+
+            logger.info('Performance counters report is stored to {}'.format(filename))
diff --git a/tools/benchmark/utils/utils.py b/tools/benchmark/utils/utils.py
new file mode 100644 (file)
index 0000000..8fe49b6
--- /dev/null
@@ -0,0 +1,248 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import os
+
+from openvino.inference_engine import IENetwork
+
+from .constants import DEVICE_DURATION_IN_SECS, UNKNOWN_DEVICE_TYPE, DEVICE_NIREQ_ASYNC, BIN_EXTENSION, \
+    CPU_DEVICE_NAME, GPU_DEVICE_NAME
+from .inputs_filling import is_image
+from .logging import logger
+
+
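+# Helper decorator: attaches the given keyword arguments as attributes on the
+# decorated function, emulating C-style static variables. next_step() below
+# uses it to keep a step counter across calls.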
+def static_vars(**kwargs):
+    def decorate(func):
+        for k in kwargs:
+            setattr(func, k, kwargs[k])
+        return func
+
+    return decorate
+
+
+@static_vars(step_id=0)
+def next_step(additional_info=''):
+    step_names = {
+        1: "Parsing and validating input arguments",
+        2: "Loading Inference Engine",
+        3: "Reading the Intermediate Representation network",
+        4: "Resizing network to match image sizes and given batch",
+        5: "Configuring input of the model",
+        6: "Setting device configuration",
+        7: "Loading the model to the device",
+        8: "Setting optimal runtime parameters",
+        9: "Creating infer requests and filling input blobs with images",
+        10: "Measuring performance",
+        11: "Dumping statistics report",
+    }
+
+    next_step.step_id += 1
+    if next_step.step_id not in step_names.keys():
+        raise Exception('Step ID {} is out of total steps number {}'.format(next_step.step_id, len(step_names)))
+
+    step_info_template = '[Step {}/{}] {}'
+    step_name = step_names[next_step.step_id] + (' ({})'.format(additional_info) if additional_info else '')
+    step_info_template = step_info_template.format(next_step.step_id, len(step_names), step_name)
+    print(step_info_template)
+
+
+def read_network(path_to_model: str):
+    xml_filename = os.path.abspath(path_to_model)
+    head, tail = os.path.splitext(xml_filename)
+    bin_filename = os.path.abspath(head + BIN_EXTENSION)
+
+    ie_network = IENetwork(xml_filename, bin_filename)
+
+    input_info = ie_network.inputs
+
+    if not input_info:
+        raise AttributeError('No input info is provided')
+
+    return ie_network
+
+
+def config_network_inputs(ie_network: IENetwork):
+    input_info = ie_network.inputs
+
+    for key in input_info.keys():
+        if is_image(input_info[key]):
+            # Set the precision of input data provided by the user
+            # Should be called before load of the network to the plugin
+            input_info[key].precision = 'U8'
+
+
+def get_number_iterations(number_iterations: int, nireq: int, api_type: str):
+    niter = number_iterations
+
+    if api_type == 'async' and niter:
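+        # Round the iteration count up to the nearest multiple of nireq so that
+        # every infer request executes the same number of iterations.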
+        niter = int((niter + nireq - 1) / nireq) * nireq
+        if number_iterations != niter:
+            logger.warn('Number of iterations was aligned up from {} to {} '
+                        'to be a multiple of the number of requests ({})'.format(number_iterations, niter, nireq))
+
+    return niter
+
+
+def get_duration_seconds(time, number_iterations, device):
+    if time:
+        # time limit
+        return time
+
+    if not number_iterations:
+        return get_duration_in_secs(device)
+    return 0
+
+
+def get_duration_in_milliseconds(duration):
+    return duration * 1000
+
+
+def get_duration_in_secs(target_device):
+    duration = 0
+    for device in DEVICE_DURATION_IN_SECS:
+        if device in target_device:
+            duration = max(duration, DEVICE_DURATION_IN_SECS[device])
+
+    if duration == 0:
+        duration = DEVICE_DURATION_IN_SECS[UNKNOWN_DEVICE_TYPE]
+        logger.warn('Default duration {} seconds is used for unknown device {}'.format(duration, target_device))
+
+    return duration
+
+
+def get_nireq(target_device):
+    nireq = 0
+    for device in DEVICE_NIREQ_ASYNC:
+        if device in target_device:
+            nireq = max(nireq, DEVICE_NIREQ_ASYNC[device])
+
+    if nireq == 0:
+        nireq = DEVICE_NIREQ_ASYNC[UNKNOWN_DEVICE_TYPE]
+        logger.warn('Default number of requests {} is used for unknown device {}'.format(nireq, target_device))
+
+    return nireq
+
+
+def parse_devices(device_string):
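+    # Strips an optional '<prefix>:' part and per-device request counts, e.g.
+    # (illustrative) 'MULTI:CPU(4),GPU(8)' -> ['CPU', 'GPU'].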
+    devices = device_string
+    if ':' in devices:
+        devices = devices.partition(':')[2]
+    return [d[:d.index('(')] if '(' in d else d for d in devices.split(',')]
+
+
+def parse_value_per_device(devices, values_string):
+    # Format: <device1>:<value1>,<device2>:<value2> or just <value>
+    result = {}
+    if not values_string:
+        return result
+    device_value_strings = values_string.upper().split(',')
+    for device_value_string in device_value_strings:
+        device_value_vec = device_value_string.split(':')
+        if len(device_value_vec) == 2:
+            for device in devices:
+                if device == device_value_vec[0]:
+                    value = int(device_value_vec[1])
+                    result[device_value_vec[0]] = value
+                    break
+        elif len(device_value_vec) == 1:
+            value = int(device_value_vec[0])
+            for device in devices:
+                result[device] = value
+        else:
+            raise Exception('Unknown string format: ' + values_string)
+    return result
+
+
+def process_help_inference_string(benchmark_app):
+    output_string = 'Start inference {}hronously'.format(benchmark_app.api_type)
+    if benchmark_app.api_type == 'async':
+        output_string += ', {} inference requests'.format(benchmark_app.nireq)
+
+        device_ss = ''
+        if CPU_DEVICE_NAME in benchmark_app.device:
+            device_ss += str(benchmark_app.ie.get_config(CPU_DEVICE_NAME, 'CPU_THROUGHPUT_STREAMS'))
+            device_ss += ' streams for {}'.format(CPU_DEVICE_NAME)
+        if GPU_DEVICE_NAME in benchmark_app.device:
+            device_ss += ', ' if device_ss else ''
+            device_ss += str(benchmark_app.ie.get_config(GPU_DEVICE_NAME, 'GPU_THROUGHPUT_STREAMS'))
+            device_ss += ' streams for {}'.format(GPU_DEVICE_NAME)
+
+        if device_ss:
+            output_string += ' using ' + device_ss
+
+    limits = ''
+
+    if benchmark_app.niter and not benchmark_app.duration_seconds:
+        limits += '{} iterations'.format(benchmark_app.niter)
+
+    if benchmark_app.duration_seconds:
+        limits += '{} ms duration'.format(get_duration_in_milliseconds(benchmark_app.duration_seconds))
+    if limits:
+        output_string += ', limits: ' + limits
+
+    return output_string
+
+
+def dump_exec_graph(exe_network, exec_graph_path):
+    try:
+        exec_graph_info = exe_network.get_exec_graph_info()
+        exec_graph_info.serialize(exec_graph_path)
+        logger.info('Executable graph is stored to {}'.format(exec_graph_path))
+        del exec_graph_info
+    except Exception as e:
+        logger.exception(e)
+
+
+def print_perf_counters(perf_counts_list):
+    for ni in range(len(perf_counts_list)):
+        perf_counts = perf_counts_list[ni]
+        total_time = 0
+        total_time_cpu = 0
+        logger.info("Performance counts for {}-th infer request".format(ni))
+        for layer, stats in sorted(perf_counts.items(), key=lambda x: x[1]['execution_index']):
+            max_layer_name = 30
+            print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
+                layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
+                stats['status'],
+                'layerType: ' + str(stats['layer_type']),
+                'realTime: ' + str(stats['real_time']),
+                'cpu: ' + str(stats['cpu_time']),
+                'execType: ' + str(stats['exec_type'])))
+            total_time += stats['real_time']
+            total_time_cpu += stats['cpu_time']
+        print('Total time:     {} microseconds'.format(total_time))
+        print('Total CPU time: {} microseconds\n'.format(total_time_cpu))
+
+def get_command_line_arguments(argv):
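+    # Pairs '-option value' and '-option=value' tokens from sys.argv into
+    # (name, value) tuples, e.g. (illustrative) ['app.py', '-m', 'model.xml']
+    # -> [('-m', 'model.xml')].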
+    parameters = []
+    arg_name = ''
+    arg_value = ''
+    for arg in argv[1:]:
+        if '=' in arg:
+            arg_name, arg_value = arg.split('=')
+            parameters.append((arg_name, arg_value))
+            arg_name = ''
+            arg_value = ''
+        else:
+            if arg[0] == '-':
+                if arg_name != '':
+                    parameters.append((arg_name, arg_value))
+                    arg_value = ''
+                arg_name = arg
+            else:
+                arg_value = arg
+    if arg_name != '':
+        parameters.append((arg_name, arg_value))
+    return parameters
index cc381a9..027628c 100644 (file)
@@ -107,7 +107,9 @@ class AggregatedStatistics:
 
         n_index = sample + n * itteration
         if n_index >= channels.shape[1]:
-            channels.resize((channels.shape[0], channels.shape[1] + 1, channels.shape[2]), refcheck=False)
+            channels.resize((channels.shape[0], n_index + 1, channels.shape[2]), refcheck=False)
+        if channel >= channels.shape[0]:
+            channels.resize((channel + 1, channels.shape[1], channels.shape[2]), refcheck=False)
 
         channels.itemset((channel, n_index, self.INDEX_MIN), data[sample][channel].min())
         channels.itemset((channel, n_index, self.INDEX_MAX), data[sample][channel].max())
index 3df403e..aea44fa 100644 (file)
@@ -18,14 +18,14 @@ from abc import abstractmethod
 import numpy as np
 import os
 import tempfile
+from pathlib import Path
 from typing import Dict
 
 import openvino.inference_engine as ie
 
-from ..accuracy_checker.accuracy_checker.progress_reporters import TQDMReporter, ProgressReporter
-from ..accuracy_checker.accuracy_checker.config import ConfigReader
-from ..accuracy_checker.accuracy_checker.evaluators.model_evaluator import ModelEvaluator
-from ..accuracy_checker.accuracy_checker.presenters import get_result_format_parameters
+from accuracy_checker.progress_reporters import TQDMReporter, ProgressReporter
+from accuracy_checker.evaluators.model_evaluator import ModelEvaluator
+from accuracy_checker.presenters import get_result_format_parameters
 
 from ..utils.network_info import NetworkInfo
 from ..utils.building.network_builder import NetworkBuilder
@@ -33,14 +33,11 @@ from ..utils.building.layer import Layer
 
 from .logging import info, debug
 from .calibrator_configuration import CalibratorConfiguration
-from .aggregated_statistics import AggregatedStatistics
 from .nrmsd import compare_nrmsd
 from .single_layer_network import SingleLayerNetwork
 from .inference_result import InferenceResult
 from .calibration_metrics import CalibrationMetrics
-from .infer_raw_results import InferRawResults
 from .accuracy.metric_factory import MetricFactory
-from .accuracy.metric_in_percent import MetricInPercent
 
 from .process_dataset_callbacks.collect_results_callback import CollectResultsCallback
 from .process_dataset_callbacks.calculate_accuracy_callback import CalculateAccuracyCallback
@@ -82,24 +79,38 @@ class BaseCalibrator:
         if self._configuration.gpu_extension and self._configuration.device == 'GPU':
             self.plugin.set_config('CONFIG_FILE', self._configuration.gpu_extension)
 
-    def will_be_fused_workaround(self, layer:ie.IENetLayer, network_info:NetworkInfo=None):
-        if layer.type == "Const" or layer.type == "Tile":
-            if not network_info:
-                network_info = NetworkInfo(self._configuration.model)
-            only_expected = network_info.explore_inputs(network_info.get_layer(layer.name), ['Const', 'Tile'])
-            return only_expected, network_info
-        return False, network_info
-
-    def add_outputs(self, network:ie.IENetwork, output_layers: list=None) -> ie.IENetwork:
-        if output_layers is None:
-            output_layers = network.layers.values()
-
-        network_info = None
-        for layer in output_layers:
-            fused, network_info = self.will_be_fused_workaround(layer, network_info)
-            if not fused:
-                network.add_outputs([layer.name])
-        return network
+    def get_allowed_outputs(self, desired_layers: list=None) -> list:
+        network_tmp = self.create_network()
+        # During network loading some layers are truncated; outputs cannot be added to these layers.
+        self.plugin.load(network_tmp)
+
+        output_names = list()
+        excluded_list = ['gather']
+        children_require_stat = ['convolution', 'fullyconnected']
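+        # A layer becomes an output if at least one of its consumers is not a
+        # Gather layer; Gather layers themselves are only dumped when they feed
+        # a Convolution or FullyConnected layer that needs statistics.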
+        for layer in network_tmp.layers.values():
+            add = False
+            for child_name in layer.children:
+                if network_tmp.layers[child_name].type.lower() not in excluded_list:
+                    add = True
+                    break
+            if layer.type.lower() == "gather":
+                add = False
+                for child_name in layer.children:
+                    if network_tmp.layers[child_name].type.lower() in children_require_stat:
+                        add = True
+                        break
+            if add:
+                output_names.append(layer.name)
+
+        # return just custom layers if they were set
+        if desired_layers:
+            custom_layers_list = list()
+            for name in output_names:
+                if name in desired_layers:
+                    custom_layers_list.append(name)
+            return custom_layers_list
+
+        return output_names
 
     def create_network(self) -> ie.IENetwork:
         network = ie.IENetwork(self._configuration.model, self._configuration.weights)
@@ -272,39 +283,6 @@ class BaseCalibrator:
                 return False
         return True
 
-    # TODO: add_outputs - remove, not neccessary
-    def infer(self,
-              add_outputs=False,
-              statistics=None,
-              quantization_level: dict = None,
-              collect_resuls: bool = False,
-              collect_layers: set = None,
-              collect_aggregated_statistics: bool = False,
-              network: ie.IENetwork = None,
-              collect_performance_counters: bool = False,
-              ignore_layer_names: list = None) -> InferenceResult:
-
-        if network is None:
-            network = self.create_network()
-
-        if add_outputs:
-            self.add_outputs(network)
-
-        if quantization_level:
-            for layer_name, value in quantization_level.items():
-                params = network.layers[layer_name].params
-                params["quantization_level"] = value
-                network.layers[layer_name].params = params
-
-        return self._infer(
-            network=network,
-            statistics=statistics,
-            collect_resuls=collect_resuls,
-            collect_layers=collect_layers,
-            collect_aggregated_statistics=collect_aggregated_statistics,
-            collect_performance_counters=collect_performance_counters,
-            ignore_layer_names=ignore_layer_names)
-
     def infer_single_layer_network(self,
                                    single_layer_network: SingleLayerNetwork,
                                    full_network_result: InferenceResult):
@@ -333,15 +311,16 @@ class BaseCalibrator:
         accuracy_drop = compare_nrmsd(actual_result_data, expected_result_data)
         return accuracy_drop
 
-    def _infer(
+    def infer(
         self,
-        network=None,
-        statistics=None,
-        collect_aggregated_statistics: bool = True,
-        collect_resuls: bool = True,
+        model_path=None,
+        collect_aggregated_statistics: bool = False,
+        collect_resuls: bool = False,
         collect_layers: set = None,
         collect_performance_counters: bool = False,
-        ignore_layer_names: list = None
+        ignore_layer_names: list = None,
+        per_layer_statistics: dict = None,
+        add_outputs=False
     ) -> InferenceResult:
         '''
         Accuracy checker infer and compare results
@@ -349,29 +328,23 @@ class BaseCalibrator:
         accuracy = 0.0
 
         model = self._configuration.config['models'][0]
-        launcher_config = model['launchers'][0]
-        dataset_config = model['datasets'][0]
+        # Need to copy to keep the original configuration intact
+        launcher_config = model['launchers'][0].copy()
+        dataset_config = model['datasets'][0].copy()
+
+        if model_path:
+            launcher_config['model'] = Path(model_path)
+            launcher_config['weights'] = Path(model_path[:len(model_path) - 3] + 'bin')
+
+        if add_outputs:
+            launcher_config['outputs'] = self.get_allowed_outputs()
 
         process_dataset_callback = None
         model_evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
         try:
-            if network:
-                del model_evaluator.launcher.network
-                del model_evaluator.launcher.exec_network
-                model_evaluator.launcher.reload_network = False
-                model_evaluator.launcher.network = network
-                model_evaluator.launcher.exec_network = model_evaluator.launcher.plugin.load(network)
-
             if collect_performance_counters:
                 model_evaluator.launcher.plugin.set_config({'PERF_COUNT': 'YES'})
 
-            if statistics:
-                network_stats = {}
-                for layer_name, node_statistic in statistics.items():
-                    network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs),
-                                                              max=tuple(node_statistic.max_outputs))
-                model_evaluator.launcher.network.stats.update(network_stats)
-
             dataset_size = model_evaluator.dataset.size
 
             if self._configuration.progress:
@@ -389,7 +362,7 @@ class BaseCalibrator:
                     model_evaluator.launcher.exec_network,
                     collect_layers=collect_layers,
                     configuration=self._configuration,
-                    statistics=statistics,
+                    per_layer_statistics=per_layer_statistics,
                     normalizer=self,
                     ignore_layer_names=ignore_layer_names)
             else:
diff --git a/tools/calibration/benchmark_facade.py b/tools/calibration/benchmark_facade.py
new file mode 100644 (file)
index 0000000..7ce0bd0
--- /dev/null
@@ -0,0 +1,49 @@
+"""
+Copyright (C) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..benchmark.benchmark import Benchmark
+from ..benchmark.utils.progress_bar import ProgressBar
+from ..benchmark.utils.utils import read_network
+from ..benchmark.utils.inputs_filling import get_inputs
+from ..benchmark.utils.infer_request_wrap import InferRequestsQueue
+
+
+class BenchmarkResult:
+    def __init__(self, latency):
+        self._latency = latency
+
+    @property
+    def latency(self) -> float:
+        return self._latency
+
+class BenchmarkFacade:
+    def __init__(self, device, batch_size, benchmark_iterations_count, cpu_extension):
+        self._benchmark = Benchmark(device, 1, benchmark_iterations_count, None, "sync")
+        self._benchmark.add_extension(cpu_extension)
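+        # When the run is time-limited rather than iteration-limited, the total
+        # progress is unknown up front, so an arbitrary cap of 10000 steps is
+        # used for the progress bar.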
+        self._progress_bar_total_count = benchmark_iterations_count \
+            if benchmark_iterations_count and not self._benchmark.duration_seconds else 10000
+        self._progress_bar = ProgressBar(self._progress_bar_total_count)
+        self._batch_size = batch_size
+
+    def run(self, path_to_model):
+        ie_network = read_network(path_to_model)
+        exe_network = self._benchmark.load_network(ie_network, True, 1)
+        request_queue = InferRequestsQueue(exe_network.requests)
+        requests_input_data = get_inputs("", self._batch_size, ie_network.inputs, exe_network.requests)
+        fps, latency, fp32_total_duration, fp32_iter = self._benchmark.infer(
+            request_queue, requests_input_data, self._batch_size, self._progress_bar)
+
+        return BenchmarkResult(latency)
index d649304..1763228 100644 (file)
@@ -158,7 +158,7 @@ class CalibrationConfigurationHelper:
             ignore_layer_types_from_file = [line.strip() for line in ignore_layer_types_file.readlines()]
             ignore_layer_types.extend(ignore_layer_types_from_file)
 
-        ignore_layer_names = NetworkInfo(configuration.model).get_layer_names(layer_types=ignore_layer_types)
+        ignore_layer_names = NetworkInfo(configuration.model).get_layer_names_by_types(layer_types=ignore_layer_types)
 
         if configuration.ignore_layer_names_path:
             ignore_layer_names_file = open(configuration.ignore_layer_names_path, 'r')
index aaad6ed..b660802 100644 (file)
@@ -18,9 +18,9 @@ import platform
 
 from ..utils.network_info import NetworkInfo
 
-from ..benchmark.benchmark import Benchmark
 from ..network import Network
 
+from .benchmark_facade import BenchmarkFacade
 from .logging import info, debug, info_performance_counters, info_layer_accuracy_drop
 from .calibrator_configuration import CalibratorConfiguration
 from .calibrator_factory import CalibratorFactory
@@ -39,7 +39,8 @@ class Calibrator:
         if not self._configuration.simplified_mode:
             self._calibrator = CalibratorFactory.create(self._configuration.precision,
                                                         CalibratorConfiguration(configuration))
-            self._benchmark = Benchmark(configuration)
+            self._benchmark = BenchmarkFacade(self._configuration.device, self._configuration.batch_size,
+                                              self._configuration.benchmark_iterations_count, self._configuration.cpu_extension)
             self._ignore_layer_names = CalibrationConfigurationHelper.read_ignore_layer_names(self._configuration)
             self._quantization_levels = self._calibrator.get_quantization_levels(self._ignore_layer_names)
 
@@ -53,11 +54,11 @@ class Calibrator:
         iterations = self._configuration.benchmark_iterations_count
         fp32_latency = 0.0
         if iterations > 0:
-            fp32_latency = self._benchmark.run(iterations_count=self._configuration.benchmark_iterations_count).latency
+            fp32_latency = self._benchmark.run(self._configuration.model).latency
         accuracy = fp32_stats.metrics.accuracy
         info("Original network accuracy: {0:.4f}{1}, latency: {2:0.4f} ms".format(accuracy.value,
                                                                       accuracy.symbol,
-                                                                      1000 * fp32_latency))
+                                                                      fp32_latency))
         info("Original network performance counters:\n")
         info_performance_counters(fp32_stats.performance_counters)
         return RawResults(fp32_stats=fp32_stats, fp32_latency=fp32_latency)
@@ -79,7 +80,6 @@ class Calibrator:
         threshold_low_boundary = self._configuration.threshold_boundary
         threshold_step = self._configuration.threshold_step
 
-        best_accuracy_drop = None
         while threshold >= threshold_low_boundary:
             info("Validate {} accuracy, threshold for activation statistics: {}%".format(
                 self._configuration.precision,
@@ -87,22 +87,19 @@ class Calibrator:
             lp_latency = best_lp_stats.latency
 
             lp_statistics = fp32_aggregated_statistics.get_node_statistics(threshold)
-            with Network.reload(
-                    model_path=self._configuration.model,
-                    statistics=lp_statistics,
-                    quantization_levels=self._quantization_levels,
-                    batch_size=self._configuration.batch_size
-            ) as reloaded_network:
-
-                with self._calibrator.infer(network=reloaded_network.ie_network,
-                                            collect_performance_counters=True) as lp_result:
-                    lp_accuracy = lp_result.metrics.accuracy
-                    lp_performance_counters = lp_result.performance_counters
-                    iterations = self._configuration.benchmark_iterations_count
-                    if iterations > 0:
-                        lp_latency = self._benchmark.run(
-                            network=reloaded_network,
-                            iterations_count=self._configuration.benchmark_iterations_count).latency
+            tmp_model_path = Network.serialize_tmp_model(
+                model_path=self._configuration.model,
+                statistics=lp_statistics,
+                quantization_levels=self._quantization_levels)
+
+            with self._calibrator.infer(model_path=tmp_model_path,
+                                        collect_performance_counters=True) as lp_result:
+                lp_accuracy = lp_result.metrics.accuracy
+                lp_performance_counters = lp_result.performance_counters
+                iterations = self._configuration.benchmark_iterations_count
+                if iterations > 0:
+                    lp_latency = self._benchmark.run(tmp_model_path).latency
+            Network.rm_tmp_location(tmp_model_path)
 
             if lp_accuracy.is_better(best_lp_stats.accuracy, fp32_accuracy):
 
@@ -123,14 +120,14 @@ class Calibrator:
                 self._configuration.precision,
                 lp_accuracy.value,
                 lp_accuracy.symbol,
-                1000.0 * lp_latency))
+                lp_latency))
             threshold = threshold - threshold_step
 
         info("Best {0} accuracy is {1:.4f}{2}, latency: {3:0.4f} ms for threshold {4}%".format(
             self._configuration.precision,
             best_lp_stats.accuracy.value,
             best_lp_stats.accuracy.symbol,
-            1000.0 * best_lp_stats.latency,
+            best_lp_stats.latency,
             best_lp_stats.threshold))
 
         info("{} performance counters:\n".format(self._configuration.precision))
@@ -151,11 +148,15 @@ class Calibrator:
             len(quantization_layers),
             len(NetworkInfo(self._configuration.model).layers)))
 
-        with self._calibrator.infer(add_outputs=True,
-                              collect_resuls=True,
-                              collect_layers=quantization_layers,
-                              statistics=lp_results.statistics,
-                              ignore_layer_names=self._ignore_layer_names) as fp32_result_with_raw_data:
+        # collect raw original precision outputs per image and use each output
+        # to calculate layer accuracy drop
+
+        with self._calibrator.infer(
+                add_outputs=True,
+                collect_resuls=True,
+                collect_layers=quantization_layers,
+                per_layer_statistics=lp_results.statistics,
+                ignore_layer_names=self._ignore_layer_names) as fp32_result_with_raw_data:
             if fp32_result_with_raw_data.layers_accuracy_drop:
                 layers_accuracy_drop = fp32_result_with_raw_data.layers_accuracy_drop
             else:
@@ -178,33 +179,31 @@ class Calibrator:
                 self._quantization_levels[layer_accuracy_drop.layer_name] = layer_accuracy_drop.precision
                 best_lp_latency = 0.0
 
-                with Network.reload(
-                        self._configuration.model,
-                        statistics=lp_results.statistics,
-                        quantization_levels=self._quantization_levels,
-                        batch_size=self._configuration.batch_size
-                ) as reloaded_network:
-
-                    with self._calibrator.infer(network=reloaded_network.ie_network) as layer_int8_result:
-                        lp_results.accuracy = layer_int8_result.metrics.accuracy
-                        iterations = self._configuration.benchmark_iterations_count
-                        if iterations > 0:
-                            best_lp_latency = self._benchmark.run(
-                                network=reloaded_network,
-                                iterations_count=self._configuration.benchmark_iterations_count).latency
-
-                fp32_accuracy = raw_results.fp32_stats.metrics.accuracy
-                accuracy_drop = lp_results.accuracy.calculate_drop(fp32_accuracy)
+                tmp_model_path = Network.serialize_tmp_model(
+                    model_path=self._configuration.model,
+                    statistics=lp_results.statistics,
+                    quantization_levels=self._quantization_levels)
+
+                with self._calibrator.infer(model_path=tmp_model_path) as layer_int8_result:
+                    lp_results.accuracy = layer_int8_result.metrics.accuracy
+                    fp32_accuracy = raw_results.fp32_stats.metrics.accuracy
+                    accuracy_drop = lp_results.accuracy.calculate_drop(fp32_accuracy)
+                    iterations = self._configuration.benchmark_iterations_count
+                    if iterations > 0:
+                        best_lp_latency = self._benchmark.run(tmp_model_path).latency
+                Network.rm_tmp_location(tmp_model_path)
+
+                lp_results.accuracy_drop = accuracy_drop if accuracy_drop < lp_results.accuracy_drop else lp_results.accuracy_drop
                 if not lp_results.accuracy.is_achieved(fp32_accuracy, self._configuration.threshold):
                     info("Was not achieved: original network accuracy: {0:.4f}{1} (latency: {2:.4} ms) VS {3} accuracy: {4:.4f}{5} "
                          "(latency {6:.4f} ms), accuracy drop {7:.4f}%"
                          .format(fp32_accuracy.value,
                                  fp32_accuracy.symbol,
-                                 1000.0 * raw_results.fp32_latency,
+                                 raw_results.fp32_latency,
                                  self._configuration.precision,
                                  lp_results.accuracy.value,
                                  lp_results.accuracy.symbol,
-                                 1000.0 * best_lp_latency,
+                                 best_lp_latency,
                                  accuracy_drop))
 
                 else:
@@ -213,12 +212,12 @@ class Calibrator:
                          "(latency: {6:.4} ms), accuracy drop {7:.4}%"
                          .format(fp32_accuracy.value,
                                  fp32_accuracy.symbol,
-                                 1000.0 * raw_results.fp32_latency,
+                                 raw_results.fp32_latency,
                                  self._configuration.precision,
                                  lp_results.accuracy.value,
                                  lp_results.accuracy.symbol,
-                                 1000.0 * best_lp_latency,
-                                 accuracy_drop))
+                                 best_lp_latency,
+                                 lp_results.accuracy_drop))
 
                     break
         else:
@@ -259,10 +258,10 @@ class Calibrator:
                   "(latency: {5:0.4f} ms), threshold for activation statistics: {6}%")
                  .format(raw_results.fp32_stats.metrics.accuracy.value,
                          raw_results.fp32_stats.metrics.accuracy.symbol,
-                         1000.0 * raw_results.fp32_latency,
+                         raw_results.fp32_latency,
                          lp_results.accuracy.value,
                          lp_results.accuracy.symbol,
-                         1000.0 * lp_results.latency,
+                         lp_results.latency,
                          lp_results.threshold))
             self.return_back_to_fp32(lp_results, raw_results)
 
@@ -272,10 +271,10 @@ class Calibrator:
                  "{3:.4f}{4} (latency: {5:.4} ms) with threshold for activation statistic: {6}%".format(
                     raw_results.fp32_stats.metrics.accuracy.value,
                     raw_results.fp32_stats.metrics.accuracy.symbol,
-                    1000.0 * raw_results.fp32_latency,
+                    raw_results.fp32_latency,
                     lp_results.accuracy.value,
                     lp_results.accuracy.symbol,
-                    1000.0 * lp_results.latency,
+                    lp_results.latency,
                     lp_results.threshold))
 
             quantized_layers_count = 0
index 89e117f..78570e4 100644 (file)
@@ -18,9 +18,9 @@ import os
 import tempfile
 import ntpath
 
-from ..accuracy_checker.accuracy_checker.config import ConfigReader
-from ..accuracy_checker.accuracy_checker.launcher.dlsdk_launcher import DLSDKLauncher
-from ..accuracy_checker.accuracy_checker.launcher.model_conversion import FrameworkParameters
+from accuracy_checker.config import ConfigReader
+from accuracy_checker.launcher.dlsdk_launcher import DLSDKLauncher
+from accuracy_checker.launcher.model_conversion import FrameworkParameters
 
 from ..network import Network
 from ..utils.path import Path
index 0a20f83..779a637 100644 (file)
@@ -18,7 +18,7 @@ import pathlib
 from functools import partial
 from argparse import ArgumentParser
 
-from ..accuracy_checker.accuracy_checker.utils import get_path
+from accuracy_checker.utils import get_path
 from ..utils.path import Path
 
 class CommandLineReader:
index eba75f2..8422b35 100644 (file)
@@ -35,7 +35,7 @@ class CalculateAccuracyCallback:
         exec_network: ie.ExecutableNetwork,
         collect_layers: set,
         configuration: CalibrationConfiguration,
-        statistics: dict,
+        per_layer_statistics: dict,
         normalizer,
         ignore_layer_names=None):
 
@@ -47,7 +47,7 @@ class CalculateAccuracyCallback:
             raise ValueError("configuration is not specified")
         if not collect_layers:
             raise ValueError("layers to collect is not specified")
-        if not statistics:
+        if not per_layer_statistics:
             raise ValueError("statistics is not specified")
         if not normalizer:
             raise ValueError("normalizer is not specified")
@@ -56,8 +56,7 @@ class CalculateAccuracyCallback:
         self._exec_network = exec_network
         self._collect_layers = collect_layers
         self._configuration = configuration
-        self._network_info = NetworkInfo(self._configuration.model)
-        self._statistics = statistics
+        self._per_layer_statistics = per_layer_statistics
         self._normalizer = normalizer
         self._ignore_layer_names = ignore_layer_names
 
@@ -81,11 +80,11 @@ class CalculateAccuracyCallback:
         accuracy_drop = []
         single_layer_network_names = [net.layer_name for net in self._single_layer_networks]
         for layer_name, accuracy_drop_of_this_layer in self._accuracy_drop_dict.items():
-            if layer_name in single_layer_network_names:
+            if layer_name in single_layer_network_names and accuracy_drop_of_this_layer.size != 0:
                 accuracy_drop.append(LayerAccuracyDropInfo(
                     layer_name=layer_name,
                     value=self.accuracy_drop_for_layer(accuracy_drop_of_this_layer),
-                    precision=self._network_info.get_layer(layer_name).precision))
+                    precision=self._network.layers[layer_name].precision))
 
         accuracy_drop.sort(key=lambda accuracy_drop: accuracy_drop.value, reverse=True)
         return accuracy_drop
@@ -135,6 +134,8 @@ class CalculateAccuracyCallback:
         accuracy_drop_list = np.array([])
 
         for raw_data in infer_raw_results:
+            if single_layer_network.input_layer_name not in raw_data:
+                continue
             input_layer_data = raw_data[single_layer_network.input_layer_name]
 
             if tuple(single_layer_network._network.inputs[single_layer_network.input_layer_name].shape) != input_layer_data.shape:
@@ -161,7 +162,7 @@ class CalculateAccuracyCallback:
 
     def set_single_layer_networks(self):
         assert self._configuration is not None, "Configuration should be set"
-        assert self._statistics is not None, "Statistics should be set"
+        assert self._per_layer_statistics is not None, "Statistics should be set"
 
         network_info = NetworkInfo(self._configuration.model)
 
@@ -195,7 +196,7 @@ class CalculateAccuracyCallback:
 
                     network_stats = {}
                     # TODO: initialize only neccessary statistic
-                    for layer_name, node_statistic in self._statistics.items():
+                    for layer_name, node_statistic in self._per_layer_statistics.items():
                         network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs),
                                                                   max=tuple(node_statistic.max_outputs))
                     layer_network.stats.update(network_stats)
@@ -226,7 +227,7 @@ class CalculateAccuracyCallback:
                     self._layers_to_return_to_fp32 = np.append(self._layers_to_return_to_fp32, layer)
                     index += 1
 
-    def callback(self, value, latency=None):
+    def callback(self, value, latency=None, **kwargs):
 
         collect_value = dict()
         for layer_name in value:
index 7900dc7..9ffbc77 100644 (file)
@@ -46,13 +46,18 @@ class CollectResultsCallback:
         self._infer_raw_results = InferRawResults() if collect_resuls else None
         self._latencies = list()
 
-    def callback(self, value, latency = None):
+    def callback(self, value, latency=None, **kwargs):
+        network = kwargs.get('network')
+        exec_network = kwargs.get('exec_network')
+        if not network or not exec_network:
+            network = self._network
+            exec_network = self._exec_network
         if self._collect_aggregated_statistics:
             if not self._aggregated_statistics:
                 self._aggregated_statistics = AggregatedStatistics(
                     iterations_count = self._iterations_count,
                     dataset_size = self._dataset_size)
-            self._aggregated_statistics.add(self._network, self._exec_network, value)
+            self._aggregated_statistics.add(network, exec_network, value)
 
         if self._collect_results:
             if self._collect_layers:
@@ -86,4 +91,4 @@ class CollectResultsCallback:
             self._infer_raw_results.release()
 
     def get_accuracy_drop(self):
-        return None
\ No newline at end of file
+        return None
index 17b7cdb..009f555 100644 (file)
@@ -5,7 +5,7 @@ pillow
 progress
 py-cpuinfo<=4.0
 pyyaml
-scipy<=0.19
+scipy<1.2
 shapely
 sklearn
 tqdm
index c762cb9..6a49b9d 100644 (file)
@@ -18,6 +18,7 @@ import os
 import tempfile
 import shutil
 import ntpath
+from pathlib import Path as std_path
 
 import openvino.inference_engine as ie
 from .utils.path import Path
@@ -45,6 +46,29 @@ class Network:
             if tmp_model_dir:
                 shutil.rmtree(tmp_model_dir)
 
+    @staticmethod
+    def serialize_tmp_model(model_path: str, statistics = None, quantization_levels: dict = None):
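+        # Writes the (optionally annotated) network into a temporary directory
+        # and returns the path to the serialized IR; the caller is expected to
+        # clean it up with rm_tmp_location().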
+        try:
+            with Network(model_path) as network:
+                if statistics:
+                    network.set_statistics(statistics)
+                if quantization_levels:
+                    network.set_quantization_levels(quantization_levels)
+
+                tmp_model_dir = tempfile.mkdtemp(".model")
+                tmp_model_path = os.path.join(tmp_model_dir, ntpath.basename(model_path))
+                network.serialize(tmp_model_path)
+            return tmp_model_path
+        except:
+            print('Could not serialize temporary IR')
+            raise
+
+    @staticmethod
+    def rm_tmp_location(file_path):
+        if file_path:
+            pdir = std_path(file_path).parent
+            shutil.rmtree(str(pdir))
+
     def __init__(self, model_path: str, weights_path: str=None):
         if model_path is None:
             raise ValueError("model_path is None")
index 707bb07..dd73ba9 100644 (file)
@@ -25,7 +25,6 @@ class Layer:
     def __init__(self, data: dict):
         self._id = int(data['id'])
         self._name = data['name']
-        self._precision = data['precision']
         self._type = data['type']
 
         self._input_ports = Layer.__init_ports(data, 'input')
@@ -67,10 +66,6 @@ class Layer:
         return self._name
 
     @property
-    def precision(self) -> str:
-        return self._precision
-
-    @property
     def type(self) -> str:
         return self._type
 
index d318e46..d3c6ec4 100644 (file)
@@ -91,13 +91,13 @@ class NetworkInfo:
 
         pass
 
-    def get_layer_names(self, layer_types: List[str]) -> List[str]:
-        skipped = []
+    def get_layer_names_by_types(self, layer_types: List[str]) -> List[str]:
+        layer_names = []
         if layer_types:
             for layer in self._layer_by_name.values():
                 if layer.type in layer_types:
-                    skipped.append(layer.name)
-        return skipped
+                    layer_names.append(layer.name)
+        return layer_names
 
     @property
     def layers(self) -> int: